~cytrogen/gstack

ref: 403637f0c894f1fd0ebbbb2f2728b439e607ff47 gstack/bin/gstack-telemetry-log -rwxr-xr-x 8.3 KiB
403637f0 — Garry Tan feat: rotating founder resources in /office-hours closing (v0.13.10.0) (#652) 10 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env bash
# gstack-telemetry-log — append a telemetry event to local JSONL
#
# Data flow:
#   preamble (start) ──▶ .pending marker
#   preamble (epilogue) ──▶ gstack-telemetry-log ──▶ skill-usage.jsonl
#                                                 └──▶ gstack-telemetry-sync (bg)
#
# Usage:
#   gstack-telemetry-log --skill qa --duration 142 --outcome success \
#     --used-browse true --session-id "12345-1710756600"
#
# Env overrides (for testing):
#   GSTACK_STATE_DIR  — override ~/.gstack state directory
#   GSTACK_DIR        — override auto-detected gstack root
#
# NOTE: nounset + pipefail only, deliberately without errexit (-e) —
# telemetry must never exit non-zero.
set -u
set -o pipefail

# Root of the gstack checkout: honor $GSTACK_DIR when it is set and non-empty,
# otherwise use the parent of the directory that contains this script.
if [ -z "${GSTACK_DIR:-}" ]; then
  GSTACK_DIR="$(cd "$(dirname "$0")/.." && pwd)"
fi

# State directory: honor $GSTACK_STATE_DIR, otherwise fall back to ~/.gstack.
if [ -n "${GSTACK_STATE_DIR:-}" ]; then
  STATE_DIR="$GSTACK_STATE_DIR"
else
  STATE_DIR="$HOME/.gstack"
fi

# Everything else is derived from the two roots above.
ANALYTICS_DIR="${STATE_DIR}/analytics"
PENDING_DIR="${ANALYTICS_DIR}"  # .pending-* files live here
JSONL_FILE="${ANALYTICS_DIR}/skill-usage.jsonl"
VERSION_FILE="${GSTACK_DIR}/VERSION"
CONFIG_CMD="${GSTACK_DIR}/bin/gstack-config"

# ─── Parse flags ─────────────────────────────────────────────
# Defaults; each one may be overridden by the matching "--flag value" pair.
SKILL=""
DURATION=""
OUTCOME="unknown"
USED_BROWSE="false"
SESSION_ID=""
ERROR_CLASS=""
ERROR_MESSAGE=""
FAILED_STEP=""
EVENT_TYPE="skill_run"
SOURCE=""

# parse_flags ARGS... — copy recognised "--flag value" pairs into the globals
# above. Unrecognised words are skipped one at a time.
#
# Every recognised flag consumes a value, so the loop stops as soon as fewer
# than two words remain: a lone trailing word is either unknown (ignored
# anyway) or a flag with its value missing. Expanding "$2" in that situation
# would abort the script under `set -u`, breaking the "never exit non-zero"
# contract, so a dangling flag simply keeps its default.
parse_flags() {
  while [ $# -ge 2 ]; do
    case "$1" in
      --skill)         SKILL="$2"; shift 2 ;;
      --duration)      DURATION="$2"; shift 2 ;;
      --outcome)       OUTCOME="$2"; shift 2 ;;
      --used-browse)   USED_BROWSE="$2"; shift 2 ;;
      --session-id)    SESSION_ID="$2"; shift 2 ;;
      --error-class)   ERROR_CLASS="$2"; shift 2 ;;
      --error-message) ERROR_MESSAGE="$2"; shift 2 ;;
      --failed-step)   FAILED_STEP="$2"; shift 2 ;;
      --event-type)    EVENT_TYPE="$2"; shift 2 ;;
      --source)        SOURCE="$2"; shift 2 ;;
      *) shift ;;
    esac
  done
  return 0
}
parse_flags "$@"

# Source: flag > env > default 'live'
SOURCE="${SOURCE:-${GSTACK_TELEMETRY_SOURCE:-live}}"

# ─── Read telemetry tier ─────────────────────────────────────
# Ask gstack-config for the configured tier; a failing or silent command
# leaves TIER empty, which is treated as "off" below.
TIER="$("$CONFIG_CMD" get telemetry 2>/dev/null || true)"

# Only the known opt-in tiers are honored. Anything else — empty, "off",
# or an invalid value — collapses to "off".
case "${TIER:-off}" in
  anonymous|community) ;;
  *) TIER="off" ;;
esac

if [ "$TIER" = "off" ]; then
  # Still clear pending markers for this session even if telemetry is off
  if [ -n "$SESSION_ID" ]; then
    rm -f "$PENDING_DIR/.pending-$SESSION_ID" 2>/dev/null || true
  fi
  exit 0
fi

# ─── Finalize stale .pending markers ────────────────────────
# Each session gets its own .pending-$SESSION_ID file to avoid races
# between concurrent sessions. Finalize any that don't match our session.

# pending_field DATA KEY — print the value of the first "KEY":"value" pair
# found in DATA (nothing if absent). Extraction is a plain grep/awk scrape, so
# the value ends at the first double quote; backslashes and control characters
# are then stripped so that re-embedding the value in a JSON string cannot
# produce an invalid line (e.g. a trailing "\" left over from an escaped quote).
pending_field() {
  printf '%s\n' "$1" \
    | grep -o "\"$2\":\"[^\"]*\"" \
    | head -1 \
    | awk -F'"' '{print $4}' \
    | LC_ALL=C tr -d '\\[:cntrl:]'
}

# finalize_stale_pending — for every marker in $PENDING_DIR that does not
# belong to $SESSION_ID: remove it and, if it had content, append a
# skill_run event with outcome "unknown" to $JSONL_FILE.
# Globals read: PENDING_DIR, SESSION_ID, ANALYTICS_DIR, JSONL_FILE.
# Always returns 0; every filesystem error is deliberately swallowed.
finalize_stale_pending() {
  local pfile pfile_sid data skill ts sid ver
  local host_os="" host_arch=""

  for pfile in "$PENDING_DIR"/.pending-*; do
    # Also skips the literal pattern left behind when the glob matches nothing.
    [ -f "$pfile" ] || continue

    # Skip our own session's marker (it's still in-flight)
    pfile_sid="${pfile##*/}"
    pfile_sid="${pfile_sid#.pending-}"
    [ "$pfile_sid" = "$SESSION_ID" ] && continue

    data="$(cat "$pfile" 2>/dev/null || true)"
    rm -f "$pfile" 2>/dev/null || true
    [ -n "$data" ] || continue

    skill="$(pending_field "$data" skill)"
    ts="$(pending_field "$data" ts)"
    sid="$(pending_field "$data" session_id)"
    ver="$(pending_field "$data" gstack_version)"

    # Host facts and the output directory are the same for every marker:
    # resolve them once, and only if there is actually something to write.
    if [ -z "$host_os" ]; then
      host_os="$(uname -s | tr '[:upper:]' '[:lower:]')"
      host_arch="$(uname -m)"
      mkdir -p "$ANALYTICS_DIR" 2>/dev/null || true
    fi

    # Write the stale event as outcome: unknown
    printf '{"v":1,"ts":"%s","event_type":"skill_run","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":null,"outcome":"unknown","error_class":null,"used_browse":false,"sessions":1}\n' \
      "$ts" "$skill" "$sid" "$ver" "$host_os" "$host_arch" >> "$JSONL_FILE" 2>/dev/null || true
  done
  return 0
}
finalize_stale_pending

# Clear our own session's pending marker (we're about to log the real event)
if [ -n "$SESSION_ID" ]; then
  rm -f "$PENDING_DIR/.pending-$SESSION_ID" 2>/dev/null || true
fi

# ─── Collect metadata ────────────────────────────────────────

# read_gstack_version FILE — print the version string stored in FILE with all
# whitespace removed, or "unknown" when FILE is missing, unreadable or empty.
# Quotes, backslashes and control characters are dropped as well because the
# result is embedded verbatim inside a JSON string. The fallback does not rely
# on `pipefail` being active.
read_gstack_version() {
  local ver=""
  if [ -r "$1" ]; then
    ver="$(LC_ALL=C tr -d '"\\[:space:][:cntrl:]' < "$1" 2>/dev/null || true)"
  fi
  printf '%s' "${ver:-unknown}"
}

# UTC timestamp; falls back to a form without the trailing Z, then to empty.
TS="$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u +%Y-%m-%dT%H:%M:%S 2>/dev/null || echo "")"
GSTACK_VERSION="$(read_gstack_version "$VERSION_FILE")"
OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
ARCH="$(uname -m)"

# Count of files under $STATE_DIR/sessions modified in the last 120 minutes;
# stays at 1 when the directory is absent or the count is zero/unparseable.
SESSIONS="1"
if [ -d "$STATE_DIR/sessions" ]; then
  _SC="$(find "$STATE_DIR/sessions" -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' \n\r\t')"
  [ -n "$_SC" ] && [ "$_SC" -gt 0 ] 2>/dev/null && SESSIONS="$_SC"
fi

# Generate installation_id for community tier
# Uses a random UUID stored locally — not derived from hostname/user so it
# can't be guessed or correlated by someone who knows your machine identity.

# load_or_create_install_id FILE — print the installation id stored in FILE.
# When FILE is missing or empty a new random id is generated, persisted to
# FILE (best effort) and printed. Prints nothing if no generator works.
# The stored value is stripped of quotes, backslashes, whitespace and control
# characters because it is later embedded verbatim in a JSON string.
load_or_create_install_id() {
  local id_file="$1" id=""

  if [ -f "$id_file" ]; then
    id="$(LC_ALL=C tr -d '"\\[:space:][:cntrl:]' < "$id_file" 2>/dev/null || true)"
  fi

  if [ -z "$id" ]; then
    # Try each generator in turn; a generator that exists but fails falls
    # through to the next one instead of leaving the id empty.
    if command -v uuidgen >/dev/null 2>&1; then
      id="$(uuidgen 2>/dev/null | tr '[:upper:]' '[:lower:]')"
    fi
    if [ -z "$id" ] && [ -r /proc/sys/kernel/random/uuid ]; then
      id="$(cat /proc/sys/kernel/random/uuid 2>/dev/null)"
    fi
    if [ -z "$id" ]; then
      # Fallback: 16 random bytes as hex from /dev/urandom (not UUID-formatted)
      id="$(od -An -tx1 -N16 /dev/urandom 2>/dev/null | tr -d ' \n')"
    fi
    if [ -n "$id" ]; then
      mkdir -p "$(dirname "$id_file")" 2>/dev/null || true
      { printf '%s' "$id" > "$id_file"; } 2>/dev/null || true
    fi
  fi

  printf '%s' "$id"
}

INSTALL_ID=""
if [ "$TIER" = "community" ]; then
  # Lives in the state directory so that GSTACK_STATE_DIR overrides (tests)
  # never read or write the real ~/.gstack/installation-id.
  ID_FILE="$STATE_DIR/installation-id"
  INSTALL_ID="$(load_or_create_install_id "$ID_FILE")"
fi

# Local-only fields (never sent remotely)
BRANCH=""
REPO_SLUG=""
if command -v git >/dev/null 2>&1; then
  # Reduce the origin URL to its last two path components ("owner/repo",
  # with or without a trailing .git) and turn the '/' into '-'.
  REPO_SLUG="$(
    git remote get-url origin 2>/dev/null \
      | sed -e 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|' -e 's|.*[:/]\([^/]*/[^/]*\)$|\1|' \
      | tr '/' '-' 2>/dev/null \
      || true
  )"
  # Name of the currently checked-out ref, as reported by git.
  BRANCH="$(git rev-parse --abbrev-ref HEAD 2>/dev/null || true)"
fi

# ─── Construct and append JSON ───────────────────────────────
mkdir -p "$ANALYTICS_DIR" 2>/dev/null || true

# json_safe VALUE — print VALUE made safe for embedding between JSON double
# quotes: double quotes, backslashes and every control character (not only
# newline/CR/tab — JSON forbids all raw control characters in strings) are
# removed, then the result is cut to 200 bytes. LC_ALL=C keeps tr strictly
# byte-oriented so non-ASCII bytes pass through untouched on every platform.
json_safe() { printf '%s' "$1" | LC_ALL=C tr -d '"\\[:cntrl:]' | head -c 200; }
SKILL="$(json_safe "$SKILL")"
OUTCOME="$(json_safe "$OUTCOME")"
SESSION_ID="$(json_safe "$SESSION_ID")"
SOURCE="$(json_safe "$SOURCE")"
EVENT_TYPE="$(json_safe "$EVENT_TYPE")"
REPO_SLUG="$(json_safe "$REPO_SLUG")"
BRANCH="$(json_safe "$BRANCH")"

# Escape null fields — sanitize ERROR_CLASS and FAILED_STEP via json_safe()

# json_escape_message TEXT — print TEXT as the inside of a JSON string:
# cut to 200 bytes, newlines/CRs turned into spaces, backslash, double quote
# and tab backslash-escaped, and any remaining control character (ANSI escape
# sequences are common in error output) removed. LC_ALL=C keeps sed and tr
# byte-oriented; the byte truncation can leave a partial multibyte sequence.
json_escape_message() {
  local tab
  tab="$(printf '\t')"
  printf '%s' "$1" \
    | head -c 200 \
    | LC_ALL=C tr '\n\r' '  ' \
    | LC_ALL=C sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e "s/${tab}/\\\\t/g" \
    | LC_ALL=C tr -d '[:cntrl:]'
}

# Each field is either a quoted JSON string or the bare literal null.
ERR_FIELD="null"
if [ -n "$ERROR_CLASS" ]; then
  ERR_FIELD="\"$(json_safe "$ERROR_CLASS")\""
fi

ERR_MSG_FIELD="null"
if [ -n "$ERROR_MESSAGE" ]; then
  ERR_MSG_FIELD="\"$(json_escape_message "$ERROR_MESSAGE")\""
fi

STEP_FIELD="null"
if [ -n "$FAILED_STEP" ]; then
  STEP_FIELD="\"$(json_safe "$FAILED_STEP")\""
fi

# Cap unreasonable durations

# normalize_duration VALUE — print VALUE as a JSON number if it is a plain
# non-negative integer no larger than 86400 (24h), otherwise print null.
# The value is spliced into the JSON line unquoted, so anything that is not
# strictly digits (empty, negative, fractional, text, injected JSON) must
# become null. Leading zeros are stripped because JSON forbids them, and the
# digit-count check rejects huge values before the integer comparison, which
# would otherwise fail on them.
normalize_duration() {
  local raw="$1"
  case "$raw" in
    ''|*[!0-9]*) printf 'null'; return 0 ;;
  esac
  raw="${raw#"${raw%%[!0]*}"}"   # drop leading zeros
  raw="${raw:-0}"                # all-zero input collapses to 0
  if [ "${#raw}" -gt 5 ] || [ "$raw" -gt 86400 ]; then
    printf 'null'
    return 0
  fi
  printf '%s' "$raw"
}

DUR_FIELD="$(normalize_duration "$DURATION")"
# Keep DURATION in step with what is actually emitted.
if [ "$DUR_FIELD" = "null" ]; then
  DURATION=""
else
  DURATION="$DUR_FIELD"
fi

# installation_id is emitted as a quoted JSON string when known, else null.
if [ -n "$INSTALL_ID" ]; then
  INSTALL_FIELD="\"$INSTALL_ID\""
else
  INSTALL_FIELD="null"
fi

# Only the exact string "true" counts as true; everything else is false.
case "$USED_BROWSE" in
  true) BROWSE_BOOL="true" ;;
  *)    BROWSE_BOOL="false" ;;
esac

# Append the event as a single JSON line. Fields bound to a bare %s in the
# format (duration, error_*, failed_step, used_browse, sessions,
# installation_id) arrive pre-rendered as JSON literals; write failures are
# ignored on purpose.
printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"error_message":%s,"failed_step":%s,"used_browse":%s,"sessions":%s,"installation_id":%s,"source":"%s","_repo_slug":"%s","_branch":"%s"}\n' \
  "$TS" \
  "$EVENT_TYPE" \
  "$SKILL" \
  "$SESSION_ID" \
  "$GSTACK_VERSION" \
  "$OS" \
  "$ARCH" \
  "$DUR_FIELD" \
  "$OUTCOME" \
  "$ERR_FIELD" \
  "$ERR_MSG_FIELD" \
  "$STEP_FIELD" \
  "$BROWSE_BOOL" \
  "${SESSIONS:-1}" \
  "$INSTALL_FIELD" \
  "$SOURCE" \
  "$REPO_SLUG" \
  "$BRANCH" >> "$JSONL_FILE" 2>/dev/null || true

# ─── Trigger sync if tier is not off ─────────────────────────
# Tier "off" has already exited earlier in the script, so reaching this point
# means telemetry is enabled. The sync runs in the background fully detached
# from our stdio: if it inherited stdout, a caller capturing this script's
# output through a pipe (command substitution, a tool runner) would keep
# waiting until the sync finished, and any sync output would leak into it.
SYNC_CMD="$GSTACK_DIR/bin/gstack-telemetry-sync"
if [ -x "$SYNC_CMD" ]; then
  "$SYNC_CMD" </dev/null >/dev/null 2>&1 &
fi

exit 0