diff --git a/.github/workflows/clone-tracking.yml b/.github/workflows/clone-tracking.yml
new file mode 100644
index 00000000..58b1e293
--- /dev/null
+++ b/.github/workflows/clone-tracking.yml
@@ -0,0 +1,169 @@
+name: GitHub Clone Tracking → data/clone-data.rvf
+
+# Persists rolling 14-day clone-traffic snapshots to data/clone-data.rvf in
+# the ruvector JSONL RVF format. GitHub's /traffic/clones endpoint only
+# retains the last 14 days server-side, so without this scheduled scrape
+# the data is gone forever the moment it falls outside the window.
+#
+# Format: JSONL RVF (one compact JSON object per line)
+#   - line 1 is a `metadata` segment that initializes the file
+#   - each subsequent run appends one `clone_snapshot` and one
+#     `view_snapshot` segment, each carrying the 14-day rollup PLUS the
+#     per-day breakdown
+#   - per-day entries are keyed by `timestamp`, so a downstream reader can
+#     dedupe across overlapping snapshot windows
+#
+# Schedule: weekly. Runs must be no more than 14 days apart or the days in
+# between are never captured; a 1st + 15th schedule leaves up to 17 days
+# between the 15th and the next 1st. Workflow can also be dispatched
+# manually for backfill or test.
+
+on:
+  schedule:
+    # 01:23 UTC every Monday. Consecutive 14-day windows overlap by about a
+    # week; readers de-duplicate on the per-day `timestamp`. Picking :23
+    # avoids the cron herd on :00.
+    - cron: '23 1 * * 1'
+  workflow_dispatch:
+
+permissions:
+  contents: write
+
+concurrency:
+  group: clone-tracking
+  cancel-in-progress: false
+
+jobs:
+  snapshot:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Fetch /traffic/clones + /traffic/views from GitHub
+        env:
+          # NOTE(review): the traffic endpoints are documented as requiring
+          # the "Administration" (read) repository permission, which the
+          # default GITHUB_TOKEN may not carry — confirm. If these calls
+          # return 403, store a PAT with that permission as the
+          # TRAFFIC_TOKEN secret; it takes precedence when set.
+          GH_TOKEN: ${{ secrets.TRAFFIC_TOKEN || secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+        run: |
+          mkdir -p data
+          gh api "repos/${REPO}/traffic/clones" > /tmp/clones.json
+          gh api "repos/${REPO}/traffic/views" > /tmp/views.json
+          echo "--- clones rollup ---"
+          jq '{count, uniques, days: (.clones | length)}' /tmp/clones.json
+          echo "--- views rollup ---"
+          jq '{count, uniques, days: (.views | length)}' /tmp/views.json
+
+      - name: Append snapshot to data/clone-data.rvf
+        env:
+          REPO: ${{ github.repository }}
+        run: |
+          set -e
+          RVF="data/clone-data.rvf"
+          FETCHED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+          # Every segment is written with `jq -c`: by default jq pretty-prints
+          # an object over several lines, which would break the JSONL layout
+          # that the summary step below parses with json.loads(line).
+
+          # Initialize the file with a metadata segment on first run.
+          if [ ! -f "$RVF" ]; then
+            echo "Initializing $RVF with metadata segment"
+            jq -c -n --arg repo "$REPO" --arg ts "$FETCHED_AT" '{
+              type: "metadata",
+              name: "ruview-clone-traffic-history",
+              version: "1.0.0",
+              schema: "ruvector.rvf.jsonl/v1",
+              format: "github-traffic-snapshots",
+              repo: $repo,
+              source: "GitHub Traffic API /repos/{repo}/traffic/{clones,views}",
+              policy: "GitHub retains only 14 days server-side; this file is the long-term record.",
+              segments: ["metadata", "clone_snapshot", "view_snapshot"],
+              created_at: $ts,
+              custom: {
+                cadence: "weekly (Mondays 01:23 UTC, overlapping 14-day windows)",
+                idempotency_key: "timestamp (per-day records de-duplicate across overlapping snapshot windows)"
+              }
+            }' >> "$RVF"
+          fi
+
+          # Append the clone snapshot.
+          jq -c --arg ts "$FETCHED_AT" '{
+            type: "clone_snapshot",
+            fetched_at: $ts,
+            window_count: .count,
+            window_uniques: .uniques,
+            per_day: .clones
+          }' /tmp/clones.json >> "$RVF"
+
+          # Append the views snapshot (free with the same auth).
+          jq -c --arg ts "$FETCHED_AT" '{
+            type: "view_snapshot",
+            fetched_at: $ts,
+            window_count: .count,
+            window_uniques: .uniques,
+            per_day: .views
+          }' /tmp/views.json >> "$RVF"
+
+          echo "--- RVF tail (last 4 lines) ---"
+          tail -4 "$RVF" | jq -c '{type, fetched_at, window_count, window_uniques}' || true
+          echo "--- file size ---"
+          wc -l "$RVF"
+
+      - name: Compute aggregates for the commit summary
+        id: agg
+        run: |
+          # Count distinct per-day entries across all snapshots so we can
+          # show "cumulative observed clones" in the commit message.
+          python3 - <<'PY'
+          import json, os
+          path = "data/clone-data.rvf"
+          # Keyed by per-day timestamp; a later snapshot overwrites an earlier
+          # entry for the same day.
+          per_day_clones = {}
+          per_day_views = {}
+          with open(path, encoding="utf-8") as f:
+              for line in f:
+                  if not line.strip():
+                      continue
+                  d = json.loads(line)
+                  if d.get("type") == "clone_snapshot":
+                      for entry in d.get("per_day", []):
+                          per_day_clones[entry["timestamp"]] = entry
+                  elif d.get("type") == "view_snapshot":
+                      for entry in d.get("per_day", []):
+                          per_day_views[entry["timestamp"]] = entry
+
+          # The "uniques" totals add up per-day uniques; presumably a visitor
+          # seen on several days is counted once per day — not a distinct count.
+          tot_clones = sum(e.get("count", 0) for e in per_day_clones.values())
+          tot_uniq_clones = sum(e.get("uniques", 0) for e in per_day_clones.values())
+          tot_views = sum(e.get("count", 0) for e in per_day_views.values())
+          tot_uniq_views = sum(e.get("uniques", 0) for e in per_day_views.values())
+          print(f"clone days observed: {len(per_day_clones)} total clones: {tot_clones:,} total unique cloners: {tot_uniq_clones:,}")
+          print(f"view days observed: {len(per_day_views)} total views: {tot_views:,} total unique viewers: {tot_uniq_views:,}")
+
+          with open(os.environ["GITHUB_OUTPUT"], "a") as out:
+              out.write(f"clones={tot_clones}\n")
+              out.write(f"clone_days={len(per_day_clones)}\n")
+              out.write(f"views={tot_views}\n")
+              out.write(f"view_days={len(per_day_views)}\n")
+          PY
+
+      - name: Commit + push if changed
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          # Stage first, then compare the index against HEAD: a plain
+          # `git diff` ignores untracked files, so a data file created by
+          # this run would otherwise never be committed.
+          git add data/clone-data.rvf
+          if git diff --cached --quiet -- data/clone-data.rvf; then
+            echo "no changes to commit"
+            exit 0
+          fi
+          git commit -m "chore(traffic): clone snapshot — ${{ steps.agg.outputs.clone_days }} days observed → ${{ steps.agg.outputs.clones }} clones, ${{ steps.agg.outputs.view_days }} view-days → ${{ steps.agg.outputs.views }} views"
+          git push
diff --git a/data/clone-data.rvf b/data/clone-data.rvf
new file mode 100644
index 00000000..16592910
--- /dev/null
+++ b/data/clone-data.rvf
@@ -0,0 +1,3 @@
+{"type": "metadata", "name": "ruview-clone-traffic-history", "version": "1.0.0", "schema": "ruvector.rvf.jsonl/v1", "format": "github-traffic-snapshots", "repo": "ruvnet/RuView", "source": "GitHub Traffic API /repos/{repo}/traffic/{clones,views}", "policy": "GitHub retains only 14 days server-side; this file is the long-term record.", "segments": ["metadata", "clone_snapshot", "view_snapshot"], "created_at": "2026-05-19T23:16:22Z", "custom": {"cadence": "twice monthly (1st and 15th, ~14-day intervals)", "idempotency_key": "timestamp (per-day records de-duplicate across overlapping snapshot windows)"}}
+{"type": "clone_snapshot", "fetched_at": "2026-05-19T23:16:22Z", "window_count": 27887, "window_uniques": 6611, "per_day": [{"timestamp": "2026-05-05T00:00:00Z", "count": 620, "uniques": 218}, {"timestamp": "2026-05-06T00:00:00Z", "count": 477, "uniques": 232}, {"timestamp": "2026-05-07T00:00:00Z", "count": 685, "uniques": 268}, {"timestamp": "2026-05-08T00:00:00Z", "count": 703, "uniques": 276}, {"timestamp": "2026-05-09T00:00:00Z", "count": 352, "uniques": 184}, {"timestamp": "2026-05-10T00:00:00Z", "count": 205, "uniques": 151}, {"timestamp": "2026-05-11T00:00:00Z", "count": 1160, "uniques": 234}, {"timestamp": "2026-05-12T00:00:00Z", "count": 599, "uniques": 207}, {"timestamp": "2026-05-13T00:00:00Z", "count": 5141, "uniques": 1152}, {"timestamp": "2026-05-14T00:00:00Z", "count": 3420, "uniques": 972}, {"timestamp": "2026-05-15T00:00:00Z", "count": 1974, "uniques": 764}, {"timestamp": "2026-05-16T00:00:00Z", "count": 2917, "uniques": 617}, {"timestamp": "2026-05-17T00:00:00Z", "count": 6690, "uniques": 1169}, {"timestamp": "2026-05-18T00:00:00Z", "count": 2944, "uniques": 625}]}
+{"type": "view_snapshot", "fetched_at": "2026-05-19T23:16:22Z", "window_count": 162314, "window_uniques": 75464, "per_day": [{"timestamp": "2026-05-05T00:00:00Z", "count": 5540, "uniques": 2690}, {"timestamp": "2026-05-06T00:00:00Z", "count": 5111, "uniques": 2393}, {"timestamp": "2026-05-07T00:00:00Z", "count": 5585, "uniques": 2708}, {"timestamp": "2026-05-08T00:00:00Z", "count": 7004, "uniques": 3261}, {"timestamp": "2026-05-09T00:00:00Z", "count": 5395, "uniques": 2531}, {"timestamp": "2026-05-10T00:00:00Z", "count": 4761, "uniques": 2219}, {"timestamp": "2026-05-11T00:00:00Z", "count": 4275, "uniques": 2044}, {"timestamp": "2026-05-12T00:00:00Z", "count": 3466, "uniques": 1688}, {"timestamp": "2026-05-13T00:00:00Z", "count": 13561, "uniques": 8473}, {"timestamp": "2026-05-14T00:00:00Z", "count": 21867, "uniques": 12527}, {"timestamp": "2026-05-15T00:00:00Z", "count": 26182, "uniques": 14609}, {"timestamp": "2026-05-16T00:00:00Z", "count": 17406, "uniques": 8868}, {"timestamp": "2026-05-17T00:00:00Z", "count": 28444, "uniques": 14541}, {"timestamp": "2026-05-18T00:00:00Z", "count": 13717, "uniques": 7819}]}