-
Notifications
You must be signed in to change notification settings - Fork 1
285 lines (247 loc) · 10.3 KB
/
watch-repos.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
name: Process Repository Changes

# Events that start the workflow.
# NOTE(review): merging a PR into main/master normally also produces a push
# event on that branch, so both triggers may fire for one merge - confirm that
# processing the same change twice is acceptable.
on:
  # New commits landing on the watched branches
  push:
    branches:
      - main
      - master

  # PR close events; the job itself filters for PRs that were actually merged
  pull_request:
    types:
      - closed

  # Manual run; tick full_ingest to re-ingest the whole repository
  workflow_dispatch:
    inputs:
      full_ingest:
        description: 'Perform full repository ingestion'
        required: true
        type: boolean
        default: false
jobs:
  # Sends repository changes (or a full snapshot) to the Osiris ingestion API.
  process-changes:
    # Run for pushes, manual dispatches, and PRs that were merged (a PR that
    # was closed without merging is ignored).
    if: >-
      github.event_name == 'push' ||
      (github.event_name == 'pull_request' && github.event.pull_request.merged == true) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # full history so both ends of the diff range exist locally

      - name: Install yq
        run: |
          sudo wget https://github.com/mikefarah/yq/releases/download/v4.40.5/yq_linux_amd64 -O /usr/local/bin/yq
          sudo chmod +x /usr/local/bin/yq
          yq --version

      # Reads watcher/config/repositories.yml and publishes:
      #   env CONFIG                 - JSON config of this repository
      #   outputs.config             - same JSON
      #   outputs.osiris_url         - base URL of the ingestion API
      #   outputs.config_exists=true - only set when this repository is configured
      - name: Load Configuration
        id: config
        run: |
          # Verify config files exist
          if [ ! -f "watcher/config/repositories.yml" ]; then
            echo "::error::repositories.yml not found"
            exit 1
          fi

          # Load the API base URL
          OSIRIS_URL=$(yq -r '.osiris_url' watcher/config/repositories.yml)
          if [ -z "$OSIRIS_URL" ] || [ "$OSIRIS_URL" = "null" ]; then
            echo "::error::osiris_url not configured in repositories.yml"
            exit 1
          fi

          # Look up this repository's entry. GITHUB_REPOSITORY is the runner's
          # built-in "owner/name" variable; using it avoids expanding a
          # workflow expression inside the script text.
          REPO_CONFIG=$(yq -o=json ".repositories[\"${GITHUB_REPOSITORY}\"]" watcher/config/repositories.yml)

          # Not configured: succeed without setting config_exists so every
          # later processing step is skipped.
          if [ "$REPO_CONFIG" == "null" ]; then
            echo "Repository ${GITHUB_REPOSITORY} not configured for watching"
            exit 0
          fi

          # Export config using GitHub Actions environment file syntax
          {
            echo 'CONFIG<<EOF'
            echo "$REPO_CONFIG"
            echo 'EOF'
          } >> "$GITHUB_ENV"

          # Also set in outputs
          {
            echo "config<<EOF"
            echo "$REPO_CONFIG"
            echo "EOF"
            echo "osiris_url=$OSIRIS_URL"
            echo "config_exists=true"
          } >> "$GITHUB_OUTPUT"

      - name: Setup API Helper
        if: steps.config.outputs.config_exists == 'true'
        run: |
          # Write a helper that later steps source to POST JSON with retries.
          cat > api_helper.sh << 'EOF'
          #!/bin/bash

          # call_api URL PAYLOAD_FILE
          #
          # POSTs the JSON document stored in PAYLOAD_FILE to URL. Up to 5
          # attempts are made with exponential back-off (5s, 10s, 20s, 40s)
          # between them. On success the response body is printed and 0 is
          # returned; after the final failure 1 is returned.
          #
          # The payload is read from a file rather than passed as an argument
          # because Linux limits a single command-line argument to 128 KiB,
          # which file contents can easily exceed.
          call_api() {
            local url="$1"
            local payload_file="$2"
            local retries=5
            local delay=5
            local timeout=60
            local attempt response

            for attempt in $(seq 1 "$retries"); do
              echo "API call attempt $attempt of $retries"
              if response=$(curl -X POST "$url" \
                -H "Content-Type: application/json" \
                -H "Accept: application/json" \
                --fail \
                --silent \
                --show-error \
                --max-time "$timeout" \
                --retry 3 \
                --retry-delay 2 \
                --data-binary "@${payload_file}"); then
                echo "$response"
                return 0
              fi

              # Back off only when another attempt will follow.
              if [ "$attempt" -lt "$retries" ]; then
                echo "API call failed, waiting ${delay}s before retry..."
                sleep "$delay"
                delay=$((delay * 2))
              fi
            done

            echo "::error::API call failed after $retries attempts"
            return 1
          }
          EOF
          chmod +x api_helper.sh

      - name: Full Repository Ingestion
        if: >-
          steps.config.outputs.config_exists == 'true' &&
          github.event_name == 'workflow_dispatch' &&
          github.event.inputs.full_ingest == 'true'
        env:
          OSIRIS_URL: ${{ steps.config.outputs.osiris_url }}
        run: |
          source ./api_helper.sh
          echo "Starting full repository ingestion..."

          PAYLOAD_FILE=$(mktemp)
          trap 'rm -f "$PAYLOAD_FILE"' EXIT

          # Build the request with jq so every value is correctly JSON-escaped.
          # Repository, branch, event and commit come from the runner's
          # built-in GITHUB_* variables; CONFIG was exported by Load Configuration.
          jq -n \
            --arg repo "$GITHUB_REPOSITORY" \
            --arg branch "$GITHUB_REF_NAME" \
            --arg event_type "$GITHUB_EVENT_NAME" \
            --arg commit_sha "$GITHUB_SHA" \
            --arg process_timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
            --argjson config "$CONFIG" \
            '{
              repo: $repo,
              branch: $branch,
              forceReplace: true,
              metadata: {
                repository: $repo,
                branch: $branch,
                event_type: $event_type,
                commit_sha: $commit_sha,
                process_timestamp: $process_timestamp,
                config: $config
              }
            }' > "$PAYLOAD_FILE"

          # Call the ingest-repo endpoint
          if ! call_api "$OSIRIS_URL/api/ingest-repo" "$PAYLOAD_FILE"; then
            echo "::error::Failed to perform full repository ingestion"
            exit 1
          fi

      - name: Process Incremental Changes
        if: >-
          steps.config.outputs.config_exists == 'true' &&
          !(github.event_name == 'workflow_dispatch' && github.event.inputs.full_ingest == 'true')
        env:
          OSIRIS_URL: ${{ steps.config.outputs.osiris_url }}
          PUSH_BEFORE: ${{ github.event.before }}
          PUSH_AFTER: ${{ github.event.after }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          source ./api_helper.sh

          # Debug: print full config at start
          echo "Full Configuration from env:"
          echo "$CONFIG" | jq '.'

          # One included extension per line, for exact-match lookup below
          echo "$CONFIG" | jq -r '.included_extensions[]' | tr -d '\r' > included_extensions.txt
          echo "Available extensions:"
          cat included_extensions.txt

          # Determine the commit range for this event
          case "$GITHUB_EVENT_NAME" in
            push)
              BASE_SHA="$PUSH_BEFORE"
              HEAD_SHA="$PUSH_AFTER"
              ;;
            pull_request)
              BASE_SHA="$PR_BASE_SHA"
              HEAD_SHA="$PR_HEAD_SHA"
              ;;
            *)
              # Manual run without full_ingest: diff the latest commit only.
              HEAD_SHA=$(git rev-parse HEAD)
              # A root commit has no parent; leave BASE_SHA empty in that case.
              BASE_SHA=$(git rev-parse --verify --quiet "HEAD^") || BASE_SHA=""
              ;;
          esac
          echo "Base SHA: $BASE_SHA"
          echo "Head SHA: $HEAD_SHA"

          # The first push to a branch reports an all-zero "before" SHA, and a
          # root commit has no parent. Diff against the empty tree in both
          # cases so every file shows up as added instead of git diff failing.
          DIFF_BASE="$BASE_SHA"
          if [ -z "$DIFF_BASE" ] || [[ "$DIFF_BASE" =~ ^0+$ ]]; then
            DIFF_BASE=$(git hash-object -t tree /dev/null)
          fi

          echo "Starting to process changed files..."

          # Scratch space for the diff listing, file contents and payload
          TEMP_DIR=$(mktemp -d)
          trap 'rm -rf "$TEMP_DIR"' EXIT
          : > "$TEMP_DIR/added.ndjson"
          : > "$TEMP_DIR/modified.ndjson"
          : > "$TEMP_DIR/removed.ndjson"

          # -z emits NUL-separated "status NUL path NUL" records with paths
          # unquoted, so names containing spaces, quotes or non-ASCII
          # characters survive intact. Writing to a file first lets a git
          # failure abort the step instead of being hidden by a pipeline.
          git diff --name-status --no-renames -z "$DIFF_BASE" "$HEAD_SHA" > "$TEMP_DIR/diff.z"

          found_changes=false
          while IFS= read -r -d '' status && IFS= read -r -d '' filepath; do
            echo "Processing: $filepath (Status: $status)"
            [ -z "$filepath" ] && continue

            # Take the extension from the file name only, so a dot in a
            # directory name is never mistaken for an extension separator.
            filename="${filepath##*/}"
            ext="${filename##*.}"
            echo "File extension: '$ext'"

            if ! grep -ixFq -- "$ext" included_extensions.txt; then
              echo "Extension '$ext' is NOT included"
              continue
            fi
            echo "Extension '$ext' IS included"

            case "$status" in
              A|M)
                if ! git show "$HEAD_SHA:$filepath" > "$TEMP_DIR/blob" 2>/dev/null; then
                  echo "::warning::Could not read $filepath at $HEAD_SHA, skipping"
                  continue
                fi
                if [ "$status" = "A" ]; then
                  bucket="added"
                else
                  bucket="modified"
                fi
                # --rawfile loads the content from disk, so large files never
                # have to pass through the command line.
                jq -n -c --arg path "$filepath" --rawfile content "$TEMP_DIR/blob" \
                  '{path: $path, content: $content}' >> "$TEMP_DIR/$bucket.ndjson"
                found_changes=true
                ;;
              D)
                jq -n -c --arg path "$filepath" '{path: $path}' >> "$TEMP_DIR/removed.ndjson"
                found_changes=true
                ;;
            esac
          done < "$TEMP_DIR/diff.z"

          # Send collected changes
          if [ "$found_changes" = true ]; then
            echo "Found changes to process"

            # --slurpfile turns each per-status file into a JSON array
            # (an empty file becomes []).
            jq -n \
              --slurpfile added "$TEMP_DIR/added.ndjson" \
              --slurpfile modified "$TEMP_DIR/modified.ndjson" \
              --slurpfile removed "$TEMP_DIR/removed.ndjson" \
              --arg repo "$GITHUB_REPOSITORY" \
              --arg branch "$GITHUB_REF_NAME" \
              --arg event_type "$GITHUB_EVENT_NAME" \
              --arg commit_sha "$GITHUB_SHA" \
              --arg base_sha "$BASE_SHA" \
              --arg head_sha "$HEAD_SHA" \
              --arg process_timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
              --argjson config "$CONFIG" \
              '{
                repository: {
                  fullName: $repo,
                  defaultBranch: $branch
                },
                changes: {
                  added: $added,
                  modified: $modified,
                  removed: $removed
                },
                metadata: {
                  repository: $repo,
                  branch: $branch,
                  event_type: $event_type,
                  commit_sha: $commit_sha,
                  base_sha: $base_sha,
                  head_sha: $head_sha,
                  max_file_size: ($config | .max_file_size),
                  max_tokens: ($config | .max_tokens),
                  process_timestamp: $process_timestamp
                }
              }' > "$TEMP_DIR/payload.json"

            # Call ingest-changes endpoint
            if ! call_api "$OSIRIS_URL/api/ingest-changes" "$TEMP_DIR/payload.json"; then
              echo "::error::Failed to process changes"
              exit 1
            fi
          else
            echo "No relevant file changes detected"
          fi

      - name: Report Status
        if: always()
        env:
          CONFIG_EXISTS: ${{ steps.config.outputs.config_exists }}
          JOB_STATUS: ${{ job.status }}
          FULL_INGEST: ${{ github.event.inputs.full_ingest }}
        run: |
          # Check the job status first: a failed or skipped configuration step
          # also leaves config_exists unset and must not be reported as
          # "not configured".
          if [ "$JOB_STATUS" != "success" ]; then
            echo "::error::Failed to process changes"
          elif [ "$CONFIG_EXISTS" != "true" ]; then
            echo "::notice::Repository not configured for watching"
          elif [ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ] && [ "$FULL_INGEST" == "true" ]; then
            echo "::notice::Successfully completed full repository ingestion"
          else
            echo "::notice::Successfully processed changes"
          fi