# Scrape Codex Links #2
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scrape Codex Links
#
# Finds ChatGPT/Codex share links (https://chatgpt.com/s/<id>) in files under
# prompts/, asks the scraper service for the prompt text behind each link,
# appends that text to every file that references the link, then commits and
# pushes the modified files.
name: Scrape Codex Links

on:
  workflow_dispatch:
  push:
    paths:
      - "prompts/**/*.md"
    branches:
      - main

jobs:
  scrape:
    runs-on: ubuntu-latest
    # The final step pushes a commit, which requires write access to the
    # repository contents for the workflow token.
    permissions:
      contents: write
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Install jq
        # Only hit apt when jq is actually missing from the runner image.
        run: |
          if ! command -v jq >/dev/null; then
            sudo apt-get update && sudo apt-get install -y jq
          fi

      - name: Find new Codex links
        id: find
        run: |
          # grep exits non-zero when nothing matches; that is not an error
          # here, so the failure is deliberately swallowed. sort -u removes
          # duplicates so each link is processed exactly once.
          links=$(grep -Eroh 'https://chatgpt\.com/s/[A-Za-z0-9_]+' prompts | sort -u || true)
          if [ -z "$links" ]; then
            echo "No Codex links found."
            exit 0
          fi
          # Multi-line step output: one link per line between the delimiters.
          {
            echo "links<<EOF"
            printf '%s\n' "$links"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Scrape and append prompt text
        if: steps.find.outputs.links != ''
        env:
          # Pass the step output through the environment instead of pasting
          # the expression into the script text.
          LINKS: ${{ steps.find.outputs.links }}
          # NOTE(review): plaintext HTTP to a raw IP address, and whatever it
          # returns is written into the repository unverified. Consider an
          # HTTPS endpoint under a hostname you control.
          SCRAPER_URL: "http://35.222.246.223:8080/scrape"
        run: |
          while IFS= read -r link; do
            [ -n "$link" ] || continue
            echo "Processing $link..."

            # Hidden marker written next to each appended prompt so that
            # later runs can tell the file was already processed for this link.
            marker="<!-- codex-scrape: $link -->"

            # Collect the files that mention the link (fixed string, whole
            # word, so a longer link sharing this prefix does not match) and
            # do not yet carry its marker.
            pending=()
            while IFS= read -r file; do
              if grep -qF -- "$marker" "$file"; then
                echo "  Skipping $file (prompt already appended)"
              else
                pending+=("$file")
              fi
            done < <(grep -rlFw -- "$link" prompts || true)

            if [ "${#pending[@]}" -eq 0 ]; then
              continue
            fi

            # One request per link, not per file. --fail turns HTTP errors
            # into a non-zero exit; --max-time keeps a dead service from
            # hanging the job.
            if ! json=$(curl -fsS --max-time 30 --get --data-urlencode "url=$link" "$SCRAPER_URL"); then
              echo "::warning::Scraper request failed for $link"
              continue
            fi

            # A non-JSON response is treated the same as an empty result.
            content=$(jq -r '.prompt // .text // empty' <<< "$json" 2>/dev/null || true)
            if [ -z "$content" ]; then
              echo "  ⚠️ No prompt extracted for $link"
              continue
            fi

            for file in "${pending[@]}"; do
              echo "  Updating $file..."
              # printf '%s' keeps backslashes in the scraped text literal
              # (echo -e would interpret them as escape sequences).
              printf '\n\n---\n\n### Extracted Prompt\n%s\n%s\n' "$marker" "$content" >> "$file"
              echo "  ✅ Appended to $file"
            done
          done <<< "$LINKS"

      - name: Commit and push results
        if: steps.find.outputs.links != ''
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add prompts
          # Only commit and push when something was actually staged, so a real
          # commit failure is no longer hidden behind "nothing to commit".
          if git diff --cached --quiet; then
            echo "No new content to commit."
            exit 0
          fi
          git commit -m "Auto-scraped Codex prompts"
          git push