Delete prompts/conversations/test2.md #16
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape Codex Links
#
# Finds share links of the form https://chatgpt.com/s/<id> in the files under
# prompts/, opens each link in a Puppeteer-driven browser, and appends the
# visible page text (first 3000 characters) to every file that references the
# link, under an "### Extracted Prompt" heading. Files that already contain
# "Extracted Prompt" are skipped. Any changes under prompts/ are committed and
# pushed back to the checked-out branch.
name: Scrape Codex Links

on:
  workflow_dispatch:
  push:
    paths:
      - "prompts/**/*.md"
    branches:
      - main

# The last step pushes a commit with GITHUB_TOKEN, which requires write access
# to repository contents; request it explicitly instead of relying on the
# repository's default token permissions.
permissions:
  contents: write

jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 20

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y xvfb jq
          # NOTE(review): the version originally pinned here was lost (the
          # "puppeteer@<version>" spec was mangled by e-mail obfuscation when
          # this file was captured). Re-pin to the intended version.
          npm install puppeteer

      - name: Find new Codex links
        id: find
        run: |
          # -r recurse, -o print only the match, -h omit file names.
          # The dot is escaped so only the literal host matches, and sort -u
          # removes duplicates so each link is processed once.
          links=$(grep -Eroh 'https://chatgpt\.com/s/[A-Za-z0-9_]+' prompts | sort -u || true)
          if [ -z "$links" ]; then
            echo "No Codex links found."
            exit 0
          fi
          # Multi-line step output uses the heredoc-style delimiter syntax.
          {
            echo "links<<EOF"
            echo "$links"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      - name: Scrape and append prompt text
        if: steps.find.outputs.links != ''
        env:
          # Hand the step output to the script through the environment rather
          # than expanding ${{ }} inside the shell source, so its contents can
          # never be interpreted as shell syntax.
          LINKS: ${{ steps.find.outputs.links }}
        run: |
          # The .mjs extension makes Node load the file as an ES module, so the
          # `import` statement works without a package.json "type" field.
          cat > scrape.mjs <<'EOF'
          // Usage: node scrape.mjs <url>
          // Loads <url> in Chrome and prints up to 3000 characters of the
          // page's visible text to stdout.
          import puppeteer from 'puppeteer';

          const [, , url] = process.argv;
          if (!url) {
            console.error('Usage: node scrape.mjs <url>');
            process.exit(1);
          }

          const browser = await puppeteer.launch({
            // Headed mode; the caller wraps this in xvfb-run to provide a display.
            headless: false,
            args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
          });
          try {
            const page = await browser.newPage();
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 120000 });
            // Extra delay for client-side hydration after the network goes quiet.
            await new Promise((resolve) => setTimeout(resolve, 5000));
            const text = await page.evaluate(() => {
              const container = document.querySelector('main') || document.body;
              return container.innerText.trim();
            });
            console.log(text.slice(0, 3000));
          } finally {
            // Always shut the browser down, even if navigation or evaluation throws.
            await browser.close();
          }
          EOF

          while IFS= read -r link; do
            [ -n "$link" ] || continue
            echo "Processing $link..."
            # -F: treat the link as a literal string, not a regular expression.
            files=$(grep -rlF -- "$link" prompts || true)
            if [ -z "$files" ]; then
              echo "⚠️ No file found containing link: $link"
              continue
            fi
            # Read one path per line so file names containing spaces survive.
            while IFS= read -r file; do
              if grep -q "Extracted Prompt" "$file"; then
                echo "Already scraped: $file"
                continue
              fi
              # Best effort: a failed scrape yields empty output and a warning
              # below rather than failing the job. stdin is detached so the
              # child process cannot consume the loops' remaining input.
              content=$(xvfb-run -a node scrape.mjs "$link" </dev/null || true)
              if [ -n "$content" ]; then
                # printf '%s' writes the scraped text verbatim; `echo -e` would
                # expand backslash sequences that happen to occur in it.
                printf '\n\n---\n\n### Extracted Prompt\n%s\n' "$content" >> "$file"
                echo "✅ Appended to $file"
              else
                echo "⚠️ No prompt extracted for $link"
              fi
            done <<< "$files"
          done <<< "$LINKS"

          # Remove install artifacts and the temporary script from the workspace.
          rm -rf node_modules package-lock.json package.json scrape.mjs

      - name: Commit and push results
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add prompts
          # Stop cleanly when nothing is staged, so that a genuine commit or
          # push failure below still fails the job instead of being masked.
          if git diff --cached --quiet; then
            echo "No new content to commit."
            exit 0
          fi
          git commit -m "Auto-scraped Codex prompts"
          git push