Add test3.md with a ChatGPT link #11
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow step: for every link reported by the `find` step, locate the files
# under `prompts/` that mention it, scrape the linked page with Puppeteer, and
# append the extracted text to each file that has not been processed yet.
- name: Scrape and append prompt text
  if: steps.find.outputs.links != ''
  env:
    # Hand the link list to the script through the environment rather than
    # interpolating the expression into the script body. The links originate
    # from repository content, so inlining them would let a crafted link
    # inject shell commands into this step.
    LINKS: ${{ steps.find.outputs.links }}
  run: |
    # Inline Puppeteer scraper. The quoted heredoc delimiter keeps the shell
    # from expanding anything inside the JavaScript source.
    cat > scrape.js <<'EOF'
    import puppeteer from 'puppeteer';

    const [, , url] = process.argv;

    // Upper bound on the number of characters printed for one page.
    const MAX_CHARS = 3000;
    // Fixed pause after navigation so client-side rendering can finish.
    const HYDRATION_DELAY_MS = 5000;

    /**
     * Load a page in a headed browser and return its visible text.
     *
     * @param {string} targetUrl - Page to open.
     * @returns {Promise<string>} Trimmed innerText of <main>, or of <body>
     *   when the page has no <main> element.
     */
    async function scrape(targetUrl) {
      const browser = await puppeteer.launch({
        headless: false,
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
      });
      try {
        const page = await browser.newPage();
        await page.goto(targetUrl, { waitUntil: 'networkidle2', timeout: 120000 });
        // Delay for hydration (waitForTimeout removed in newer Puppeteer)
        await new Promise((resolve) => setTimeout(resolve, HYDRATION_DELAY_MS));
        return await page.evaluate(() => {
          const container = document.querySelector('main') || document.body;
          return container.innerText.trim();
        });
      } finally {
        // Always release the browser, including when navigation or
        // evaluation throws; otherwise the child process is left behind.
        await browser.close();
      }
    }

    if (!url) {
      console.error('Usage: node scrape.js <url>');
      process.exit(1);
    }

    scrape(url)
      .then((text) => {
        console.log(text.slice(0, MAX_CHARS));
      })
      .catch((err) => {
        // Report on stderr and exit non-zero; stdout stays empty so the
        // calling shell loop treats this link as "nothing extracted".
        console.error(`Scrape failed for ${url}:`, err);
        process.exitCode = 1;
      });
    EOF

    while IFS= read -r link; do
      # Skip blank lines: an empty grep pattern would match every file.
      if [ -z "$link" ]; then
        continue
      fi
      echo "Processing $link..."
      # -F matches the URL literally ('.' and '?' are regex metacharacters);
      # '--' stops a link that begins with '-' from being read as an option.
      files=$(grep -rlF -- "$link" prompts || true)
      if [ -z "$files" ]; then
        echo "⚠️ No file found containing link: $link"
        continue
      fi
      # Read one path per line so file names containing spaces stay intact.
      while IFS= read -r file; do
        if grep -q "Extracted Prompt" "$file"; then
          echo "Already scraped: $file"
          continue
        fi
        # stdin comes from /dev/null so the child processes cannot consume
        # the here-strings that feed the surrounding loops.
        content=$(xvfb-run -a node scrape.js "$link" < /dev/null || true)
        if [ -n "$content" ]; then
          # printf with %s writes the scraped text verbatim; 'echo -e' would
          # expand backslash sequences that happen to appear in the content.
          printf '\n\n---\n\n### Extracted Prompt\n%s\n' "$content" >> "$file"
          echo "✅ Appended to $file"
        else
          echo "⚠️ No prompt extracted for $link"
        fi
      done <<< "$files"
    done <<< "$LINKS"

    # Clean up temp files to avoid 'untracked files' noise
    # NOTE(review): this also deletes package.json and package-lock.json;
    # presumably an earlier step generates them - confirm they are not tracked.
    rm -rf node_modules package-lock.json package.json scrape.js