|
1 | 1 | import express from 'express';
|
2 | 2 | import fetch from 'node-fetch';
|
| 3 | +import { JSDOM } from 'jsdom'; |
3 | 4 | import dotenv from 'dotenv';
|
4 | 5 | // import { askGPT, initGPT } from './gpt.js';
|
5 |
| -import { initOpenAI, askOpenAI } from './openai.js'; |
| 6 | +import { initOpenAI, askOpenAI, manifestPrompt } from './openai.js'; |
// Populate process.env from the local .env file before any setting is read.
dotenv.config();

// Listening port: taken from the environment, 3000 when PORT is unset/empty.
const port = process.env.PORT || 3000;

// Express application; every route gets JSON request-body parsing.
const app = express();
app.use(express.json());

// Shared LLM session handle. It is null until GET /initAPI stores the value
// resolved by initOpenAI(); /generateManifest refuses to run while it is null.
let OpenAISesion = null;
/**
 * GET /initAPI — (re)creates the shared LLM session.
 *
 * Stores the value resolved by initOpenAI() in the module-level
 * `OpenAISesion`.
 *
 * Responses:
 *   200 { message: 'LLM initialized' }  when the session is truthy
 *   500 { error: 'LLM unavailable' }    when it is falsy or initOpenAI() rejects
 */
app.get('/initAPI', async (req, res) => {
  try {
    OpenAISesion = await initOpenAI();
  } catch (err) {
    // Without this catch a rejection from initOpenAI() propagates out of the
    // async handler and this code never sends a response for the request.
    console.error('LLM initialization failed:', err);
    OpenAISesion = null;
  }

  if (OpenAISesion) {
    res.status(200).send({ message: 'LLM initialized' });
  }
  else {
    // Normalize any falsy result to null so later `!OpenAISesion` checks
    // see one consistent "not initialized" value.
    OpenAISesion = null;
    res.status(500).send({ error: 'LLM unavailable' });
  }
});
|
/**
 * GET /generateManifest?url=<page URL>
 *
 * Downloads the page at `url`, extracts its <head>, strips elements that the
 * selector below lists (scripts, styles, preload/stylesheet links and a few
 * meta tags), and passes the remaining markup through manifestPrompt() to
 * askOpenAI() using the session created by /initAPI.
 *
 * Responses:
 *   200 { manifest }                      askOpenAI() returned a truthy value
 *   400 { error: 'URL not specified' }    missing `url` query parameter
 *   400 { error: 'Invalid URL' }          `url` is not a parseable http(s) URL
 *   400 { error: '<head> HTML not found' }
 *   400 { error: 'LLM unavailable or failed to generate manifest' }
 *   500 { error: 'API not initialized' }  /initAPI has not succeeded yet
 *   502 { error: 'Failed to fetch URL' }  the download itself failed
 */
app.get('/generateManifest', async (req, res) => {
  if (!req.query.url) {
    res.status(400).send({ error: 'URL not specified' });
    return;
  }
  if (!OpenAISesion) {
    res.status(500).send({ error: 'API not initialized' });
    return;
  }

  // Validate up front: a malformed or non-HTTP URL would otherwise make
  // fetch() throw inside the handler and no response would be sent.
  // NOTE(review): this does not stop requests to internal/private hosts;
  // add an allow/deny list if this endpoint is exposed publicly.
  let targetUrl;
  try {
    targetUrl = new URL(String(req.query.url));
  } catch {
    res.status(400).send({ error: 'Invalid URL' });
    return;
  }
  if (targetUrl.protocol !== 'http:' && targetUrl.protocol !== 'https:') {
    res.status(400).send({ error: 'Invalid URL' });
    return;
  }

  let rawHTML;
  try {
    const response = await fetch(targetUrl.href, {
      headers: {
        // Browser-like User-Agent, presumably because some sites reject
        // unknown clients — confirm before changing.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
      },
    });
    rawHTML = await response.text();
  } catch (err) {
    console.error(`Failed to fetch ${targetUrl.href}:`, err);
    res.status(502).send({ error: 'Failed to fetch URL' });
    return;
  }

  // Collapse every whitespace run (including newlines) to ONE space. Deleting
  // the whitespace outright glues tokens together, e.g. "<meta\n  name=" would
  // become "<metaname=".
  const flattenedHTML = rawHTML.replace(/\s+/g, ' ');

  // Accept attributes on <head ...> and any letter case; `\b` keeps <header>
  // from matching, and the lazy quantifier stops at the first </head>.
  const headerHTML = flattenedHTML.match(/<head\b[^>]*>.*?<\/head>/i)?.[0] ?? null;
  if (!headerHTML) {
    res.status(400).send({ error: '<head> HTML not found' });
    return;
  }
  console.log(`HEAD: ${headerHTML}`);

  // Parse the fragment so noisy elements can be dropped structurally rather
  // than with further regexes.
  const document = new JSDOM(headerHTML).window.document;
  const discardSelector = [
    'script',
    'style',
    'meta[http-equiv="origin-trial"]',
    'meta[http-equiv="content-type"]',
    'meta[name="google-site-verification"]',
    'link[rel="stylesheet"]',
    'link[rel="modulepreload"]',
    'link[rel="preload"]',
    'link[rel="dns-prefetch"]',
  ].join(',');
  document.querySelectorAll(discardSelector).forEach((node) => node.remove());

  // Strip ampersands and HTML comment delimiters before prompting.
  // NOTE(review): removing '&' also mangles entities ("&amp;" -> "amp;");
  // kept as-is because the prompt may rely on it — confirm intent.
  const preparedHTML = document.head.innerHTML.replace(/&|<!--|-->/g, '');

  let manifest = null;
  try {
    manifest = await askOpenAI(manifestPrompt(preparedHTML), OpenAISesion);
  } catch (err) {
    // Treat a rejected LLM call like an empty result so the client still
    // receives the documented failure response.
    console.error('Manifest generation failed:', err);
  }

  if (manifest) {
    res.status(200).send({ manifest });
  }
  else {
    // NOTE(review): 400 is kept for compatibility with existing clients even
    // though this is a server-side failure.
    res.status(400).send({ error: 'LLM unavailable or failed to generate manifest' });
  }
});
|
49 | 60 | app.post('/generateWinPackage', async (req, res) => {
|
50 | 61 | return;
|
|
0 commit comments