Merge pull request #2472 from DIYgod/master

[pull] master from diygod:master
HenryQW · Apr 16, 2024 · a63313f · a63313f
2 parents 8f3bfde + 0456ca9
commit a63313f
Show file tree

Hide file tree

Showing 8 changed files with 1,150 additions and 634 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -23,7 +23,7 @@ COPY ./package.json /app/
 RUN \
     set -ex && \
     export PUPPETEER_SKIP_DOWNLOAD=true && \
-    corepack enable pnpm && \
+    npm install -g pnpm@8.15.7 && \
     pnpm install --frozen-lockfile && \
     pnpm rb
 
@@ -102,7 +102,7 @@ RUN \
         fi; \
         echo 'Downloading Chromium...' && \
         unset PUPPETEER_SKIP_DOWNLOAD && \
-        corepack enable pnpm && \
+        npm install -g pnpm@8.15.7 && \
         pnpm add puppeteer@$(cat /app/.puppeteer_version) --save-prod && \
         pnpm rb ; \
     else \

diff --git a/lib/middleware/template.tsx b/lib/middleware/template.tsx
@@ -48,6 +48,14 @@ const middleware: MiddlewareHandler = async (ctx, next) => {
                     }
                 }
 
+                if (item.description) {
+                    // https://stackoverflow.com/questions/2507608/error-input-is-not-proper-utf-8-indicate-encoding-using-phps-simplexml-lo/40552083#40552083
+                    // https://stackoverflow.com/questions/1497885/remove-control-characters-from-php-string/1497928#1497928
+                    // remove unicode control characters
+                    // see #14940 #14943 #15262
+                    item.description = item.description.replaceAll(/[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F]/g, '');
+                }
+
                 if (typeof item.author === 'string') {
                     item.author = collapseWhitespace(item.author) || '';
                 } else if (typeof item.author === 'object' && item.author !== null) {

diff --git a/lib/routes/accessbriefing/index.ts b/lib/routes/accessbriefing/index.ts
@@ -0,0 +1,217 @@
+import { Route } from '@/types';
+import { getCurrentPath } from '@/utils/helpers';
+const __dirname = getCurrentPath(import.meta.url);
+
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { load } from 'cheerio';
+import { parseDate } from '@/utils/parse-date';
+import { art } from '@/utils/render';
+import path from 'node:path';
+
+/**
+ * Builds the feed for an Access Briefing listing page.
+ *
+ * Fetches the listing page for `category` (default `latest/news`), reads the
+ * `BrandID` / `MoreID` values embedded in it, requests the article list from
+ * the site's `Ajax/GetPagedArticles` endpoint, and then loads each article page
+ * (through the cache, keyed by article link) to fill in the full body,
+ * categories and author.
+ *
+ * @param ctx Request context; `category` is read from the route params and
+ *            `limit` (default 30) from the query string.
+ * @returns Feed data: title, description, link, items, image, author and language.
+ */
+export const handler = async (ctx) => {
+    const { category = 'latest/news' } = ctx.req.param();
+    const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 30;
+
+    const rootUrl = 'https://www.accessbriefing.com';
+    const currentUrl = new URL(category, rootUrl).href;
+    const apiUrl = new URL('Ajax/GetPagedArticles', rootUrl).href;
+
+    const { data: currentResponse } = await got(currentUrl);
+
+    // The listing page embeds the ids the paging endpoint needs; fixed fallbacks are used when they are not found.
+    const brandId = currentResponse.match(/'BrandID':\s(\d+)/)?.[1] ?? '32';
+    const moreID = currentResponse.match(/'MoreID':\s(\d+)/)?.[1] ?? '9282';
+
+    const { data: response } = await got(apiUrl, {
+        searchParams: {
+            navcontentid: moreID,
+            brandid: brandId,
+            page: 0,
+            lastpage: 0,
+            pagesize: limit,
+        },
+    });
+
+    const $ = load(currentResponse);
+
+    const language = $('html').prop('lang');
+
+    let items = response.slice(0, limit).map((item) => {
+        const title = item.Article_Headline;
+        // Resolve the image only when the API supplied one: `new URL(undefined, rootUrl)` would
+        // produce a bogus ".../undefined" URL and make the `image ?` check below always true.
+        const image = item.Image ? new URL(item.Image, rootUrl).href : undefined;
+        const description = art(path.join(__dirname, 'templates/description.art'), {
+            images: image
+                ? [
+                      {
+                          src: image,
+                          alt: title,
+                      },
+                  ]
+                : undefined,
+            intro: item.Article_Intro_Plaintext,
+        });
+        const guid = `accessbriefing-${item.Article_ID}`;
+
+        return {
+            title,
+            description,
+            pubDate: parseDate(item.Article_PublishedDate),
+            link: new URL(item.URL, rootUrl).href,
+            // Tolerate records without an author list instead of failing the whole feed.
+            author: item.Authors?.join('/'),
+            guid,
+            id: guid,
+            content: {
+                html: description,
+                text: item.Article_Intro_Plaintext,
+            },
+            image,
+            banner: image,
+            language,
+        };
+    });
+
+    // Enrich every list entry with data scraped from its article page.
+    items = await Promise.all(
+        items.map((item) =>
+            cache.tryGet(item.link, async () => {
+                const { data: detailResponse } = await got(item.link);
+
+                const $$ = load(detailResponse);
+
+                const title = $$('h1.khl-article-page-title').text();
+                // Keep the image + intro rendered above and append the article body after it.
+                const description =
+                    item.description +
+                    art(path.join(__dirname, 'templates/description.art'), {
+                        description: $$('div.khl-article-page-storybody').html(),
+                    });
+
+                item.title = title;
+                item.description = description;
+                item.category = $$('a.badge[data-id]')
+                    .toArray()
+                    .map((c) => $$(c).text());
+                item.author = $$('div.authorDetails a span b').text();
+                item.content = {
+                    html: description,
+                    text: $$('div.khl-article-page-storybody').text(),
+                };
+                item.language = language;
+
+                return item;
+            })
+        )
+    );
+
+    // Same guard as for item images: skip the feed image when the logo element has no `src`.
+    const logoSrc = $('a.navbar-brand img').prop('src');
+    const image = logoSrc ? new URL(logoSrc, rootUrl).href : undefined;
+
+    return {
+        title: $('title').text(),
+        description: $('meta[property="og:description"]').prop('content'),
+        link: currentUrl,
+        item: items,
+        allowEmpty: true,
+        image,
+        author: $('meta[property="og:site_name"]').prop('content'),
+        language,
+    };
+};
+
+/**
+ * Route definition for the Access Briefing article feed.
+ *
+ * `category` is an optional path parameter matched by `.+` (e.g. `latest/news`)
+ * and is passed to `handler`. The `radar` rules map site URLs to route targets.
+ *
+ * NOTE(review): the `regualtions-safety` spelling presumably mirrors the slug used
+ * by the site itself; confirm against the live URL before "correcting" it.
+ */
+export const route: Route = {
+    path: '/:category{.+}?',
+    name: 'Articles',
+    url: 'accessbriefing.com',
+    maintainers: ['nczitzk'],
+    handler,
+    example: '/accessbriefing/latest/news',
+    parameters: { category: 'Category, Latest News by default' },
+    description: `:::tip
+  If you subscribe to [Latest News](https://www.accessbriefing.com/latest/news)，where the URL is \`https://www.accessbriefing.com/latest/news\`, extract the part \`https://www.accessbriefing.com/\` to the end, and use it as the parameter to fill in. Therefore, the route will be [\`/accessbriefing/latest/news\`](https://rsshub.app/accessbriefing/latest/news).
+  :::
+
+  #### Latest
+
+  | Category                                                                               | ID                                                                                                 |
+  | -------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- |
+  | [News](https://www.accessbriefing.com/latest/news)                                     | [latest/news](https://rsshub.app/accessbriefing/latest/news)                                       |
+  | [Products & Technology](https://www.accessbriefing.com/latest/products-and-technology) | [latest/products-and-technology](https://rsshub.app/accessbriefing/latest/products-and-technology) |
+  | [Rental News](https://www.accessbriefing.com/latest/rental-news)                       | [latest/rental-news](https://rsshub.app/accessbriefing/latest/rental-news)                         |
+  | [People](https://www.accessbriefing.com/latest/people)                                 | [latest/people](https://rsshub.app/accessbriefing/latest/people)                                   |
+  | [Regualtions & Safety](https://www.accessbriefing.com/latest/regualtions-safety)       | [latest/regualtions-safety](https://rsshub.app/accessbriefing/latest/regualtions-safety)           |
+  | [Finance](https://www.accessbriefing.com/latest/finance)                               | [latest/finance](https://rsshub.app/accessbriefing/latest/finance)                                 |
+  | [Sustainability](https://www.accessbriefing.com/latest/sustainability)                 | [latest/sustainability](https://rsshub.app/accessbriefing/latest/sustainability)                   |
+
+  #### Insight
+
+  | Category                                                                          | ID                                                                                           |
+  | --------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- |
+  | [Interviews](https://www.accessbriefing.com/insight/interviews)                   | [insight/interviews](https://rsshub.app/accessbriefing/insight/interviews)                   |
+  | [Longer reads](https://www.accessbriefing.com/insight/longer-reads)               | [insight/longer-reads](https://rsshub.app/accessbriefing/insight/longer-reads)               |
+  | [Videos and podcasts](https://www.accessbriefing.com/insight/videos-and-podcasts) | [insight/videos-and-podcasts](https://rsshub.app/accessbriefing/insight/videos-and-podcasts) |
+  `,
+    categories: ['new-media'],
+
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportRadar: true,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['accessbriefing.com/:category*'],
+            target: '/:category',
+        },
+        {
+            title: 'Latest - News',
+            source: ['accessbriefing.com/latest/news'],
+            target: '/latest/news',
+        },
+        {
+            title: 'Latest - Products & Technology',
+            source: ['accessbriefing.com/latest/products-and-technology'],
+            target: '/latest/products-and-technology',
+        },
+        {
+            title: 'Latest - Rental News',
+            source: ['accessbriefing.com/latest/rental-news'],
+            target: '/latest/rental-news',
+        },
+        {
+            title: 'Latest - People',
+            source: ['accessbriefing.com/latest/people'],
+            target: '/latest/people',
+        },
+        {
+            title: 'Latest - Regualtions & Safety',
+            source: ['accessbriefing.com/latest/regualtions-safety'],
+            target: '/latest/regualtions-safety',
+        },
+        {
+            title: 'Latest - Finance',
+            source: ['accessbriefing.com/latest/finance'],
+            target: '/latest/finance',
+        },
+        {
+            title: 'Latest - Sustainability',
+            source: ['accessbriefing.com/latest/sustainability'],
+            target: '/latest/sustainability',
+        },
+        {
+            title: 'Insight - Interviews',
+            source: ['accessbriefing.com/insight/interviews'],
+            target: '/insight/interviews',
+        },
+        {
+            title: 'Insight - Longer reads',
+            source: ['accessbriefing.com/insight/longer-reads'],
+            target: '/insight/longer-reads',
+        },
+        {
+            title: 'Insight - Videos and podcasts',
+            source: ['accessbriefing.com/insight/videos-and-podcasts'],
+            target: '/insight/videos-and-podcasts',
+        },
+    ],
+};
diff --git a/lib/routes/accessbriefing/namespace.ts b/lib/routes/accessbriefing/namespace.ts
@@ -0,0 +1,8 @@
+import type { Namespace } from '@/types';
+
+/** Namespace metadata for the accessbriefing.com routes: display name, site URL and category list; the description is an empty string. */
+export const namespace: Namespace = {
+    name: 'Access Briefing',
+    url: 'accessbriefing.com',
+    categories: ['new-media'],
+    description: '',
+};
diff --git a/lib/routes/accessbriefing/templates/description.art b/lib/routes/accessbriefing/templates/description.art
@@ -0,0 +1,27 @@
+{{ if images }}
+  {{ each images image }}
+    {{ if image?.src }}
+      <figure>
+        <img
+          {{ if image.alt }}
+            alt="{{ image.alt }}"
+          {{ /if }}
+          {{ if image.width }}
+            width="{{ image.width }}"
+          {{ /if }}
+          {{ if image.height }}
+            height="{{ image.height }}"
+          {{ /if }}
+        src="{{ image.src }}">
+      </figure>
+    {{ /if }}
+  {{ /each }}
+{{ /if }}
+
+{{ if intro }}
+  <blockquote>{{ intro }}</blockquote>
+{{ /if }}
+
+{{ if description }}
+  {{@ description }}
+{{ /if }}
diff --git a/lib/routes/bbc/index.ts b/lib/routes/bbc/index.ts
@@ -1,9 +1,9 @@
 import { Route } from '@/types';
 import cache from '@/utils/cache';
-import got from '@/utils/got';
 import parser from '@/utils/rss-parser';
 import { load } from 'cheerio';
 import utils from './utils';
+import ofetch from '@/utils/ofetch';
 
 export const route: Route = {
     path: '/:site?/:channel?',
@@ -61,14 +61,11 @@ async function handler(ctx) {
     const items = await Promise.all(
         feed.items.map((item) =>
             cache.tryGet(item.link, async () => {
-                const response = await got({
-                    method: 'get',
-                    url: item.link,
-                });
+                const response = await ofetch(item.link);
 
-                const $ = load(response.data);
+                const $ = load(response);
 
-                const description = response.request.options.url.pathname.startsWith('/news/av') ? item.content : utils.ProcessFeed($);
+                const description = new URL(item.link).pathname.startsWith('/news/av') ? item.content : utils.ProcessFeed($);
 
                 let section = 'sport';
                 const urlSplit = item.link.split('/');

diff --git a/lib/routes/bing/daily-wallpaper.ts b/lib/routes/bing/daily-wallpaper.ts
@@ -1,5 +1,7 @@
 import { Route } from '@/types';
-import got from '@/utils/got';
+import ofetch from '@/utils/ofetch';
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
 
 export const route: Route = {
     path: '/',
@@ -9,32 +11,31 @@ export const route: Route = {
             target: '',
         },
     ],
-    name: 'Unknown',
+    name: '每日壁纸',
     maintainers: ['FHYunCai'],
     handler,
     url: 'cn.bing.com/',
 };
 
 async function handler(ctx) {
-    const response = await got({
-        method: 'get',
-        prefixUrl: 'https://cn.bing.com',
-        url: 'HPImageArchive.aspx',
-        searchParams: {
+    const response = await ofetch('HPImageArchive.aspx', {
+        baseURL: 'https://cn.bing.com',
+        query: {
             format: 'js',
             idx: 0,
             n: ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 7,
             mkt: 'zh-CN',
         },
     });
-    const data = response.data;
+    const data = response;
     return {
         title: 'Bing每日壁纸',
         link: 'https://cn.bing.com/',
         item: data.images.map((item) => ({
             title: item.copyright,
             description: `<img src="https://cn.bing.com${item.url}">`,
             link: item.copyrightlink,
+            pubDate: timezone(parseDate(item.fullstartdate), 0),
         })),
     };
 }