Skip to content

Commit 6a38334

Browse files
committed
Chore: process line stream
1 parent e2920de commit 6a38334

12 files changed

+55
-31
lines changed

Build/build-cdn-download-conf.ts

+2-6
Original file line number | Diff line number | Diff line change
@@ -5,18 +5,14 @@ import { task } from './trace';
55
import { SHARED_DESCRIPTION } from './constants/description';
66
import { appendArrayInPlace } from './lib/append-array-in-place';
77
import { SOURCE_DIR } from './constants/dir';
8-
import { processLine } from './lib/process-line';
98
import { DomainsetOutput } from './lib/create-file';
109
import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
1110

1211
const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
1312
const trie = new HostnameTrie();
1413

15-
for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat')) {
16-
const tmp = processLine(line);
17-
if (tmp) {
18-
trie.add(tmp);
19-
}
14+
for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
15+
trie.add(line);
2016
}
2117

2218
/**

Build/build-chn-cidr.ts

+2-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
2-
import { processLineFromReadline } from './lib/process-line';
32
import { task } from './trace';
43

54
import { contains as containsCidr, exclude as excludeCidr } from 'fast-cidr-tools';
@@ -19,8 +18,8 @@ const PROBE_CHN_CIDR_V4 = [
1918
export const getChnCidrPromise = createMemoizedPromise(cachedOnlyFail(
2019
async function getChnCidr() {
2120
const [_cidr4, cidr6] = await Promise.all([
22-
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt').then(processLineFromReadline),
23-
fetchRemoteTextByLine('https://gaoyifan.github.io/china-operator-ip/china6.txt').then(processLineFromReadline)
21+
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt', true).then(Array.fromAsync<string>),
22+
fetchRemoteTextByLine('https://gaoyifan.github.io/china-operator-ip/china6.txt', true).then(Array.fromAsync<string>)
2423
]);
2524

2625
const cidr4 = excludeCidr(

Build/build-reject-ip-list.ts

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $f
1515
const ipv4: string[] = [];
1616
const ipv6: string[] = [];
1717

18-
for await (const line of createReadlineInterfaceFromResponse(resp)) {
18+
for await (const line of createReadlineInterfaceFromResponse(resp, true)) {
1919
if (line.startsWith('bogus-nxdomain=')) {
2020
const ip = line.slice(15).trim();
2121
if (isProbablyIpv4(ip)) {

Build/build-telegram-cidr.ts

+1-5
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
// @ts-check
22
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
33
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
4-
import { processLine } from './lib/process-line';
54
import { task } from './trace';
65
import { SHARED_DESCRIPTION } from './constants/description';
76
import { createMemoizedPromise } from './lib/memo-promise';
@@ -16,10 +15,7 @@ export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
1615
const ipcidr: string[] = [];
1716
const ipcidr6: string[] = [];
1817

19-
for await (const line of createReadlineInterfaceFromResponse(resp)) {
20-
const cidr = processLine(line);
21-
if (!cidr) continue;
22-
18+
for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
2319
const [subnet] = cidr.split('/');
2420
if (isProbablyIpv4(subnet)) {
2521
ipcidr.push(cidr);

Build/lib/aho-corasick.bench.ts

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import { fetchRemoteTextByLine } from './fetch-text-by-line';
2-
import { processLineFromReadline } from './process-line';
32

43
import createKeywordFilter from './aho-corasick';
54

@@ -36,7 +35,7 @@ if (require.main === module) {
3635
(async () => {
3736
const { bench, group, run } = await import('mitata');
3837

39-
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt'));
38+
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt', true));
4039
console.log({ dataLen: data.length });
4140
const keywordsSet = [
4241
'!',

Build/lib/fetch-text-by-line.ts

+10-5
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ import { Readable } from 'node:stream';
33
import type { FileHandle } from 'node:fs/promises';
44
import readline from 'node:readline';
55

6-
import { TextLineStream } from './text-line-transform-stream';
6+
import { ProcessLineStream, TextLineStream } from './text-line-transform-stream';
77
import type { ReadableStream } from 'node:stream/web';
88
import { TextDecoderStream } from 'node:stream/web';
99
import { processLine } from './process-line';
@@ -40,7 +40,7 @@ function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | U
4040
return resp.body;
4141
}
4242

43-
export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse) => AsyncIterable<string>) = (resp) => {
43+
export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse, processLine?: boolean) => ReadableStream<string>) = (resp, processLine = false) => {
4444
const stream = ensureResponseBody(resp);
4545

4646
const webStream: ReadableStream<Uint8Array> = 'getReader' in stream
@@ -51,13 +51,18 @@ export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | Un
5151
: Readable.toWeb(new Readable().wrap(stream))
5252
);
5353

54-
return webStream
54+
const resultStream = webStream
5555
.pipeThrough(new TextDecoderStream())
5656
.pipeThrough(new TextLineStream());
57+
58+
if (processLine) {
59+
return resultStream.pipeThrough(new ProcessLineStream());
60+
}
61+
return resultStream;
5762
};
5863

59-
export function fetchRemoteTextByLine(url: string) {
60-
return $fetch(url).then(createReadlineInterfaceFromResponse);
64+
export function fetchRemoteTextByLine(url: string, processLine = false): Promise<AsyncIterable<string>> {
65+
return $fetch(url).then(resp => createReadlineInterfaceFromResponse(resp, processLine));
6166
}
6267

6368
export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {

Build/lib/parse-dnsmasq.ts

+1-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ export function extractDomainsFromFelixDnsmasq(line: string): string | null {
1919
export async function parseFelixDnsmasqFromResp(resp: NodeFetchResponse | UndiciResponseData | Response): Promise<string[]> {
2020
const results: string[] = [];
2121

22-
for await (const line of createReadlineInterfaceFromResponse(resp)) {
22+
for await (const line of createReadlineInterfaceFromResponse(resp, true)) {
2323
const domain = extractDomainsFromFelixDnsmasq(line);
2424
if (domain && isDomainLoose(domain)) {
2525
results.push(domain);

Build/lib/process-line.ts

+19-2
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,7 @@ export function processLine(line: string): string | null {
1111
const line_0: string = trimmed[0];
1212

1313
if (
14-
line_0 === '#'
15-
|| line_0 === ' '
14+
line_0 === ' '
1615
|| line_0 === '\r'
1716
|| line_0 === '\n'
1817
|| line_0 === '!'
@@ -21,6 +20,24 @@ export function processLine(line: string): string | null {
2120
return null;
2221
}
2322

23+
if (line_0 === '#') {
24+
if (trimmed[1] !== '#') {
25+
// # Comment
26+
return null;
27+
}
28+
if (trimmed[2] === '#' && trimmed[3] === '#') {
29+
// ################## EOF ##################
30+
return null;
31+
}
32+
/**
33+
* AdGuard Filter can be:
34+
*
35+
* ##.class
36+
* ##tag.class
37+
* ###id
38+
*/
39+
}
40+
2441
return trimmed;
2542
}
2643

Build/lib/set-add-from-array.bench.ts

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,9 @@
11
import { fetchRemoteTextByLine } from './fetch-text-by-line';
2-
import { processLineFromReadline } from './process-line';
32

43
import { bench, group, run } from 'mitata';
54

65
(async () => {
7-
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
6+
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));
87

98
group(() => {
109
bench('setAddFromArray', () => {

Build/lib/stable-sort-domain.bench.ts

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
import { fetchRemoteTextByLine } from './fetch-text-by-line';
2-
import { processLineFromReadline } from './process-line';
32
import { sortDomains } from './stable-sort-domain';
43

54
import { bench, group, run } from 'mitata';
65

76
(async () => {
8-
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
7+
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));
98

109
group(() => {
1110
bench('sortDomains', () => sortDomains(data));

Build/lib/text-line-transform-stream.ts

+15
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
// Modified by Sukka (https://skk.moe) to increase compatibility and performance with Bun.
44

55
import { TransformStream } from 'node:stream/web';
6+
import { processLine } from './process-line';
67

78
interface TextLineStreamOptions {
89
/** Allow splitting by solo \r */
@@ -78,3 +79,17 @@ export class TextLineStream extends TransformStream<string, string> {
7879
});
7980
}
8081
}
82+
83+
export class ProcessLineStream extends TransformStream<string, string> {
84+
// private __buf = '';
85+
constructor() {
86+
super({
87+
transform(l, controller) {
88+
const line = processLine(l);
89+
if (line) {
90+
controller.enqueue(line);
91+
}
92+
}
93+
});
94+
}
95+
}

Build/lib/tldts.bench.ts

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
11
import { fetchRemoteTextByLine } from './fetch-text-by-line';
2-
import { processLineFromReadline } from './process-line';
32

43
import { bench, group, run } from 'mitata';
54

65
import * as tldts from 'tldts';
76
import * as tldtsExperimental from 'tldts-experimental';
87

98
(async () => {
10-
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
9+
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));
1110

1211
const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
1312
allowPrivateDomains: false,

0 commit comments

Comments
 (0)