-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper1.js
134 lines (102 loc) · 7.92 KB
/
scraper1.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
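/**
 * Web scraper for Toto 4D, 5D and 6D draw results.
 * Loads the results page configured in ./config and writes the parsed draw to out.json.
 */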
const puppeteer = require('puppeteer');
const random_useragent = require('random-useragent');
const { url } = require('./config');
const fs = require('fs');
// CSS selectors for the draw header and the 4D first prize.
const DRAW_DATE_SELECTOR = 'span.dataDD > font > b';
const DRAW_NO_SELECTOR = 'span.dataDD > b > font';
const GAME_TITLE_SELECTOR = 'span.tit4D > font';
const FOUR_D_FIRST_PRIZE_SELECTOR = 'span.dataResultA > b > font';

// XPath expressions for the 2nd and 3rd elements whose class is "dataResultA".
const FOUR_D_SECOND_PRIZE_XPATH = '(//*[@class="dataResultA"])[2]/font/b';
const FOUR_D_THIRD_PRIZE_XPATH = '(//*[@class="dataResultA"])[3]/font/b';

// Absolute paths to the cells that hold the result tables. They depend on the
// exact page layout and will need updating if the site markup changes.
const FOUR_D_SECTION = 'body > div > center > table > tbody > tr:nth-child(5) > td';
const MULTI_D_SECTION = 'body > div > center > table > tbody > tr:nth-child(14) > td';

// [output key, table row, table column] of every 5D prize cell.
// NOTE(review): secondPrize previously reused the firstPrize cell (row 1, col 3),
// so it always duplicated the first prize. Row 2 / col 2 is inferred from the
// layout of the other five cells -- confirm against the live page.
const FIVE_D_CELLS = [
  ['firstPrize', 1, 3],
  ['secondPrize', 2, 2],
  ['thirdPrize', 3, 2],
  ['fourthPrize', 1, 5],
  ['fifthPrize', 2, 4],
  ['sixthPrize', 3, 4],
];

// [output key, table row, rank label stripped from the row text] of the 6D rows.
const SIX_D_ROWS = [
  ['secondPrize', 2, '2nd'],
  ['thirdPrize', 3, '3rd'],
  ['fourthPrize', 4, '4th'],
  ['fifthPrize', 5, '5th'],
];

/** Returns the textContent of the first node matching `xpath` in the page. */
function readXPathText(page, xpath) {
  // Evaluated inside the page so it does not rely on page.$x, which is not
  // available in newer Puppeteer releases.
  return page.evaluate((expression) => {
    const match = document.evaluate(
      expression,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    return match.singleNodeValue.textContent;
  }, xpath);
}

/** Returns the integer-like, multi-character <font> texts found under `tbodySelector`. */
function readNumberList(page, tbodySelector) {
  return page.evaluate(
    (selector) =>
      Array.from(document.querySelector(selector).getElementsByTagName('font'))
        .map((node) => node.innerText)
        // `text % 1 === 0` coerces the text to a number and drops non-numeric labels.
        .filter((text) => text % 1 === 0)
        .filter((text) => text.length > 1),
    tbodySelector
  );
}

/** Concatenates the text of every cell matching `cellSelector`, with all whitespace removed. */
function readJoinedCells(page, cellSelector) {
  return page.evaluate(
    (selector) =>
      Array.from(document.querySelectorAll(selector))
        .map((node) => node.innerText)
        .join('')
        .replace(/\s/g, ''),
    cellSelector
  );
}

/** Returns the trimmed lines of a 6D prize row after removing tabs, the rank label and the "or" separator. */
async function readSixDRow(page, rowSelector, rankLabel) {
  const rowTexts = await page.evaluate(
    (selector, label) =>
      Array.from(document.querySelectorAll(selector)).map((row) =>
        row.innerText
          .replaceAll('\t', '')
          .replace(label, '')
          .replace('\nor', '')
          .trimStart()
      ),
    rowSelector,
    rankLabel
  );
  return rowTexts.toString().split('\n').map((line) => line.trim());
}

/**
 * Scrapes the draw results page at `url` and writes them to out.json.
 *
 * The output object holds `drawDate`, `drawNo` and one entry per game, keyed by
 * the game title shown on the page with whitespace removed (titles 1, 3 and 4).
 * The 4D third prize is stored under `thirdPrize`; it was previously written
 * under the misspelled key `thridPrize`.
 *
 * Any failure is logged and reported through a process exit code of 1. The
 * browser is always closed, including when scraping fails.
 *
 * @returns {Promise<void>} Resolves once the file is written and the browser is closed.
 */
async function scrapeProduct() {
  let browser;
  try {
    // Open and configure the browser.
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    page.setDefaultTimeout(10000);
    await page.setViewport({ width: 1200, height: 800 });
    await page.setUserAgent(random_useragent.getRandom());

    await page.goto(url);
    await page.waitForSelector(DRAW_DATE_SELECTOR);

    const data = {};
    data.drawDate = await page.$eval(DRAW_DATE_SELECTOR, (e) => e.innerText.split(':')[1].trim());
    data.drawNo = await page.$eval(DRAW_NO_SELECTOR, (e) => e.innerText.split('.')[1].trim());

    const gameTypes = await page.evaluate(
      (selector) =>
        Array.from(document.querySelectorAll(selector)).map((n) => n.innerText.replace(/\s/g, '')),
      GAME_TITLE_SELECTOR
    );
    // Without this guard a missing title would silently create an "undefined" key.
    if (gameTypes.length < 4) {
      throw new Error(`Expected at least 4 game titles, found ${gameTypes.length}`);
    }
    // The second title is not scraped.
    const [fourD, , fiveD, sixD] = gameTypes;

    // 4D results.
    data[fourD] = {};
    data[fourD].firstPrize = await page.$eval(FOUR_D_FIRST_PRIZE_SELECTOR, (e) => e.innerText.trim());
    data[fourD].secondPrize = await readXPathText(page, FOUR_D_SECOND_PRIZE_XPATH);
    data[fourD].thirdPrize = await readXPathText(page, FOUR_D_THIRD_PRIZE_XPATH);
    data[fourD].specialPrize = await readNumberList(page, `${FOUR_D_SECTION} > table:nth-child(2) > tbody`);
    data[fourD].consolationPrize = await readNumberList(page, `${FOUR_D_SECTION} > table:nth-child(3) > tbody`);

    // 5D results: each prize is spread over one cell per digit.
    data[fiveD] = {};
    for (const [key, row, column] of FIVE_D_CELLS) {
      data[fiveD][key] = await readJoinedCells(
        page,
        `${MULTI_D_SECTION} > table:nth-child(3) > tbody > tr:nth-child(${row}) > td:nth-child(${column}) > table > tbody > tr > td`
      );
    }

    // 6D results: the first prize is digit cells, the other prizes are whole rows.
    data[sixD] = {};
    data[sixD].firstPrize = await readJoinedCells(
      page,
      `${MULTI_D_SECTION} > table:nth-child(4) > tbody > tr:nth-child(1) > td:nth-child(3) > div > table > tbody > tr > td > div > center > table > tbody > tr > td`
    );
    for (const [key, row, rankLabel] of SIX_D_ROWS) {
      data[sixD][key] = await readSixDRow(
        page,
        `${MULTI_D_SECTION} > table:nth-child(4) > tbody > tr:nth-child(${row})`,
        rankLabel
      );
    }

    // Awaited so a failed write is reported instead of being lost during shutdown.
    await fs.promises.writeFile('out.json', JSON.stringify(data));
  } catch (error) {
    console.log(error);
    // Set the exit code rather than calling process.exit() so the cleanup below still runs.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
// Run the scraper when this file is executed. The rejection handler keeps the
// returned promise from floating: anything that escapes scrapeProduct is logged
// and reported through a non-zero exit code.
scrapeProduct().catch((error) => {
  console.log(error);
  process.exitCode = 1;
});