-
Notifications
You must be signed in to change notification settings - Fork 284
Expand file tree
/
Copy pathbrowser_session.js
More file actions
338 lines (317 loc) · 12.7 KB
/
browser_session.js
File metadata and controls
338 lines (317 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
'use strict'; /*jslint node:true es9:true*/
import * as playwright from 'playwright';
import {Aria_snapshot_filter} from './aria_snapshot_filter.js';
/**
 * Keeps one Playwright browser connection (over CDP) and one page per
 * domain, and exposes helpers for snapshotting the current page and
 * resolving element refs taken from those snapshots.
 *
 * Per-domain state lives in `_domainSessions` as
 * `{browser, page, browserClosed, requests}`; `_currentDomain` selects the
 * session used by the page-level helpers, and `_dom_refs` holds the DOM refs
 * produced by the most recent filtered `capture_snapshot()`.
 */
export class Browser_session {
    /**
     * @param {object} opts
     * @param {string} opts.cdp_endpoint - endpoint handed to
     *   `playwright.chromium.connectOverCDP` when a browser is needed.
     */
    constructor({cdp_endpoint}){
        this.cdp_endpoint = cdp_endpoint;
        this._domainSessions = new Map();
        this._currentDomain = 'default';
        this._dom_refs = new Set();
    }

    /**
     * Extract the hostname from a URL.
     * @param {string} url
     * @returns {string} the hostname, or 'default' when `url` cannot be
     *   parsed (the parse error is logged).
     */
    _getDomain(url){
        try {
            return new URL(url).hostname;
        } catch(e){
            console.error(`Error extracting domain from ${url}:`, e);
            return 'default';
        }
    }

    /**
     * Return the session record for `domain`, creating an empty (closed)
     * one on first use.
     * @param {string} domain
     * @param {object} [opts] - accepted for call-site symmetry; unused here.
     * @returns {Promise<object>} the session record stored in the map.
     */
    async _getDomainSession(domain, {log}={}){
        let session = this._domainSessions.get(domain);
        if (!session)
        {
            session = {
                browser: null,
                page: null,
                browserClosed: true,
                requests: new Map(),
            };
            this._domainSessions.set(domain, session);
        }
        return session;
    }

    /**
     * Mark a session as having no live browser or page.
     * @param {object} session - a record from `_domainSessions`.
     */
    _resetSession(session){
        session.browser = null;
        session.page = null;
        session.browserClosed = true;
    }

    /**
     * Return a connected browser for `domain`, (re)connecting when the
     * session has none or its connection has dropped.
     * @param {object} [opts]
     * @param {Function} [opts.log] - optional progress logger.
     * @param {string} [opts.domain='default']
     * @returns {Promise<object>} the Playwright browser.
     * @throws rethrows any connection error after resetting the session.
     */
    async get_browser({log, domain='default'}={}){
        try {
            const session = await this._getDomainSession(domain, {log});
            // isConnected() is the liveness signal; contexts() is a plain
            // getter and would not reveal a dropped connection.
            if (session.browser && !session.browser.isConnected())
            {
                log?.(`Browser connection lost for domain ${domain}, `
                    +`reconnecting...`);
                this._resetSession(session);
            }
            if (!session.browser)
            {
                log?.(`Connecting to Bright Data Scraping Browser for domain ${domain}.`);
                const browser = await playwright.chromium.connectOverCDP(
                    this.cdp_endpoint);
                session.browser = browser;
                session.browserClosed = false;
                browser.on('disconnected', ()=>{
                    log?.(`Browser disconnected for domain ${domain}`);
                    // A late event from a superseded browser must not wipe
                    // the connection that replaced it.
                    if (session.browser!==browser)
                        return;
                    this._resetSession(session);
                });
                log?.(`Connected to Bright Data Scraping Browser for domain ${domain}`);
            }
            return session.browser;
        } catch(e){
            console.error(`Error connecting to browser for domain ${domain}:`, e);
            const session = this._domainSessions.get(domain);
            if (session)
                this._resetSession(session);
            throw e;
        }
    }

    /**
     * Return the page for the current domain, creating or adopting one when
     * the session has no usable page. When `url` is given, the current
     * domain is first switched to that URL's hostname.
     * @param {object} [opts]
     * @param {?string} [opts.url=null]
     * @returns {Promise<object>} the Playwright page.
     * @throws rethrows any error after resetting the session.
     */
    async get_page({url=null}={}){
        if (url)
            this._currentDomain = this._getDomain(url);
        const domain = this._currentDomain;
        try {
            const session = await this._getDomainSession(domain);
            if (!session.browserClosed && session.page)
                return session.page;
            const browser = await this.get_browser({domain});
            const contexts = browser.contexts();
            if (contexts.length === 0)
            {
                const context = await browser.newContext();
                session.page = await context.newPage();
            }
            else
            {
                // Prefer a page that already exists in the first context.
                const pages = contexts[0]?.pages();
                session.page = pages && pages.length > 0
                    ? pages[0] : await contexts[0].newPage();
            }
            // Track every request; the value becomes its response once one
            // arrives.
            session.page.on('request', request=>
                session.requests.set(request, null));
            session.page.on('response', response=>
                session.requests.set(response.request(), response));
            session.browserClosed = false;
            session.page.once('close', ()=>{
                session.page = null;
            });
            return session.page;
        } catch(e){
            console.error(`Error getting page for domain ${domain}:`, e);
            const session = this._domainSessions.get(domain);
            if (session)
                this._resetSession(session);
            throw e;
        }
    }

    /**
     * Snapshot the current page.
     *
     * With `filtered=false` only the raw ARIA snapshot is returned. Otherwise
     * the ARIA snapshot is passed through `Aria_snapshot_filter` and the page
     * is additionally scanned for interactive-looking elements, each tagged
     * with a `data-fastmcp-ref` attribute so `ref_locator()` can find it
     * again; the set of those refs replaces `_dom_refs`.
     * @param {object} [opts]
     * @param {boolean} [opts.filtered=true]
     * @returns {Promise<object>} `{url, title, aria_snapshot}` plus
     *   `dom_snapshot` when filtered.
     * @throws {Error} wrapping any failure (original error in `cause`).
     */
    async capture_snapshot({filtered=true}={}){
        const page = await this.get_page();
        try {
            const full_snapshot = await page._snapshotForAI();
            if (!filtered)
            {
                return {
                    url: page.url(),
                    title: await page.title(),
                    aria_snapshot: full_snapshot,
                };
            }
            const filtered_snapshot = Aria_snapshot_filter.filter_snapshot(
                full_snapshot);
            // NOTE: this callback runs inside the page, so it must stay
            // self-contained (no references to outer variables).
            const dom_snapshot = await page.evaluate(()=>{
                const selectors = [
                    'a[href]', 'button', 'input', 'select', 'textarea',
                    'option', '.radio-item', '[role]', '[tabindex]',
                    '[onclick]', '[data-spm-click]', '[data-click]',
                    '[data-action]', '[data-spm-anchor-id]',
                    '[aria-pressed]', '[aria-label]', '[aria-haspopup]',
                ];
                const nodes = Array.from(document.querySelectorAll(
                    selectors.join(',')));
                const elements = [];
                // Continue numbering after the highest ref already present:
                // elements tagged by an earlier snapshot keep their ref, so
                // restarting at 0 would hand out duplicate refs.
                let counter = 0;
                for (const tagged of document.querySelectorAll(
                    '[data-fastmcp-ref]'))
                {
                    const match = /^dom-(\d+)$/.exec(
                        tagged.dataset.fastmcpRef || '');
                    if (match)
                        counter = Math.max(counter, Number(match[1]));
                }
                const collapse = text=>(text || '')
                    .replace(/\s+/g, ' ').trim();
                // Text of the elements referenced by aria-labelledby.
                const get_labelledby = el=>{
                    const ids = (el.getAttribute('aria-labelledby') || '')
                        .split(/\s+/);
                    return ids.map(id=>{
                        const ref = document.getElementById(id);
                        return ref ? collapse(ref.innerText
                            || ref.textContent || '') : '';
                    }).filter(Boolean).join(' ');
                };
                // Text of a <label for="..."> pointing at the element.
                const get_label_for = el=>{
                    const id = el.id && el.id.trim();
                    if (!id)
                        return '';
                    const lbl = document.querySelector(
                        `label[for="${CSS.escape(id)}"]`);
                    return lbl ? collapse(lbl.innerText
                        || lbl.textContent || '') : '';
                };
                // Interactive by tag, role, class or click-related attribute.
                const is_intrinsic = el=>{
                    const tag = el.tagName.toLowerCase();
                    if (['a', 'input', 'button', 'select', 'textarea',
                        'option'].includes(tag))
                    {
                        return true;
                    }
                    const role = (el.getAttribute('role') || '')
                        .toLowerCase();
                    if (['button', 'link', 'radio', 'option', 'tab',
                        'checkbox', 'menuitem'].includes(role))
                    {
                        return true;
                    }
                    if (el.classList.contains('radio-item'))
                        return true;
                    return el.hasAttribute('onclick')
                        || el.hasAttribute('data-click')
                        || el.hasAttribute('data-action')
                        || el.hasAttribute('data-spm-click')
                        || el.hasAttribute('data-spm-anchor-id');
                };
                // Visible, non-empty, centered inside the viewport, and
                // either topmost at its center or intrinsically interactive.
                const is_clickable = el=>{
                    const style = window.getComputedStyle(el);
                    if (style.display==='none'
                        || style.visibility==='hidden'
                        || style.pointerEvents==='none')
                    {
                        return false;
                    }
                    const rect = el.getBoundingClientRect();
                    if (!rect || rect.width===0 || rect.height===0)
                        return false;
                    const center_x = rect.left + rect.width/2;
                    const center_y = rect.top + rect.height/2;
                    if (center_x<0 || center_x>window.innerWidth
                        || center_y<0 || center_y>window.innerHeight)
                    {
                        return false;
                    }
                    const top_el = document.elementFromPoint(center_x,
                        center_y);
                    if (top_el && (top_el===el || top_el.contains(el)
                        || el.contains(top_el)))
                    {
                        return true;
                    }
                    return is_intrinsic(el);
                };
                for (const el of nodes)
                {
                    if (!is_clickable(el))
                        continue;
                    // Explicit labelling attributes win over visible text.
                    let name = collapse(el.getAttribute('aria-label'))
                        || collapse(get_labelledby(el))
                        || collapse(el.getAttribute('title'))
                        || collapse(el.getAttribute('alt'))
                        || collapse(el.getAttribute('placeholder'))
                        || collapse(get_label_for(el));
                    if (!name)
                    {
                        name = collapse(el.innerText
                            || el.textContent || '');
                    }
                    if (name.length>80)
                        name = name.slice(0, 77)+'...';
                    const url = (el.href || el.getAttribute('data-url')
                        || '').toString();
                    // Nothing to describe the element by: skip it.
                    if (!name && !url)
                        continue;
                    if (!el.dataset.fastmcpRef)
                        el.dataset.fastmcpRef = `dom-${++counter}`;
                    elements.push({
                        ref: el.dataset.fastmcpRef,
                        role: el.getAttribute('role')
                            || el.tagName.toLowerCase(),
                        name,
                        url,
                    });
                }
                return elements;
            });
            this._dom_refs = new Set(dom_snapshot.map(el=>el.ref));
            return {
                url: page.url(),
                title: await page.title(),
                aria_snapshot: filtered_snapshot,
                dom_snapshot: Aria_snapshot_filter.format_dom_elements(
                    dom_snapshot),
            };
        } catch(e){
            throw new Error(`Error capturing ARIA snapshot: ${e.message}`,
                {cause: e});
        }
    }

    /**
     * Build a locator for a ref obtained from `capture_snapshot()`.
     *
     * Refs recorded in `_dom_refs` resolve through the `data-fastmcp-ref`
     * attribute; any other ref must appear in a fresh ARIA snapshot and
     * resolves through Playwright's `aria-ref=` selector.
     * @param {object} opts
     * @param {string} opts.element - description attached to the locator.
     * @param {string} opts.ref
     * @returns {Promise<object>} the Playwright locator.
     * @throws {Error} when the ref is unknown or the lookup fails (original
     *   error in `cause`).
     */
    async ref_locator({element, ref}){
        const page = await this.get_page();
        try {
            if (this._dom_refs.has(ref))
            {
                return page.locator(`[data-fastmcp-ref="${ref}"]`)
                    .first().describe(element);
            }
            const snapshot = await page._snapshotForAI();
            if (!snapshot.includes(`[ref=${ref}]`))
            {
                throw new Error('Ref '+ref+' not found in the current page '
                    +'snapshot. Try capturing new snapshot.');
            }
            return page.locator(`aria-ref=${ref}`).describe(element);
        } catch(e){
            throw new Error(`Error creating ref locator for ${element} with ref ${ref}: ${e.message}`,
                {cause: e});
        }
    }

    /**
     * @returns {Promise<Map>} the request -> response (or null) map of the
     *   current domain's session.
     */
    async get_requests(){
        const session = await this._getDomainSession(this._currentDomain);
        return session.requests;
    }

    /** Empty the request map of the current domain's session. */
    async clear_requests(){
        const session = await this._getDomainSession(this._currentDomain);
        session.requests.clear();
    }

    /**
     * Close a session's browser (best effort: a failing close is logged,
     * not thrown) and drop its page and recorded requests.
     * @param {string} domain - used for the log message only.
     * @param {object} session - a record from `_domainSessions`.
     */
    async _closeSession(domain, session){
        if (session.browser)
        {
            try { await session.browser.close(); }
            catch(e){ console.error(`Error closing browser for domain ${domain}:`, e); }
        }
        this._resetSession(session);
        session.requests.clear();
    }

    /**
     * Close one domain's session, or every session when `domain` is omitted.
     *
     * A named session is removed even when it never had a browser attached.
     * Closing everything also resets the current domain to 'default'. DOM
     * refs are dropped whenever the page they were captured on goes away.
     * @param {?string} [domain=null]
     */
    async close(domain=null){
        if (domain)
        {
            const session = this._domainSessions.get(domain);
            if (session)
            {
                await this._closeSession(domain, session);
                this._domainSessions.delete(domain);
            }
            if (domain===this._currentDomain)
                this._dom_refs.clear();
            return;
        }
        for (const [name, session] of this._domainSessions)
            await this._closeSession(name, session);
        this._domainSessions.clear();
        this._dom_refs.clear();
        this._currentDomain = 'default';
    }
}