1
1
"""
2
2
Tools to let LLMs control a browser.
3
3
"""
4
-
5
4
import atexit
5
+ import logging
6
6
import urllib .parse
7
7
from dataclasses import dataclass
8
- from typing import Optional
8
+ from typing import Literal , Optional
9
9
10
10
from playwright .sync_api import ElementHandle , Page , sync_playwright
11
11
12
12
_p = None
13
+ logger = logging .getLogger (__name__ )
14
+
15
+ EngineType = Literal ["google" , "duckduckgo" ]
13
16
14
17
15
18
def get_browser ():
@@ -43,10 +46,8 @@ def load_page(url: str) -> Page:
43
46
return page
44
47
45
48
46
- def search (query : str , engine : str = "google" ) -> str :
47
- """
48
- Search for a query on a search engine.
49
- """
49
+ def search (query : str , engine : EngineType = "google" ) -> str :
50
+ """Search for a query on a search engine."""
50
51
if engine == "google" :
51
52
return _search_google (query )
52
53
elif engine == "duckduckgo" :
@@ -56,19 +57,16 @@ def search(query: str, engine: str = "google") -> str:
56
57
57
58
58
59
def _search_google (query : str ) -> str :
59
- """
60
- Search for a query on Google.
61
- """
62
60
query = urllib .parse .quote (query )
63
61
url = f"https://www.google.com/search?q={ query } &hl=en"
64
62
page = load_page (url )
65
63
66
64
els = _list_clickable_elements (page )
67
65
for el in els :
68
- print (f"{ el ['type' ]} : { el ['text' ]} " )
69
- if "Accept all" in el [ " text" ] :
70
- el [ " element" ] .click ()
71
- print ("Accepted terms" )
66
+ # print(f"{el['type']}: {el['text']}")
67
+ if "Accept all" in el . text :
68
+ el . element .click ()
69
+ logger . debug ("Accepted Google terms" )
72
70
break
73
71
74
72
# list results
@@ -81,11 +79,7 @@ def _search_duckduckgo(query: str) -> str:
81
79
url = f"https://duckduckgo.com/?q={ query } "
82
80
page = load_page (url )
83
81
84
- el = page .query_selector (".react-results--main" )
85
- if el :
86
- return el .inner_text ()
87
- else :
88
- return "Error: no results found"
82
+ return _list_results_duckduckgo (page )
89
83
90
84
91
85
@dataclass
@@ -105,21 +99,21 @@ def from_element(cls, element: ElementHandle):
105
99
name = element .evaluate ("el => el.name" ),
106
100
href = element .evaluate ("el => el.href" ),
107
101
element = element ,
102
+ # FIXME: is this correct?
108
103
selector = element .evaluate ("el => el.selector" ),
109
104
)
110
105
111
106
112
def _list_input_elements(page) -> list[Element]:
    """Return an ``Element`` wrapper for every ``<input>`` tag on ``page``."""
    # Wrap each matched handle, in the order the page reports them.
    return [
        Element.from_element(handle)
        for handle in page.query_selector_all("input")
    ]
120
114
121
115
122
- def _list_clickable_elements (page , selector = None ) -> list [dict ]:
116
+ def _list_clickable_elements (page , selector = None ) -> list [Element ]:
123
117
elements = []
124
118
125
119
# filter by selector
@@ -131,25 +125,17 @@ def _list_clickable_elements(page, selector=None) -> list[dict]:
131
125
# List all clickable buttons
132
126
clickable = page .query_selector_all (selector )
133
127
for i , el in enumerate (clickable ):
134
- tag_name = el .evaluate ("el => el.tagName" )
135
- text = el .evaluate ("el => el.innerText" )
136
- href = el .evaluate ("el => el.href" )
137
- elements .append (
138
- {
139
- "type" : tag_name ,
140
- "text" : text ,
141
- "href" : href ,
142
- "element" : el ,
143
- "selector" : f"{ tag_name } :has-text('{ text } ')" ,
144
- }
145
- )
128
+ # "selector": f"{tag_name}:has-text('{text}')",
129
+ elements .append (Element .from_element (el ))
146
130
147
131
return elements
148
132
149
133
150
- def _list_results_google (page ):
134
+ def _list_results_google (page ) -> str :
151
135
# fetch the results (elements with .g class)
152
136
results = page .query_selector_all (".g" )
137
+ if not results :
138
+ return "Error: something went wrong with the search."
153
139
154
140
# list results
155
141
s = "Results:"
@@ -160,5 +146,31 @@ def _list_results_google(page):
160
146
title = h3 .inner_text ()
161
147
result .query_selector ("span" ).inner_text ()
162
148
s += f"\n { i + 1 } . { title } ({ url } )"
149
+ return s
150
+
151
+
152
+ def _list_results_duckduckgo (page ) -> str :
153
+ # fetch the results
154
+ results = page .query_selector (".react-results--main" )
155
+ results = results .query_selector_all ("article" )
156
+ if not results :
157
+ return "Error: something went wrong with the search."
163
158
159
+ # list results
160
+ s = "Results:"
161
+ for i , result in enumerate (results ):
162
+ url = result .query_selector ("a" ).evaluate ("el => el.href" )
163
+ h2 = result .query_selector ("h2" )
164
+ if h2 :
165
+ title = h2 .inner_text ()
166
+ result .query_selector ("span" ).inner_text ()
167
+ s += f"\n { i + 1 } . { title } ({ url } )"
164
168
return s
169
+
170
+
171
if __name__ == "__main__":
    # Manual smoke test: run the same query against each supported engine,
    # separating the two outputs with a blank line.
    demo_engines = (("DuckDuckGo", "duckduckgo"), ("Google", "google"))
    for position, (label, engine_name) in enumerate(demo_engines):
        if position:
            print()
        print(f"{label}:")
        print(search("test", engine=engine_name))
0 commit comments