Skip to content

Commit e94f7f6

Browse files
committed
test: added tests and fixes to browser tool
1 parent 2ee73af commit e94f7f6

File tree

2 files changed

+60
-44
lines changed

2 files changed

+60
-44
lines changed

gptme/tools/browser.py

+48-36
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
"""
22
Tools to let LLMs control a browser.
33
"""
4-
54
import atexit
5+
import logging
66
import urllib.parse
77
from dataclasses import dataclass
8-
from typing import Optional
8+
from typing import Literal, Optional
99

1010
from playwright.sync_api import ElementHandle, Page, sync_playwright
1111

1212
_p = None
13+
logger = logging.getLogger(__name__)
14+
15+
EngineType = Literal["google", "duckduckgo"]
1316

1417

1518
def get_browser():
@@ -43,10 +46,8 @@ def load_page(url: str) -> Page:
4346
return page
4447

4548

46-
def search(query: str, engine: str = "google") -> str:
47-
"""
48-
Search for a query on a search engine.
49-
"""
49+
def search(query: str, engine: EngineType = "google") -> str:
50+
"""Search for a query on a search engine."""
5051
if engine == "google":
5152
return _search_google(query)
5253
elif engine == "duckduckgo":
@@ -56,19 +57,16 @@ def search(query: str, engine: str = "google") -> str:
5657

5758

5859
def _search_google(query: str) -> str:
59-
"""
60-
Search for a query on Google.
61-
"""
6260
query = urllib.parse.quote(query)
6361
url = f"https://www.google.com/search?q={query}&hl=en"
6462
page = load_page(url)
6563

6664
els = _list_clickable_elements(page)
6765
for el in els:
68-
print(f"{el['type']}: {el['text']}")
69-
if "Accept all" in el["text"]:
70-
el["element"].click()
71-
print("Accepted terms")
66+
# print(f"{el['type']}: {el['text']}")
67+
if "Accept all" in el.text:
68+
el.element.click()
69+
logger.debug("Accepted Google terms")
7270
break
7371

7472
# list results
@@ -81,11 +79,7 @@ def _search_duckduckgo(query: str) -> str:
8179
url = f"https://duckduckgo.com/?q={query}"
8280
page = load_page(url)
8381

84-
el = page.query_selector(".react-results--main")
85-
if el:
86-
return el.inner_text()
87-
else:
88-
return "Error: no results found"
82+
return _list_results_duckduckgo(page)
8983

9084

9185
@dataclass
@@ -105,21 +99,21 @@ def from_element(cls, element: ElementHandle):
10599
name=element.evaluate("el => el.name"),
106100
href=element.evaluate("el => el.href"),
107101
element=element,
102+
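# FIXME: DOM elements expose no standard `selector` property, so this likely evaluates to None — confirm, and derive a real selector instead.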
108103
selector=element.evaluate("el => el.selector"),
109104
)
110105

111106

112-
def _list_input_elements(page):
113-
elements = []
114-
107+
def _list_input_elements(page) -> list[Element]:
115108
# List all input elements
109+
elements = []
116110
inputs = page.query_selector_all("input")
117-
print("Input Elements:")
118111
for i, input_element in enumerate(inputs):
119112
elements.append(Element.from_element(input_element))
113+
return elements
120114

121115

122-
def _list_clickable_elements(page, selector=None) -> list[dict]:
116+
def _list_clickable_elements(page, selector=None) -> list[Element]:
123117
elements = []
124118

125119
# filter by selector
@@ -131,25 +125,17 @@ def _list_clickable_elements(page, selector=None) -> list[dict]:
131125
# List all clickable buttons
132126
clickable = page.query_selector_all(selector)
133127
for i, el in enumerate(clickable):
134-
tag_name = el.evaluate("el => el.tagName")
135-
text = el.evaluate("el => el.innerText")
136-
href = el.evaluate("el => el.href")
137-
elements.append(
138-
{
139-
"type": tag_name,
140-
"text": text,
141-
"href": href,
142-
"element": el,
143-
"selector": f"{tag_name}:has-text('{text}')",
144-
}
145-
)
128+
# "selector": f"{tag_name}:has-text('{text}')",
129+
elements.append(Element.from_element(el))
146130

147131
return elements
148132

149133

150-
def _list_results_google(page):
134+
def _list_results_google(page) -> str:
151135
# fetch the results (elements with .g class)
152136
results = page.query_selector_all(".g")
137+
if not results:
138+
return "Error: something went wrong with the search."
153139

154140
# list results
155141
s = "Results:"
@@ -160,5 +146,31 @@ def _list_results_google(page):
160146
title = h3.inner_text()
161147
result.query_selector("span").inner_text()
162148
s += f"\n{i+1}. {title} ({url})"
149+
return s
150+
151+
152+
def _list_results_duckduckgo(page) -> str:
    """Format the DuckDuckGo results found on ``page`` as a numbered list.

    Looks up the ``.react-results--main`` container and lists every
    ``article`` inside it that has both a link (``a``) and a title (``h2``).

    Args:
        page: A loaded results page exposing ``query_selector``.

    Returns:
        ``"Results:"`` followed by one ``"<n>. <title> (<url>)"`` line per
        listed result, or an error message when the results container is
        missing or contains no ``article`` elements.
    """
    container = page.query_selector(".react-results--main")
    # query_selector yields None when nothing matches; guard before chaining
    # so a missing container produces the error message instead of raising.
    articles = [] if container is None else container.query_selector_all("article")
    if not articles:
        return "Error: something went wrong with the search."

    lines = ["Results:"]
    for result in articles:
        link = result.query_selector("a")
        heading = result.query_selector("h2")
        if link is None or heading is None:
            # Skip articles lacking a link or a title rather than crashing
            # or repeating the previous result's title.
            continue
        url = link.evaluate("el => el.href")
        title = heading.inner_text()
        # len(lines) equals the 1-based position of the entry being added.
        lines.append(f"\n{len(lines)}. {title} ({url})")
    return "".join(lines)
169+
170+
171+
if __name__ == "__main__":
    # Manual smoke test: query each supported engine for "test" and print
    # the formatted output, with a blank line between the two sections.
    print("DuckDuckGo:")
    duckduckgo_output = search("test", engine="duckduckgo")
    print(duckduckgo_output)
    print()
    print("Google:")
    google_output = search("test", engine="google")
    print(google_output)

tests/test_browser.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
import pytest
22

3-
try:
4-
# noreorder
5-
import playwright # fmt: skip # noqa: F401
6-
except ImportError:
7-
pytest.skip("playwright not installed", allow_module_level=True)
3+
playwright = pytest.importorskip("playwright")
84

95
# noreorder
106
from gptme.tools.browser import load_page, search # fmt: skip
117

128

139
@pytest.mark.slow
1410
def test_browser():
15-
content = load_page("https://www.google.com/ncr?hl=en")
11+
content = load_page("https://superuserlabs.org")
1612
print(content)
1713

1814

1915
@pytest.mark.slow
20-
def test_search():
21-
content = search("test")
16+
def test_search_duckduckgo():
17+
content = search("test", "duckduckgo")
2218
print(content)
19+
assert "Results:" in content
20+
21+
22+
@pytest.mark.slow
def test_search_google():
    """A Google search for "test" should produce a formatted result listing."""
    output = search("test", "google")
    print(output)
    assert "Results:" in output

0 commit comments

Comments
 (0)