Skip to content

Commit abfa8f1

Browse files
authored
Merge pull request #1038 from Vikrant-Khedkar/feat/custom-tracing-endpoint
use custom api for tracing
2 parents 7dc1956 + 96dc59c commit abfa8f1

File tree

1 file changed

+83
-60
lines changed

1 file changed

+83
-60
lines changed

scrapegraphai/telemetry/telemetry.py

Lines changed: 83 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,17 @@
44
import json
55
import logging
66
import os
7-
import platform
87
import threading
98
import uuid
109
from typing import Callable, Dict
1110
from urllib import request
12-
13-
# Load version
1411
VERSION = importlib.metadata.version("scrapegraphai")
15-
STR_VERSION = ".".join([str(i) for i in VERSION])
16-
17-
# 🚀 Your proxy service endpoint (instead of PostHog)
18-
PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"
19-
12+
TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry"
2013
TIMEOUT = 2
2114
DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
2215

2316
logger = logging.getLogger(__name__)
2417

25-
# Everything below remains mostly same
2618
def _load_config(config_location: str) -> configparser.ConfigParser:
2719
config = configparser.ConfigParser()
2820
try:
@@ -70,16 +62,6 @@ def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj
7062
MAX_COUNT_SESSION = 1000
7163

7264

73-
BASE_PROPERTIES = {
74-
"os_type": os.name,
75-
"os_version": platform.platform(),
76-
"python_version": f"{platform.python_version()}/{platform.python_implementation()}",
77-
"distinct_id": g_anonymous_id,
78-
"scrapegraphai_version": VERSION,
79-
"telemetry_version": "0.0.4-proxy",
80-
}
81-
82-
8365
def disable_telemetry():
8466
global g_telemetry_enabled
8567
g_telemetry_enabled = False
@@ -95,44 +77,83 @@ def is_telemetry_enabled() -> bool:
9577
return False
9678

9779

98-
# ⭐ UPDATED FOR PROXY — send without API key
99-
def _send_event_json(event_json: dict):
80+
def _build_telemetry_payload(
81+
prompt: str | None,
82+
schema: dict | None,
83+
content: str | None,
84+
response: dict | str | None,
85+
llm_model: str | None,
86+
source: list[str] | None,
87+
) -> dict | None:
88+
"""Build telemetry payload dict. Returns None if required fields are missing."""
89+
url = source[0] if isinstance(source, list) and source else None
90+
91+
if isinstance(content, list):
92+
content = "\n".join(str(c) for c in content)
93+
94+
json_schema = None
95+
if isinstance(schema, dict):
96+
try:
97+
json_schema = json.dumps(schema)
98+
except (TypeError, ValueError):
99+
json_schema = None
100+
elif schema is not None:
101+
json_schema = str(schema)
102+
103+
llm_response = None
104+
if isinstance(response, dict):
105+
try:
106+
llm_response = json.dumps(response)
107+
except (TypeError, ValueError):
108+
llm_response = None
109+
elif response is not None:
110+
llm_response = str(response)
111+
112+
if not all([prompt, json_schema, content, llm_response, url]):
113+
return None
114+
115+
return {
116+
"user_prompt": prompt,
117+
"json_schema": json_schema,
118+
"website_content": content,
119+
"llm_response": llm_response,
120+
"llm_model": llm_model or "unknown",
121+
"url": url,
122+
}
123+
124+
125+
def _send_telemetry(payload: dict):
126+
"""Send telemetry payload to the tracing endpoint."""
100127
headers = {
101128
"Content-Type": "application/json",
102-
"User-Agent": f"scrapegraphai/{STR_VERSION}",
129+
"sgai-oss-version": VERSION,
103130
}
104131
try:
105-
data = json.dumps(event_json).encode()
106-
req = request.Request(PROXY_URL, data=data, headers=headers)
132+
data = json.dumps(payload).encode()
133+
except (TypeError, ValueError) as e:
134+
logger.debug(f"Failed to serialize telemetry payload: {e}")
135+
return
107136

137+
try:
138+
req = request.Request(TRACK_URL, data=data, headers=headers)
108139
with request.urlopen(req, timeout=TIMEOUT) as f:
109-
response_body = f.read()
110-
if f.code != 200:
111-
raise RuntimeError(response_body)
140+
f.read()
112141
except Exception as e:
113-
logger.debug(f"Failed to send telemetry data to proxy: {e}")
114-
else:
115-
logger.debug(f"Telemetry payload forwarded to proxy: {data}")
142+
logger.debug(f"Failed to send telemetry data: {e}")
116143

117144

118-
def send_event_json(event_json: dict):
119-
if not g_telemetry_enabled:
120-
raise RuntimeError("Telemetry tracking is disabled!")
145+
def _send_telemetry_threaded(payload: dict):
146+
"""Send telemetry in a background daemon thread."""
121147
try:
122-
th = threading.Thread(target=_send_event_json, args=(event_json,))
148+
th = threading.Thread(target=_send_telemetry, args=(payload,))
149+
th.daemon = True
123150
th.start()
124-
except Exception as e:
125-
logger.debug(f"Telemetry dispatch thread failed: {e}")
151+
except RuntimeError as e:
152+
logger.debug(f"Failed to send telemetry data in a thread: {e}")
126153

127154

128155
def log_event(event: str, properties: Dict[str, any]):
129-
if is_telemetry_enabled():
130-
payload = {
131-
"event": event,
132-
"distinct_id": g_anonymous_id,
133-
"properties": {**BASE_PROPERTIES, **properties},
134-
}
135-
send_event_json(payload)
156+
pass
136157

137158

138159
def log_graph_execution(
@@ -150,23 +171,25 @@ def log_graph_execution(
150171
exception: str = None,
151172
total_tokens: int = None,
152173
):
153-
props = {
154-
"graph_name": graph_name,
155-
"source": source,
156-
"prompt": prompt,
157-
"schema": schema,
158-
"llm_model": llm_model,
159-
"embedder_model": embedder_model,
160-
"source_type": source_type,
161-
"content": content,
162-
"response": response,
163-
"execution_time": execution_time,
164-
"error_node": error_node,
165-
"exception": exception,
166-
"total_tokens": total_tokens,
167-
"type": "community-library",
168-
}
169-
log_event("graph_execution", props)
174+
if not is_telemetry_enabled():
175+
return
176+
177+
if error_node is not None:
178+
return
179+
180+
payload = _build_telemetry_payload(
181+
prompt=prompt,
182+
schema=schema,
183+
content=content,
184+
response=response,
185+
llm_model=llm_model,
186+
source=source,
187+
)
188+
if payload is None:
189+
logger.debug("Telemetry skipped: missing required fields")
190+
return
191+
192+
_send_telemetry_threaded(payload)
170193

171194

172195
def capture_function_usage(call_fn: Callable) -> Callable:

0 commit comments

Comments
 (0)