44import json
55import logging
66import os
7- import platform
87import threading
98import uuid
109from typing import Callable , Dict
1110from urllib import request
12-
13- # Load version
1411VERSION = importlib .metadata .version ("scrapegraphai" )
15- STR_VERSION = "." .join ([str (i ) for i in VERSION ])
16-
17- # 🚀 Your proxy service endpoint (instead of PostHog)
18- PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"
19-
12+ TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry"
2013TIMEOUT = 2
2114DEFAULT_CONFIG_LOCATION = os .path .expanduser ("~/.scrapegraphai.conf" )
2215
2316logger = logging .getLogger (__name__ )
2417
25- # Everything below remains mostly same
2618def _load_config (config_location : str ) -> configparser .ConfigParser :
2719 config = configparser .ConfigParser ()
2820 try :
@@ -70,16 +62,6 @@ def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj
7062MAX_COUNT_SESSION = 1000
7163
7264
73- BASE_PROPERTIES = {
74- "os_type" : os .name ,
75- "os_version" : platform .platform (),
76- "python_version" : f"{ platform .python_version ()} /{ platform .python_implementation ()} " ,
77- "distinct_id" : g_anonymous_id ,
78- "scrapegraphai_version" : VERSION ,
79- "telemetry_version" : "0.0.4-proxy" ,
80- }
81-
82-
def disable_telemetry():
    """Switch off telemetry reporting for the remainder of this process."""
    global g_telemetry_enabled
    g_telemetry_enabled = False
@@ -95,44 +77,83 @@ def is_telemetry_enabled() -> bool:
9577 return False
9678
9779
98- # ⭐ UPDATED FOR PROXY — send without API key
99- def _send_event_json (event_json : dict ):
80+ def _build_telemetry_payload (
81+ prompt : str | None ,
82+ schema : dict | None ,
83+ content : str | None ,
84+ response : dict | str | None ,
85+ llm_model : str | None ,
86+ source : list [str ] | None ,
87+ ) -> dict | None :
88+ """Build telemetry payload dict. Returns None if required fields are missing."""
89+ url = source [0 ] if isinstance (source , list ) and source else None
90+
91+ if isinstance (content , list ):
92+ content = "\n " .join (str (c ) for c in content )
93+
94+ json_schema = None
95+ if isinstance (schema , dict ):
96+ try :
97+ json_schema = json .dumps (schema )
98+ except (TypeError , ValueError ):
99+ json_schema = None
100+ elif schema is not None :
101+ json_schema = str (schema )
102+
103+ llm_response = None
104+ if isinstance (response , dict ):
105+ try :
106+ llm_response = json .dumps (response )
107+ except (TypeError , ValueError ):
108+ llm_response = None
109+ elif response is not None :
110+ llm_response = str (response )
111+
112+ if not all ([prompt , json_schema , content , llm_response , url ]):
113+ return None
114+
115+ return {
116+ "user_prompt" : prompt ,
117+ "json_schema" : json_schema ,
118+ "website_content" : content ,
119+ "llm_response" : llm_response ,
120+ "llm_model" : llm_model or "unknown" ,
121+ "url" : url ,
122+ }
123+
124+
def _send_telemetry(payload: dict):
    """Serialize *payload* and POST it to the tracing endpoint, best-effort.

    Failures (serialization or network) are logged at debug level and
    swallowed so telemetry can never break the caller.
    """
    try:
        body = json.dumps(payload).encode()
    except (TypeError, ValueError) as e:
        logger.debug(f"Failed to serialize telemetry payload: {e}")
        return

    request_headers = {
        "Content-Type": "application/json",
        "sgai-oss-version": VERSION,
    }
    try:
        req = request.Request(TRACK_URL, data=body, headers=request_headers)
        # Drain the response so the connection is cleanly released.
        with request.urlopen(req, timeout=TIMEOUT) as resp:
            resp.read()
    except Exception as e:
        logger.debug(f"Failed to send telemetry data: {e}")
116143
117144
118- def send_event_json (event_json : dict ):
119- if not g_telemetry_enabled :
120- raise RuntimeError ("Telemetry tracking is disabled!" )
def _send_telemetry_threaded(payload: dict):
    """Dispatch telemetry delivery on a background daemon thread.

    A daemon thread is used so an in-flight send never blocks interpreter
    shutdown; thread-creation failures are logged and ignored.
    """
    try:
        worker = threading.Thread(
            target=_send_telemetry,
            args=(payload,),
            daemon=True,
        )
        worker.start()
    except RuntimeError as e:
        logger.debug(f"Failed to send telemetry data in a thread: {e}")
126153
127154
def log_event(event: str, properties: Dict[str, any]):
    """No-op event logger retained so existing call sites keep working."""
    return None
136157
137158
138159def log_graph_execution (
@@ -150,23 +171,25 @@ def log_graph_execution(
150171 exception : str = None ,
151172 total_tokens : int = None ,
152173):
153- props = {
154- "graph_name" : graph_name ,
155- "source" : source ,
156- "prompt" : prompt ,
157- "schema" : schema ,
158- "llm_model" : llm_model ,
159- "embedder_model" : embedder_model ,
160- "source_type" : source_type ,
161- "content" : content ,
162- "response" : response ,
163- "execution_time" : execution_time ,
164- "error_node" : error_node ,
165- "exception" : exception ,
166- "total_tokens" : total_tokens ,
167- "type" : "community-library" ,
168- }
169- log_event ("graph_execution" , props )
174+ if not is_telemetry_enabled ():
175+ return
176+
177+ if error_node is not None :
178+ return
179+
180+ payload = _build_telemetry_payload (
181+ prompt = prompt ,
182+ schema = schema ,
183+ content = content ,
184+ response = response ,
185+ llm_model = llm_model ,
186+ source = source ,
187+ )
188+ if payload is None :
189+ logger .debug ("Telemetry skipped: missing required fields" )
190+ return
191+
192+ _send_telemetry_threaded (payload )
170193
171194
172195def capture_function_usage (call_fn : Callable ) -> Callable :
0 commit comments