1- #!/usr/bin/env python3
21"""
32Extract human queries from LangSmith runs and export to JSON.
43
1110 LANGSMITH_PROJECT: Optional project name (default: "default")
1211
1312Example usage:
14- python extract_langsmith.py
15- python extract_langsmith.py --days 7 --output queries.json
13+ uv run export_langsmith_human_queries.py
14+ uv run export_langsmith_human_queries.py --days 7 --output queries.json
1615"""
1716
1817from __future__ import annotations
3231load_dotenv ("../.env" )
3332
3433
class MessageType(Enum):
    """Supported message types in LangChain format.

    Values are lowercase so they can be compared directly against a
    lowercased ``type`` field or the lowercased last element of a message
    ``id`` list.
    """

    HUMAN = "human"
    HUMANMESSAGE = "humanmessage"
    CONSTRUCTOR = "constructor"
40-
41-
@dataclass
class RunQueries:
    """Container for queries extracted from a single run."""

    run_id: str
    queries: list[str]
    mcp_mode: bool

    def to_dict(self) -> dict[str, Any]:
        """Return the run as a plain dict with keys run_id, queries, mcp_mode.

        ``run_id`` is passed through ``str()`` so the result stays
        JSON-serializable even when a caller stored a non-string identifier
        (for example a ``uuid.UUID``); for a ``str`` this is a no-op.
        """
        return {
            "run_id": str(self.run_id),
            "queries": self.queries,
            "mcp_mode": self.mcp_mode,
        }
5043
5144
5245@dataclass
5346class Config :
5447 """Script configuration."""
5548 output_path : Path
5649 days_back : int
57- run_name_filter : str
50+ run_name_filters : list [ str ]
5851 project_name : str
5952
6053 @classmethod
@@ -63,83 +56,11 @@ def from_args(cls, args: argparse.Namespace) -> Config:
6356 return cls (
6457 output_path = Path (args .output ),
6558 days_back = args .days ,
66- run_name_filter = args .name ,
59+ run_name_filters = args .names ,
6760 project_name = project_name ,
6861 )
6962
7063
class HumanMessageExtractor:
    """Extracts human messages from LangChain message structures."""

    # Input keys searched, in this order, for lists of message objects.
    HUMAN_MESSAGE_KEYS = ["chat_history", "messages", "inputs", "input"]

    @staticmethod
    def _is_human_message_by_id(message: dict[str, Any]) -> bool:
        """Check if message is human type based on ID field.

        Returns True only when ``id`` is a non-empty list whose last element,
        lowercased, equals ``MessageType.HUMANMESSAGE``. A missing ``id`` is
        treated as "not human" rather than raising, so the type-based check
        still gets a chance to run.
        """
        msg_id = message.get("id")
        if isinstance(msg_id, list) and msg_id:
            return str(msg_id[-1]).lower() == MessageType.HUMANMESSAGE.value
        return False

    @staticmethod
    def _is_human_message_by_type(message: dict[str, Any]) -> bool:
        """Check if message is human type based on type field.

        A missing ``type`` is treated as "not human".
        """
        msg_type = message.get("type")
        if msg_type is None:
            return False
        human_types = {MessageType.HUMAN.value, MessageType.HUMANMESSAGE.value}
        return str(msg_type).lower() in human_types

    def _process_message_object(self, obj: Any) -> list[str]:
        """Process a single message object and extract human content.

        Args:
            obj: A candidate message; anything that is not a dict is ignored.

        Returns:
            A one-element list with the message content, or an empty list
            when the object is not a human message or its content is falsy.

        Raises:
            ValueError: If a human message lacks ``kwargs`` or
                ``kwargs["content"]``.
        """
        # Only dict-shaped objects can carry the id/type markers.
        if not isinstance(obj, dict):
            return []

        is_human = (
            self._is_human_message_by_id(obj)
            or self._is_human_message_by_type(obj)
        )
        if not is_human:
            return []

        kwargs = obj.get("kwargs")
        if not isinstance(kwargs, dict) or "content" not in kwargs:
            raise ValueError(f"Expected kwargs and content in message object: {obj}")

        content = kwargs["content"]
        return [content] if content else []

    def _process_value(self, value: Any) -> list[str]:
        """Extract human contents from a list of message objects.

        Raises:
            ValueError: If ``value`` is not a list.
        """
        if not isinstance(value, list):
            raise ValueError(f"Expected list, got {type(value)}: {value}")
        results: list[str] = []
        for item in value:
            results.extend(self._process_message_object(item))
        return results

    def extract(self, inputs: dict[str, Any]) -> list[str]:
        """
        Extract human messages from LangChain inputs.

        Args:
            inputs: Dictionary containing message data in various formats

        Returns:
            List of unique human message strings in order of appearance

        Raises:
            ValueError: If a recognised key holds a non-list value or a human
                message is missing its content.
        """
        results: list[str] = []
        for key in self.HUMAN_MESSAGE_KEYS:
            if key in inputs:
                results.extend(self._process_value(inputs[key]))

        # dict preserves insertion order, so this de-duplicates while keeping
        # the first occurrence of each query (contents must be hashable).
        return list(dict.fromkeys(results))
141-
142-
14364class RunDeduplicator :
14465 """Removes runs whose queries are prefixes of other runs."""
14566
@@ -181,7 +102,6 @@ class LangSmithExporter:
def __init__(self, config: Config):
    """Store the configuration and create the client and run deduplicator.

    Args:
        config: Script configuration used by the exporter's methods.
    """
    self.config = config
    self.client = Client()
    self.deduplicator = RunDeduplicator()
186106
187107 def _get_time_range (self ) -> tuple [datetime , datetime ]:
@@ -193,24 +113,25 @@ def _get_time_range(self) -> tuple[datetime, datetime]:
def fetch_runs(self) -> list[RunQueries]:
    """Fetch runs matching the configured names and collect their queries.

    The client is queried for runs inside the configured time range and
    project whose name equals any of ``config.run_name_filters``. For each
    run, the user-role messages from ``inputs["chat_history"]`` followed by
    ``inputs["query"]`` form the query list.

    Returns:
        One ``RunQueries`` per run returned by the client, in client order.

    Raises:
        KeyError: If a run's inputs lack ``query`` or ``mcp_mode``.
    """
    start_time, end_time = self._get_time_range()

    name_clauses = [
        f'eq(name, "{name}")' for name in self.config.run_name_filters
    ]
    # A single name needs no or(...) wrapper; the bare clause is equivalent
    # and does not depend on the service accepting a one-argument or().
    if len(name_clauses) == 1:
        name_filter = name_clauses[0]
    else:
        name_filter = f'or({",".join(name_clauses)})'

    query_params = {
        "start_time": start_time,
        "end_time": end_time,
        "filter": name_filter,
        "project_name": self.config.project_name,
    }

    runs = []
    for run in self.client.list_runs(**query_params):
        run_data = run.dict()
        inputs = run_data["inputs"]
        # A missing or None chat history is treated as "no earlier turns".
        chat_history = inputs.get("chat_history") or []
        earlier_user_queries = [
            msg["content"] for msg in chat_history if msg["role"] == "user"
        ]
        runs.append(RunQueries(
            run_id=str(run_data["id"]),
            queries=[*earlier_user_queries, inputs["query"]],
            mcp_mode=inputs["mcp_mode"],
        ))

    return runs
216137
@@ -253,9 +174,10 @@ def parse_arguments() -> argparse.Namespace:
253174 help = "Number of days to look back (default: %(default)s)" ,
254175 )
255176 parser .add_argument (
256- "--name" ,
257- default = "RunnableSequence" ,
258- help = "Filter runs by name (default: %(default)s)" ,
177+ "--names" ,
178+ default = ["RagPipeline" , "RagPipelineStreaming" ],
179+ nargs = "+" ,
180+ help = "Filter runs by names (default: %(default)s)" ,
259181 )
260182
261183 return parser .parse_args ()
0 commit comments