Skip to content

Commit a50ba81

Browse files
authored
feat: Add Query Insights Infrastructure & Database Layer (#87)
1 parent ec7e07f commit a50ba81

File tree

21 files changed

+1900
-106
lines changed

21 files changed

+1900
-106
lines changed

.gitignore

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@ yarn-error.log
1515

1616
# Environment variables
1717
.env
18-
.env.local
19-
.env.development.local
20-
.env.test.local
21-
.env.production.local
18+
.env.*
19+
!.env.example
2220

2321
packages/ui/.env
2422

API_DOCUMENTATION.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,44 @@ Common cases:
352352
- `404 Not Found` — unknown agent id.
353353
- `500 Internal Server Error` — unexpected backend issues.
354354

355+
## Query Insights
356+
357+
The Query Insights API exposes raw interaction logs and lightweight analytics for downstream processing.
358+
359+
### `GET /v1/insights/queries`
360+
361+
Fetch paginated user queries. Results are always ordered by creation time, newest first; if no date range is provided, the most recent queries are returned.
362+
363+
- `start_date` _(ISO 8601, optional)_ — inclusive lower bound for filtering by creation time.
364+
- `end_date` _(ISO 8601, optional)_ — inclusive upper bound for filtering by creation time.
365+
- `agent_id` _(optional)_ — filter by agent id when provided.
366+
- `query_text` _(optional)_ — filter by text contained in the query (case-insensitive).
367+
- `limit` _(default `100`)_ — maximum rows returned.
368+
- `offset` _(default `0`)_ — pagination offset.
369+
370+
**Response** `200 OK`
371+
372+
```json
373+
{
374+
"items": [
375+
{
376+
"id": "ad0c2b34-04ab-4d0a-9855-47c19f0f2830",
377+
"created_at": "2024-04-01T12:30:45.123456+00:00",
378+
"agent_id": "cairo-coder",
379+
"query": "How do I declare a storage variable in Cairo 1?",
380+
"chat_history": [
381+
{ "role": "user", "content": "What is Cairo?" },
382+
{ "role": "assistant", "content": "Cairo is a programming language..." }
383+
],
384+
"output": "To declare a storage variable in Cairo 1, you use the #[storage] attribute..."
385+
}
386+
],
387+
"total": 1,
388+
"limit": 100,
389+
"offset": 0
390+
}
391+
```
392+
355393
## Versioning & Compatibility
356394

357395
- Current API version: `1.0.0` (see FastAPI metadata).

python/pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ strict_optional = true
142142
testpaths = ["tests"]
143143
pythonpath = ["src"]
144144
asyncio_mode = "auto"
145+
markers = [
146+
"db: marks tests that require a database (run by default, use -m 'not db' to skip)",
147+
]
145148
filterwarnings = [
146149
"ignore::DeprecationWarning",
147150
"ignore::PendingDeprecationWarning",

python/src/cairo_coder/core/rag_pipeline.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ def __init__(self, config: RagPipelineConfig):
8282
self._current_processed_query: ProcessedQuery | None = None
8383
self._current_documents: list[Document] = []
8484

85+
@property
def last_retrieved_documents(self) -> list[Document]:
    """Documents retrieved during the most recent pipeline execution.

    Returns the pipeline's internal ``_current_documents`` list itself, not a
    copy, so mutating the returned list mutates the pipeline's state.
    """
    return self._current_documents
89+
8590
async def _aprocess_query_and_retrieve_docs(
8691
self,
8792
query: str,
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""
2+
Database utilities for the Cairo Coder server.
3+
4+
This package exposes helpers for initializing the asyncpg connection pool and
5+
provides Pydantic representations used when persisting query insights data.
6+
"""
7+
8+
from .models import UserInteraction
9+
from .repository import (
10+
create_user_interaction,
11+
get_interactions,
12+
)
13+
from .session import close_pool, execute_schema_scripts, get_pool
14+
15+
__all__ = [
16+
"UserInteraction",
17+
"create_user_interaction",
18+
"get_interactions",
19+
"close_pool",
20+
"execute_schema_scripts",
21+
"get_pool",
22+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
2+
Pydantic models representing rows stored in the query insights database tables.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
import uuid
8+
from datetime import datetime, timezone
9+
from typing import Any, Optional
10+
11+
from pydantic import BaseModel, Field
12+
13+
14+
class UserInteraction(BaseModel):
    """Represents a record in the user_interactions table.

    Only ``agent_id`` and ``query`` are required. ``id`` and ``created_at``
    are generated when omitted; every other field defaults to ``False`` or
    ``None``.
    """

    # Row identifier; a random UUID4 is generated when not supplied.
    id: uuid.UUID = Field(default_factory=uuid.uuid4)
    # Timezone-aware timestamp; defaults to the current time in UTC.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Identifier of the agent associated with this interaction.
    agent_id: str
    # NOTE(review): presumably True when the request was served in MCP mode -- confirm with callers.
    mcp_mode: bool = False
    # Earlier conversation messages as a list of dicts, or None when there is no history.
    # NOTE(review): message schema is not enforced here -- presumably role/content pairs; confirm.
    chat_history: Optional[list[dict[str, Any]]] = None
    # The query text of this interaction.
    query: str
    # Answer text generated for the query, or None when none was recorded.
    generated_answer: Optional[str] = None
    # NOTE(review): presumably metadata about the documents retrieved for the query -- confirm schema.
    retrieved_sources: Optional[list[dict[str, Any]]] = None
    # NOTE(review): presumably LLM usage statistics (e.g. token counts) -- confirm schema.
    llm_usage: Optional[dict[str, Any]] = None
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
"""
2+
Data access helpers for the Query Insights persistence layer.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
import json
8+
from datetime import datetime
9+
from typing import Any
10+
11+
import structlog
12+
13+
from cairo_coder.db.models import UserInteraction
14+
from cairo_coder.db.session import get_pool
15+
16+
logger = structlog.get_logger(__name__)
17+
18+
19+
def _serialize_json_field(value: Any) -> str | None:
    """
    Encode a Python value as a JSON string for database storage.

    Args:
        value: JSON-serializable object (dict, list, ...) or None.

    Returns:
        None when ``value`` is None; otherwise the ``json.dumps`` encoding of
        ``value``. Empty containers are encoded as-is (``[]`` becomes ``"[]"``);
        only None maps to None.
    """
    return None if value is None else json.dumps(value)
32+
33+
34+
def _normalize_json_field(value: Any, default: Any = None) -> Any:
    """
    Return a JSON column value as a Python object.

    The value read from the database may already be parsed (dict, list, ...)
    or may still be a JSON-encoded string.

    Args:
        value: Raw value from the database (string, dict, list, or None).
        default: Value returned when ``value`` is None or is a string that
            cannot be decoded as JSON.

    Returns:
        ``default`` for None or undecodable strings, the decoded object for
        JSON strings, and ``value`` itself for anything that is not a string.
    """
    if value is None:
        return default
    if not isinstance(value, str):
        # Already parsed; hand it back untouched.
        return value
    try:
        decoded = json.loads(value)
    except (json.JSONDecodeError, TypeError):
        return default
    return decoded
53+
54+
55+
def _normalize_row(row: dict | None, fields_with_defaults: dict[str, Any]) -> dict | None:
    """
    Copy a database row and normalize its JSON fields.

    Args:
        row: Dictionary built from a database row, or None.
        fields_with_defaults: Maps each JSON field name to the value used when
            that field is missing, None, or not decodable.

    Returns:
        None when ``row`` is None; otherwise a new dictionary in which every
        listed field has been passed through ``_normalize_json_field``. The
        input row is not modified.
    """
    if row is None:
        return None

    normalized = dict(row)
    parsed_fields = {
        name: _normalize_json_field(normalized.get(name), fallback)
        for name, fallback in fields_with_defaults.items()
    }
    normalized.update(parsed_fields)
    return normalized
73+
74+
75+
async def create_user_interaction(interaction: UserInteraction) -> None:
    """Persist a user interaction in the database on a best-effort basis.

    Every failure is logged and swallowed so that interaction logging never
    propagates an error to the caller. This includes failure to obtain the
    connection pool, which is why ``get_pool()`` is awaited inside the ``try``.

    ``interaction.created_at`` is not part of the INSERT column list, so the
    stored timestamp is whatever the table assigns (schema not shown here).

    Args:
        interaction: The interaction to store. Its ``chat_history``,
            ``retrieved_sources`` and ``llm_usage`` fields are JSON-encoded
            before being sent to the database.
    """
    try:
        pool = await get_pool()
        async with pool.acquire() as connection:
            await connection.execute(
                """
                INSERT INTO user_interactions (
                    id,
                    agent_id,
                    mcp_mode,
                    chat_history,
                    query,
                    generated_answer,
                    retrieved_sources,
                    llm_usage
                )
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
                """,
                interaction.id,
                interaction.agent_id,
                interaction.mcp_mode,
                _serialize_json_field(interaction.chat_history),
                interaction.query,
                interaction.generated_answer,
                _serialize_json_field(interaction.retrieved_sources),
                _serialize_json_field(interaction.llm_usage),
            )
        logger.debug("User interaction logged successfully", interaction_id=str(interaction.id))
    except Exception as exc:  # pragma: no cover - defensive logging
        logger.error("Failed to log user interaction", error=str(exc), exc_info=True)
106+
107+
108+
def _escape_like_pattern(text: str) -> str:
    """Escape LIKE/ILIKE metacharacters so ``text`` is matched literally.

    Uses backslash, PostgreSQL's default LIKE escape character.
    """
    # Backslash must be handled first; otherwise the backslashes added for
    # "%" and "_" would themselves be doubled.
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


async def get_interactions(
    start_date: datetime | None,
    end_date: datetime | None,
    agent_id: str | None,
    limit: int,
    offset: int,
    query_text: str | None = None,
) -> tuple[list[dict[str, Any]], int]:
    """Fetch paginated interactions matching the supplied filters.

    Rows are always ordered by ``created_at DESC``. With no filters at all
    this returns the most recent ``limit`` interactions.

    Args:
        start_date: Inclusive lower bound on ``created_at``; None disables it.
        end_date: Inclusive upper bound on ``created_at``; None disables it.
        agent_id: Exact agent id to match; None or an empty string disables it.
        limit: Maximum number of rows to return.
        offset: Number of matching rows to skip.
        query_text: Case-insensitive substring to look for in ``query``; None
            or an empty string disables it. The text is matched literally:
            ``%``, ``_`` and ``\\`` are escaped and do not act as wildcards.

    Returns:
        Tuple ``(items, total)``. ``items`` holds the requested page as
        dictionaries with ``chat_history`` parsed to a Python object (``[]``
        when absent); ``total`` is the number of rows matching the filters
        regardless of ``limit`` and ``offset``.
    """
    params: list[Any] = []
    filters: list[str] = []

    if start_date is not None:
        params.append(start_date)
        filters.append(f"created_at >= ${len(params)}")

    if end_date is not None:
        params.append(end_date)
        filters.append(f"created_at <= ${len(params)}")

    if agent_id:
        params.append(agent_id)
        filters.append(f"agent_id = ${len(params)}")

    if query_text:
        # Escape wildcards so user text such as "my_var" or "100%" is a plain
        # substring search rather than a pattern.
        params.append(f"%{_escape_like_pattern(query_text)}%")
        filters.append(f"query ILIKE ${len(params)}")

    where_clause = ("WHERE " + " AND ".join(filters)) if filters else ""

    count_query = f"""
        SELECT COUNT(*)
        FROM user_interactions
        {where_clause}
    """

    # The page query reuses the filter parameters and appends LIMIT/OFFSET.
    page_params = [*params, limit, offset]
    limit_placeholder = len(page_params) - 1
    offset_placeholder = len(page_params)
    data_query = f"""
        SELECT id, created_at, agent_id, query, chat_history, generated_answer
        FROM user_interactions
        {where_clause}
        ORDER BY created_at DESC
        LIMIT ${limit_placeholder}
        OFFSET ${offset_placeholder}
    """

    pool = await get_pool()
    async with pool.acquire() as connection:
        total = await connection.fetchval(count_query, *params)
        rows = await connection.fetch(data_query, *page_params)

    # Normalize JSON fields that may be returned as strings by asyncpg
    items = [_normalize_row(dict(row), {"chat_history": []}) for row in rows]
    return items, int(total)
167+
168+
169+
async def migrate_user_interaction(interaction: UserInteraction) -> tuple[bool, bool]:
    """
    Upsert a user interaction as part of a data migration.

    The row is inserted, or, when a row with the same ``id`` already exists,
    every other column is overwritten (``ON CONFLICT (id) DO UPDATE``), so a
    migration can be re-run to refresh data. Unlike the regular create path,
    ``created_at`` is written explicitly from the model.

    Args:
        interaction: UserInteraction model with pre-set ID from LangSmith

    Returns:
        Tuple of (was_modified, was_inserted) where:
        - was_modified: True if any action was taken (insert or update)
        - was_inserted: True if inserted, False if updated

    Raises:
        Exception: Any error raised while acquiring a connection, encoding
            the JSON fields or running the statement is logged and re-raised.
    """
    # ``xmax = 0`` holds only for a freshly inserted row, which lets a single
    # statement report whether it inserted or updated.
    upsert_sql = """
        INSERT INTO user_interactions (
            id,
            created_at,
            agent_id,
            mcp_mode,
            chat_history,
            query,
            generated_answer,
            retrieved_sources,
            llm_usage
        )
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
        ON CONFLICT (id) DO UPDATE SET
            created_at = EXCLUDED.created_at,
            agent_id = EXCLUDED.agent_id,
            mcp_mode = EXCLUDED.mcp_mode,
            chat_history = EXCLUDED.chat_history,
            query = EXCLUDED.query,
            generated_answer = EXCLUDED.generated_answer,
            retrieved_sources = EXCLUDED.retrieved_sources,
            llm_usage = EXCLUDED.llm_usage
        RETURNING (xmax = 0) AS inserted
    """
    pool = await get_pool()
    try:
        async with pool.acquire() as connection:
            # Single upsert round-trip; the JSON fields are encoded here so an
            # encoding failure is logged by the handler below as well.
            row = await connection.fetchrow(
                upsert_sql,
                interaction.id,
                interaction.created_at,
                interaction.agent_id,
                interaction.mcp_mode,
                _serialize_json_field(interaction.chat_history),
                interaction.query,
                interaction.generated_answer,
                _serialize_json_field(interaction.retrieved_sources),
                _serialize_json_field(interaction.llm_usage),
            )

            if row is None:
                logger.warning("Unexpected: no result from upsert", interaction_id=str(interaction.id))
                return False, False

            was_inserted = "inserted" in row and bool(row["inserted"])
            outcome_message = "User interaction inserted" if was_inserted else "User interaction updated"
            logger.debug(outcome_message, interaction_id=str(interaction.id))
            return True, was_inserted
    except Exception as exc:  # pragma: no cover - defensive logging
        logger.error("Failed to migrate user interaction", error=str(exc), exc_info=True)
        raise
237+

0 commit comments

Comments
 (0)