Skip to content

Commit 7794ccf

Browse files
committed
dev: reorganize codebase structure
1 parent 2bf5c43 commit 7794ccf

36 files changed

+8120
-4864
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ packages/**/dist
4747
!.trunk/trunk.yaml
4848
!.trunk/.gitignore
4949

50-
starklings/
5150
debug/
5251

5352
fixtures/runner_crate/target

CLAUDE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ ingesters/
118118

119119
```text
120120
Python Summarizer → Generated Markdown → Ingester → PostgreSQL → RAG Pipeline → Code Generation
121-
(python/) (python/src/scripts/ (ingesters/) (pgvector) (python/)
122-
summarizer/generated/)
121+
(python/) (python/src/cairo_coder_tools/ (ingesters/) (pgvector) (python/)
122+
ingestion/generated/)
123123
```
124124

125125
## Configuration

ingester.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ COPY ingesters ./ingesters
1616

1717

1818
# Copy ingester files generated from python
19-
COPY python/src/scripts/summarizer/generated ./python/src/scripts/summarizer/generated
19+
COPY python/src/cairo_coder_tools/ingestion/generated ./python/src/cairo_coder_tools/ingestion/generated
2020

2121
# Install dependencies
2222
WORKDIR /app/ingesters

ingesters/src/ingesters/CairoBookIngester.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ export class CairoBookIngester extends MarkdownIngester {
4646
async readSummaryFile(): Promise<string> {
4747
const summaryPath = getPythonPath(
4848
'src',
49-
'scripts',
50-
'summarizer',
49+
'cairo_coder_tools',
50+
'ingestion',
5151
'generated',
5252
'cairo_book_summary.md',
5353
);

ingesters/src/ingesters/CoreLibDocsIngester.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ export class CoreLibDocsIngester extends MarkdownIngester {
4646
async readCorelibSummaryFile(): Promise<string> {
4747
const summaryPath = getPythonPath(
4848
'src',
49-
'scripts',
50-
'summarizer',
49+
'cairo_coder_tools',
50+
'ingestion',
5151
'generated',
5252
'corelib_summary.md',
5353
);

ingesters/src/ingesters/StarknetBlogIngester.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ export class StarknetBlogIngester extends MarkdownIngester {
4747
async readSummaryFile(): Promise<string> {
4848
const summaryPath = getPythonPath(
4949
'src',
50-
'scripts',
51-
'summarizer',
50+
'cairo_coder_tools',
51+
'ingestion',
5252
'generated',
5353
'blog_summary.md',
5454
);

python/pyproject.toml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,22 +71,26 @@ dev = [
7171
]
7272

7373
[project.scripts]
74+
# Main server
7475
cairo-coder = "cairo_coder.server.app:main"
7576
cairo-coder-api = "cairo_coder.api.server:run"
77+
78+
# Optimization tools
7679
generate_starklings_dataset = "cairo_coder.optimizers.generation.generate_starklings_dataset:cli_main"
7780
optimize_generation = "cairo_coder.optimizers.generation.optimize_generation:main"
78-
starklings_evaluate = "scripts.starklings_evaluate:main"
79-
cairo-coder-summarize = "scripts.summarizer.cli:app"
80-
docs-crawler = "scripts.docs_crawler:main"
81-
cairo-coder-datasets = "scripts.datasets.cli:app"
81+
82+
# Other scripts
83+
eval = "scripts.eval:main"
84+
ingest = "scripts.ingest:app"
85+
dataset = "scripts.dataset:app"
8286

8387
[project.urls]
8488
"Homepage" = "https://github.com/cairo-coder/cairo-coder"
8589
"Bug Tracker" = "https://github.com/cairo-coder/cairo-coder/issues"
8690

8791
[tool.uv.build-backend]
8892
module-root = "src"
89-
module-name = ["cairo_coder", "scripts"]
93+
module-name = ["cairo_coder", "cairo_coder_tools", "scripts"]
9094

9195
[tool.ruff]
9296
line-length = 100
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Cairo Coder Tools - Utilities for evaluation, ingestion, and dataset management."""
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Dataset utilities for Cairo Coder."""
2+
3+
from .analysis import DatasetAnalyzer, analyze_dataset
4+
5+
__all__ = ["DatasetAnalyzer", "analyze_dataset"]
Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
1-
# Quick CLI script to analyze a dataset of question-answer pairs.
1+
"""Dataset analysis module for Cairo Coder.
2+
3+
This module provides tools for analyzing question-answer datasets using LLMs.
4+
"""
25

36
import json
7+
from pathlib import Path
8+
from typing import Any
49

510
import dspy
611
from dspy.adapters.baml_adapter import BAMLAdapter
712

813

914
class DatasetAnalyzer(dspy.Signature):
10-
"""
11-
You are provided a dataset of question-answer pairs.
12-
This dataset is related to the Starknet blockchain and the Cairo programming language, and contains
13-
mostly technical questions about code, infrastructure, and the overall Starknet ecosystem.
14-
Your task is to analyze the dataset and provide valuable insights.
15+
"""Analyze a dataset of question-answer pairs.
16+
17+
This signature is designed for analyzing datasets related to the Starknet
18+
blockchain and Cairo programming language, containing mostly technical
19+
questions about code, infrastructure, and the Starknet ecosystem.
1520
"""
1621

17-
dataset: list[dict] = dspy.InputField(
18-
desc="The dataset of question-answer pairs."
19-
)
22+
dataset: list[dict] = dspy.InputField(desc="The dataset of question-answer pairs.")
2023
languages: list[str] = dspy.OutputField(
2124
desc="The list of all languages users have asked queries with."
2225
)
@@ -30,7 +33,7 @@ class DatasetAnalyzer(dspy.Signature):
3033
- "When im importing stuff from a file in my smart contract, what is the difference between super:: and crate:: ?" -> "Cairo language questions"
3134
- "how to use the `assert!` macro in my smart contract" -> "Cairo language questions"
3235
- "I am writing a function in my smart contract. I need to be sure the caller has enough balance or it reverts. how do I do this?" -> "Starknet smart contracts questions"
33-
- "what does this error mean :\n```\n Account validation failed: \"StarknetError { code: KnownErrorCode(ValidateFailure), message: 'The 'validate' entry point panicked with: nError in contract (contract address: 0x0762c126b2655bc371c1075e2914edd42ba40fc2c485b5e8772f05c7e09fec26, class hash: 0x036078334509b514626504edc9fb252328d1a240e4e948bef8d0c08dff45927f, selector: 0x0289da278a8dc833409cabfdad1581e8e7d40e42dcaed693fa4008dcdb4963b3): n0x617267656e742f696e76616c69642d7369676e61747572652d6c656e677468 ('argent invalid signature length'). n' }```" -> "Debugging errors questions"
36+
- "what does this error mean :\\n```\\n Account validation failed: \\"StarknetError { code: KnownErrorCode(ValidateFailure), message: 'The 'validate' entry point panicked with: nError in contract (contract address: 0x0762c126b2655bc371c1075e2914edd42ba40fc2c485b5e8772f05c7e09fec26, class hash: 0x036078334509b514626504edc9fb252328d1a240e4e948bef8d0c08dff45927f, selector: 0x0289da278a8dc833409cabfdad1581e8e7d40e42dcaed693fa4008dcdb4963b3): n0x617267656e742f696e76616c69642d7369676e61747572652d6c656e677468 ('argent invalid signature length'). n' }\\"```" -> "Debugging errors questions"
3437
- "How to declare and deploy a contract with constructor to sepolia or mainnet using starkli?" -> "Starknet network interactions questions"
3538
"""
3639
)
@@ -44,23 +47,50 @@ class DatasetAnalyzer(dspy.Signature):
4447
"""
4548
)
4649

47-
def main():
48-
dspy.configure(lm=dspy.LM("openrouter/x-ai/grok-4-fast:free", max_tokens=30000, cache=False), adapter=BAMLAdapter())
49-
with open("qa_pairs.json") as f:
50+
class AnalysisResponse:
    """Result of a dataset analysis run.

    Holds the three values that ``analyze_dataset`` copies off the analyzer's
    prediction. A bare annotated class has no generated initializer, so the
    constructor and ``model_dump`` are defined explicitly: ``analyze_dataset``
    builds this object with keyword arguments and serializes it through
    ``model_dump()``.

    Attributes:
        languages: Languages reported by the analyzer.
        topics: Topic entries reported by the analyzer, annotated as
            ``(topic, count)`` pairs.
        analysis: Free-form analysis text.
    """

    languages: list[str]
    topics: list[tuple[str, int]]
    analysis: str

    def __init__(
        self,
        languages: list[str],
        topics: list[tuple[str, int]],
        analysis: str,
    ) -> None:
        """Store the analysis fields as given (no copying or validation)."""
        self.languages = languages
        self.topics = topics
        self.analysis = analysis

    def model_dump(self) -> dict[str, Any]:
        """Return the fields as a plain dict suitable for ``json.dump``."""
        return {
            "languages": self.languages,
            "topics": self.topics,
            "analysis": self.analysis,
        }

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(languages={self.languages!r}, "
            f"topics={self.topics!r}, analysis={self.analysis!r})"
        )
54+
55+
def analyze_dataset(
    dataset_path: Path,
    output_path: Path,
    lm_model: str = "openrouter/x-ai/grok-4-fast:free",
    max_tokens: int = 30000,
) -> AnalysisResponse:
    """Analyze a dataset of question-answer pairs and save the result as JSON.

    Configures DSPy globally with the requested model and a BAML adapter,
    runs a chain-of-thought ``DatasetAnalyzer`` over the loaded dataset, and
    writes the resulting fields to ``output_path`` (parent directories are
    created if missing).

    Args:
        dataset_path: Path to the input dataset JSON file.
        output_path: Path where the analysis JSON is written.
        lm_model: Language model identifier passed to ``dspy.LM``.
        max_tokens: Maximum tokens for the LLM response.

    Returns:
        An ``AnalysisResponse`` holding the languages, topics and analysis
        text produced by the analyzer.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
        json.JSONDecodeError: If the dataset file is not valid JSON.
    """
    # NOTE: dspy.configure mutates process-wide DSPy settings.
    dspy.configure(lm=dspy.LM(lm_model, max_tokens=max_tokens, cache=False), adapter=BAMLAdapter())

    # Read as UTF-8 explicitly: the dataset holds user questions in several
    # languages, and the platform default encoding is not UTF-8 everywhere.
    with open(dataset_path, encoding="utf-8") as f:
        dataset = json.load(f)

    analyzer = dspy.ChainOfThought(DatasetAnalyzer)
    prediction = analyzer(dataset=dataset)

    # Keep the raw prediction and the returned result under separate names.
    result = AnalysisResponse(
        languages=prediction.languages,
        topics=prediction.topics,
        analysis=prediction.analysis,
    )

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result.model_dump(), f, indent=4)

    return result

0 commit comments

Comments
 (0)