feat: add starklings evaluation report #4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow that runs the Starklings benchmark. | |
| name: Starklings Benchmark | |
| # Triggers: pushes to main, pull requests targeting main, and manual dispatch. | |
| on: | |
| push: | |
| branches: | |
| - main | |
| pull_request: | |
| branches: | |
| - main | |
| workflow_dispatch: | |
| jobs: | |
| # Single job: prepares the toolchain and database, starts the server, then runs the evaluation script. | |
| starklings-benchmark: | |
| name: Starklings Benchmark | |
| runs-on: ubuntu-latest | |
| steps: | |
| # Check out this repository into the runner workspace. | |
| - uses: actions/checkout@v4 | |
| # Provide Node.js 20 on the runner. | |
| - name: Setup Node.js | |
| uses: actions/setup-node@v4 | |
| with: | |
| node-version: "20" | |
| # Select the stable Rust toolchain using the rustup that ships on the hosted runner. | |
| # This replaces actions-rs/toolchain@v1, whose repository is archived and no longer maintained. | |
| - name: Setup Rust | |
| run: | | |
| rustup toolchain install stable | |
| # Same effect as the former `override: true`: make stable the override for this directory. | |
| rustup override set stable | |
| # Install pnpm 9, then use it to install dependencies and build the project. | |
| - name: Install pnpm | |
| uses: pnpm/action-setup@v3 | |
| with: | |
| version: 9 | |
| - name: Install dependencies | |
| run: | | |
| pnpm install | |
| - name: Build Cairo Coder | |
| run: | | |
| pnpm build | |
| # Start PostgreSQL 15 with the pgvector extension available and enabled. | |
| # The earlier setup started a stock postgres container (no pgvector binaries) and then ran | |
| # CREATE EXTENSION through `sudo -u postgres psql`, which connects to a local server on the | |
| # runner instead of that container. The pgvector image bundles the extension, and the | |
| # statement is now executed inside the same container that the application connects to. | |
| - name: Setup PostgreSQL with pgvector | |
| run: | | |
| # Database name, user, password and port match config.toml and .env written by later steps. | |
| docker run -d --name cairo-coder-postgres -e POSTGRES_DB=cairo_coder_test -e POSTGRES_USER=test_user -e POSTGRES_PASSWORD=test_password -p 5432:5432 pgvector/pgvector:pg15 | |
| # Wait (up to 30 attempts, 2 s apart) until the server accepts TCP connections. | |
| # Probing over TCP skips the temporary socket-only server used during first-time initialisation. | |
| for i in {1..30}; do | |
| if docker exec cairo-coder-postgres pg_isready -h 127.0.0.1 -U test_user -d cairo_coder_test > /dev/null 2>&1; then | |
| echo "PostgreSQL is ready" | |
| break | |
| fi | |
| echo "Waiting for PostgreSQL... ($i/30)" | |
| sleep 2 | |
| done | |
| # Fails the step if the server never became ready or the extension cannot be created. | |
| docker exec -e PGPASSWORD=test_password cairo-coder-postgres psql -v ON_ERROR_STOP=1 -h 127.0.0.1 -U test_user -d cairo_coder_test -c "CREATE EXTENSION IF NOT EXISTS vector;" | |
| # Generate packages/agents/config.toml for the test run. | |
| # API keys are filled in from repository secrets; the [VECTOR_DB] values match the PostgreSQL | |
| # setup step, and PORT 3001 is the port polled by the server start-up step. | |
| - name: Setup test configuration | |
| run: | | |
| mkdir -p packages/agents | |
| # Quoted heredoc delimiter: the shell writes the body verbatim, without shell expansion. | |
| cat > packages/agents/config.toml << 'EOL' | |
| [API_KEYS] | |
| OPENAI = "${{ secrets.OPENAI_API_KEY }}" | |
| ANTHROPIC = "${{ secrets.ANTHROPIC_API_KEY }}" | |
| GEMINI = "${{ secrets.GEMINI_API_KEY }}" | |
| [VECTOR_DB] | |
| POSTGRES_USER = "test_user" | |
| POSTGRES_HOST = "localhost" | |
| POSTGRES_DB = "cairo_coder_test" | |
| POSTGRES_PASSWORD = "test_password" | |
| POSTGRES_PORT = "5432" | |
| [GENERAL] | |
| PORT = 3001 | |
| SIMILARITY_MEASURE = "cosine" | |
| [PROVIDERS] | |
| DEFAULT_CHAT_PROVIDER = "gemini" | |
| DEFAULT_CHAT_MODEL = "Gemini Flash 2.5" | |
| DEFAULT_FAST_CHAT_PROVIDER = "gemini" | |
| DEFAULT_FAST_CHAT_MODEL = "Gemini Flash 2.5" | |
| DEFAULT_EMBEDDING_PROVIDER = "openai" | |
| DEFAULT_EMBEDDING_MODEL = "Text embedding 3 large" | |
| [VERSIONS] | |
| STARKNET_FOUNDRY = "0.37.0" | |
| SCARB = "2.9.2" | |
| EOL | |
| # Write a .env file in the working directory carrying the same PostgreSQL connection | |
| # values as the [VECTOR_DB] section of config.toml. | |
| # NOTE(review): which component reads .env is not visible in this workflow; confirm it is still needed. | |
| - name: Create env file | |
| run: | | |
| # Quoted heredoc delimiter: the body is written verbatim. | |
| cat > .env << 'EOL' | |
| POSTGRES_USER=test_user | |
| POSTGRES_HOST=localhost | |
| POSTGRES_DB=cairo_coder_test | |
| POSTGRES_PASSWORD=test_password | |
| POSTGRES_PORT=5432 | |
| EOL | |
| # Fetch the Starklings repository unless a starklings/ directory is already present. | |
| - name: Clone Starklings | |
| run: | | |
| test -d "starklings" || git clone https://github.com/starknet-edu/starklings.git | |
| # Install Scarb pinned to the version declared under [VERSIONS] in config.toml (2.9.2), | |
| # rather than whichever release the installer currently defaults to, so runs are reproducible. | |
| # Keep this version in sync with the SCARB entry in config.toml. | |
| - name: Install Scarb | |
| run: | | |
| curl --proto '=https' --tlsv1.2 -sSf https://docs.swmansion.com/scarb/install.sh | sh -s -- -v 2.9.2 | |
| # Make the installed binary visible to the following steps. | |
| echo "$HOME/.local/bin" >> "$GITHUB_PATH" | |
| # Launch the server in the background and block until it answers on port 3001. | |
| - name: Start Cairo Coder (background) | |
| run: | | |
| # Keep the server output in a file so the "Upload results" step (*.log) can collect it. | |
| pnpm start > cairo-coder.log 2>&1 & | |
| # Wait for the server to start: up to 30 attempts, 2 seconds apart. | |
| ready=0 | |
| for i in {1..30}; do | |
| if curl -s http://localhost:3001/ > /dev/null; then | |
| echo "Server is ready" | |
| ready=1 | |
| break | |
| fi | |
| echo "Waiting for server... ($i/30)" | |
| sleep 2 | |
| done | |
| # Fail the step if the server never answered, and show its output to explain why. | |
| if [ "$ready" -ne 1 ]; then | |
| echo "Server failed to start" | |
| cat cairo-coder.log | |
| exit 1 | |
| fi | |
| # Run the evaluation script; the step is cancelled if it exceeds 30 minutes. | |
| - name: Run Starklings Evaluation | |
| timeout-minutes: 30 | |
| run: node .github/scripts/starklings-evaluate.js | |
| # Publish the starklings/ directory and any *.log files as an artifact, even when an earlier step failed. | |
| - name: Upload results | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: starklings-results | |
| path: | | |
| starklings/ | |
| *.log