Skip to content

feat: add starklings evaluation report #6

feat: add starklings evaluation report

feat: add starklings evaluation report #6

Workflow file for this run

# Starklings Benchmark
#
# Builds Cairo Coder, starts it against a pgvector-enabled PostgreSQL service,
# runs the Starklings evaluation script against the running server, and
# uploads the results as a build artifact.
name: Starklings Benchmark

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  starklings-benchmark:
    name: Starklings Benchmark
    runs-on: ubuntu-latest

    # PostgreSQL runs as a service container using an image that ships the
    # pgvector extension, so `CREATE EXTENSION vector` can succeed in the
    # same database the application connects to.
    services:
      postgres:
        image: pgvector/pgvector:pg15
        env:
          POSTGRES_DB: cairo_coder_test
          POSTGRES_USER: test_user
          POSTGRES_PASSWORD: test_password
        ports:
          - '5432:5432'
        # Hold the job's steps until the database accepts connections.
        options: >-
          --health-cmd "pg_isready -U test_user -d cairo_coder_test"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      # Uses rustup directly instead of the archived actions-rs/toolchain
      # action. NOTE(review): assumes rustup is preinstalled on the runner.
      - name: Setup Rust
        run: |
          rustup toolchain install stable --profile minimal
          rustup default stable

      - name: Install pnpm
        uses: pnpm/action-setup@v3
        with:
          version: 9

      - name: Install dependencies
        run: pnpm install

      - name: Build Cairo Coder
        run: pnpm build

      # Connects over TCP to the service container (not the runner's local
      # socket) so the extension is created in the database under test.
      - name: Install PostgreSQL client and enable pgvector
        env:
          PGPASSWORD: test_password
        run: |
          sudo apt-get update
          sudo apt-get install -y postgresql-client
          psql -h localhost -p 5432 -U test_user -d cairo_coder_test \
            -v ON_ERROR_STOP=1 \
            -c "CREATE EXTENSION IF NOT EXISTS vector;"

      # The heredoc delimiter is quoted so the shell writes the body verbatim;
      # the secrets expressions are substituted by GitHub before the shell runs.
      - name: Setup test configuration
        run: |
          mkdir -p packages/agents
          cat > packages/agents/config.toml << 'EOL'
          [API_KEYS]
          OPENAI = "${{ secrets.OPENAI_API_KEY }}"
          ANTHROPIC = "${{ secrets.ANTHROPIC_API_KEY }}"
          GEMINI = "${{ secrets.GEMINI_API_KEY }}"
          [VECTOR_DB]
          POSTGRES_USER = "test_user"
          POSTGRES_HOST = "localhost"
          POSTGRES_DB = "cairo_coder_test"
          POSTGRES_PASSWORD = "test_password"
          POSTGRES_PORT = "5432"
          [GENERAL]
          PORT = 3001
          SIMILARITY_MEASURE = "cosine"
          [PROVIDERS]
          DEFAULT_CHAT_PROVIDER = "gemini"
          DEFAULT_CHAT_MODEL = "Gemini Flash 2.5"
          DEFAULT_FAST_CHAT_PROVIDER = "gemini"
          DEFAULT_FAST_CHAT_MODEL = "Gemini Flash 2.5"
          DEFAULT_EMBEDDING_PROVIDER = "openai"
          DEFAULT_EMBEDDING_MODEL = "Text embedding 3 large"
          [VERSIONS]
          STARKNET_FOUNDRY = "0.37.0"
          SCARB = "2.9.2"
          EOL

      - name: Create env file
        run: |
          cat > .env << 'EOL'
          POSTGRES_USER=test_user
          POSTGRES_HOST=localhost
          POSTGRES_DB=cairo_coder_test
          POSTGRES_PASSWORD=test_password
          POSTGRES_PORT=5432
          EOL

      # A shallow clone is enough: only the working tree is used.
      - name: Clone Starklings
        run: |
          if [ ! -d "starklings" ]; then
            git clone --depth 1 https://github.com/starknet-edu/starklings.git
          fi

      # Pinned to the same Scarb version declared in config.toml above.
      - name: Install Scarb
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://docs.swmansion.com/scarb/install.sh | sh -s -- -v 2.9.2
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Start Cairo Coder (background)
        run: |
          # Keep the server output in a log file so the upload step's
          # `*.log` glob captures it.
          pnpm start > cairo-coder.log 2>&1 &
          # Wait for the server to come up (30 attempts, 2 seconds apart).
          for i in {1..30}; do
            if curl -s http://localhost:3001/ > /dev/null; then
              echo "Server is ready"
              break
            fi
            echo "Waiting for server... ($i/30)"
            sleep 2
          done
          # Check that the server is really ready; fail the job otherwise.
          if ! curl -s http://localhost:3001/ > /dev/null; then
            echo "Server failed to start"
            cat cairo-coder.log || true
            exit 1
          fi

      - name: Run Starklings Evaluation
        run: node .github/scripts/starklings-evaluate.js
        timeout-minutes: 30

      # Runs even when an earlier step failed so results and logs are kept.
      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: starklings-results
          path: |
            starklings/
            *.log