# Generate Embeddings #32 (page header text captured together with the workflow definition below)

# Workflow that installs the ingester toolchain (Bun, Antora, mdbook,
# mdbook-i18n-helpers), writes a .env file at the repository root, and then
# runs the `generate-embeddings:yes` script from the `ingesters` directory.
name: Generate Embeddings

on:
  schedule:
    # Run once per week on Sunday at 00:00 UTC
    - cron: "0 0 * * 0"
  # Allow manual triggering of the workflow
  workflow_dispatch:

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  generate-embeddings:
    name: Generate Embeddings
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      # Reuse Bun's download cache between runs; the key changes whenever
      # ingesters/bun.lock changes, with a per-OS prefix as the fallback.
      - name: Cache Bun dependencies
        uses: actions/cache@v4
        with:
          path: ~/.bun/install/cache
          key: ${{ runner.os }}-bun-${{ hashFiles('ingesters/bun.lock') }}
          restore-keys: |
            ${{ runner.os }}-bun-

      - name: Install ingester dependencies
        run: bun install
        working-directory: ingesters

      - name: Install Antora globally
        run: bun add -g @antora/cli @antora/site-generator

      # Required to build starknet foundry
      - name: Install mdbook
        run: |
          # Fail the step if the download itself fails, instead of relying
          # only on tar's exit status at the end of the pipeline.
          set -o pipefail
          mdbook_version="v0.4.42"
          mdbook_archive="mdbook-${mdbook_version}-x86_64-unknown-linux-gnu.tar.gz"
          # -p: do not fail if a bin directory already exists in the checkout.
          mkdir -p bin
          curl -fsSL "https://github.com/rust-lang/mdBook/releases/download/${mdbook_version}/${mdbook_archive}" \
            | tar -xz --directory=bin
          # Make the extracted binary available to the following steps.
          echo "$(pwd)/bin" >> "${GITHUB_PATH}"

      - name: Install mdbook-i18n-helpers
        run: |
          cargo install mdbook-i18n-helpers --locked --version 0.3.4

      # Secrets are handed to the script through environment variables rather
      # than being templated into the script text, so their values are never
      # parsed as shell source or as part of the heredoc.
      # NOTE(review): POSTGRES_HOST is hard-coded to "postgres" and this job
      # declares no `services:` block — confirm that hostname is reachable
      # from the runner.
      - name: Create env file
        env:
          POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_DB: ${{ secrets.POSTGRES_DB }}
          POSTGRES_PORT: ${{ secrets.POSTGRES_PORT }}
          OPENAI_API_KEY: ${{ secrets.OPENAI }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC }}
          GEMINI_API_KEY: ${{ secrets.GEMINI }}
        # The heredoc delimiter is intentionally unquoted so that the
        # ${VAR} references below expand from the step's `env`.
        run: |
          cat > .env << EOL
          # Database Configuration
          POSTGRES_USER="${POSTGRES_USER}"
          POSTGRES_PASSWORD="${POSTGRES_PASSWORD}"
          POSTGRES_DB="${POSTGRES_DB}"
          POSTGRES_HOST="postgres"
          POSTGRES_PORT="${POSTGRES_PORT}"
          POSTGRES_TABLE_NAME="documents"
          # General Configuration
          PORT="3001"
          SIMILARITY_MEASURE="cosine"
          # Provider Configuration
          DEFAULT_CHAT_PROVIDER="gemini"
          DEFAULT_CHAT_MODEL="Gemini Flash 2.5"
          DEFAULT_FAST_CHAT_PROVIDER="gemini"
          DEFAULT_FAST_CHAT_MODEL="Gemini Flash 2.5"
          DEFAULT_EMBEDDING_PROVIDER="openai"
          DEFAULT_EMBEDDING_MODEL="Text embedding 3 large"
          # API Keys
          OPENAI_API_KEY="${OPENAI_API_KEY}"
          ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY}"
          GEMINI_API_KEY="${GEMINI_API_KEY}"
          # Version Configuration
          STARKNET_FOUNDRY_VERSION="0.47.0"
          SCARB_VERSION="2.11.4"
          EOL

      - name: Generate embeddings
        run: bun run generate-embeddings:yes
        working-directory: ingesters