Add an example for using open_instruct data #2187
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: Main
# Runs are grouped by workflow name and git ref; starting a new run in a group
# cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Triggers: pull requests targeting main, pushes to main, and pushes of
# version tags matching v*.*.* (the tag pushes are what enable the release job).
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
    tags:
      - 'v*.*.*'
# Workflow-level environment: these variables are set for every job and step.
env:
  # Change this to invalidate existing cache.
  CACHE_PREFIX: v1
  PYTHONPATH: ./src/
  # Credentials taken from repository secrets.
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
  GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
jobs:
  # CPU checks: each entry of the task matrix runs as its own job on a
  # GitHub-hosted runner, executing the task's `run` command inside the venv.
  checks:
    name: ${{ matrix.task.name }}
    runs-on: ubuntu-latest
    timeout-minutes: 10
    strategy:
      # Let the remaining tasks finish when one of them fails.
      fail-fast: false
      matrix:
        python: ['3.10']
        task:
          - name: Lint
            run: make lint-check

          # Checkpoint tests are excluded here and run by the next task.
          - name: Test
            run: |
              pytest -v --color=yes --durations=3 -n auto --dist=loadfile \
                --ignore-glob='src/test/distributed/checkpoint*' \
                src/test/

          - name: Test checkpoint
            run: |
              pytest -v --color=yes --durations=3 src/test/distributed/checkpoint*

          - name: Type check
            run: make type-check

          - name: Build
            run: make build

          - name: Style
            run: make style-check

          - name: Docs
            run: |
              cd docs && make html SPHINXOPTS="-W --keep-going"

        # One extra combination on top of the matrix: lint on Python 3.9.
        include:
          - python: '3.9'
            task:
              name: Lint (min Python)
              run: make lint-check

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python environment
        uses: ./.github/actions/setup-venv
        with:
          python-version: ${{ matrix.python }}
          cache-prefix: ${{ env.CACHE_PREFIX }}

      # Write the credentials JSON from the GOOGLE_CREDENTIALS variable to disk
      # and export its path for the following steps.
      - name: Setup Google credentials
        run: |
          mkdir -p "$HOME/.google"
          printenv GOOGLE_CREDENTIALS > "$HOME/.google/credentials.json"
          echo "GOOGLE_APPLICATION_CREDENTIALS=$HOME/.google/credentials.json" >> "$GITHUB_ENV"

      # The exact key ends in the commit SHA, so every commit stores a fresh
      # cache; restore-keys fall back to the same ref, then to any ref with the
      # same pyproject.toml hash.
      - name: Restore mypy cache
        if: matrix.task.name == 'Type check'
        uses: actions/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ env.CACHE_PREFIX }}-${{ runner.os }}-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            mypy-${{ env.CACHE_PREFIX }}-${{ runner.os }}-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-${{ github.ref }}
            mypy-${{ env.CACHE_PREFIX }}-${{ runner.os }}-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}

      - name: ${{ matrix.task.name }}
        run: |
          . .venv/bin/activate
          ${{ matrix.task.run }}

      # The release job downloads this artifact by the same name ("package").
      - name: Upload package distribution files
        if: matrix.task.name == 'Build'
        uses: actions/upload-artifact@v4
        with:
          name: package
          path: dist

      # NOTE(review): presumably keeps the installed package out of the cached
      # virtualenv - confirm against the setup-venv action.
      - name: Clean up
        if: always()
        run: |
          . .venv/bin/activate
          pip uninstall -y ai2-olmo-core

  # GPU checks: the runner only submits a Beaker experiment; the tests
  # themselves run in the task image on one of the clusters listed in the spec.
  gpu_checks:
    name: ${{ matrix.task.name }}
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      fail-fast: false
      matrix:
        task:
          # The three suites ignored here each have a dedicated task below.
          - name: Test (GPU)
            image: petew/olmo-core-tch270cu128-2025-05-16
            gpus: 2
            run: |
              pytest -v --color=yes --durations=3 -m gpu \
                --ignore-glob='src/test/distributed/checkpoint*' \
                --ignore-glob='src/test/nn/transformer*' \
                --ignore-glob='src/test/nn/moe*' \
                src/test/

          - name: Test checkpoint (GPU)
            image: petew/olmo-core-tch270cu128-2025-05-16
            gpus: 2
            run: |
              pytest -v --color=yes --durations=3 -m gpu \
                src/test/distributed/checkpoint*

          - name: Test transformer (GPU)
            image: petew/olmo-core-tch270cu128-2025-05-16
            gpus: 2
            run: |
              pytest -v --color=yes --durations=3 -m gpu \
                src/test/nn/transformer*

          - name: Test MoE (GPU)
            image: petew/olmo-core-tch270cu128-2025-05-16
            gpus: 2
            run: |
              pytest -v --color=yes --durations=3 -m gpu \
                src/test/nn/moe*

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # The workspace name is produced by a target in the repository Makefile.
      - name: Set env vars
        run: |
          echo "BEAKER_WORKSPACE=$(make get-beaker-workspace)" >> "$GITHUB_ENV"

      # Exactly one of the next two steps runs. Pull requests use the PR head
      # SHA, every other event uses GITHUB_SHA; the result is the commit that
      # the Beaker task checks out after cloning.
      - name: Determine current commit SHA (pull request)
        if: github.event_name == 'pull_request'
        run: |
          echo "COMMIT_SHA=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_ENV"

      - name: Determine current commit SHA (push)
        if: github.event_name != 'pull_request'
        run: |
          echo "COMMIT_SHA=$GITHUB_SHA" >> "$GITHUB_ENV"

      # Both Beaker steps are skipped when BEAKER_TOKEN is empty.
      - uses: allenai/setup-beaker@v2
        if: env.BEAKER_TOKEN != ''
        with:
          token: ${{ env.BEAKER_TOKEN }}
          workspace: ${{ env.BEAKER_WORKSPACE }}

      # NOTE(review): the action reference was garbled by e-mail obfuscation in
      # the extracted source; restored as beaker-run-action@v1.2 - confirm the
      # version against the repository.
      - name: GPU Tests
        uses: allenai/beaker-run-action@v1.2
        if: env.BEAKER_TOKEN != ''
        with:
          spec: |
            version: v2
            description: OLMo-core ${{ matrix.task.name }}
            budget: ai2/oe-training
            tasks:
              - name: tests
                image:
                  beaker: ${{ matrix.task.image }}
                context:
                  priority: normal
                  preemptible: true
                resources:
                  gpuCount: ${{ matrix.task.gpus }}
                timeout: 8m
                hostNetworking: true
                constraints:
                  cluster:
                    # H100 clusters
                    - ai2/jupiter-cirrascale-2
                    - ai2/augusta-google-1
                    - ai2/ceres-cirrascale
                    # A100 clusters
                    - ai2/saturn-cirrascale
                    # - ai2/allennlp-elanding-a100-40g
                envVars:
                  - name: CUBLAS_WORKSPACE_CONFIG
                    value: ":16:8"
                  - name: TOKENIZERS_PARALLELISM
                    value: "false"
                  - name: AWS_ACCESS_KEY_ID
                    secret: AWS_ACCESS_KEY_ID
                  - name: AWS_SECRET_ACCESS_KEY
                    secret: AWS_SECRET_ACCESS_KEY
                  - name: GOOGLE_APPLICATION_CREDENTIALS
                    value: "/.google/credentials.json"
                datasets:
                  - mountPath: "/.google/credentials.json"
                    source:
                      secret: GOOGLE_CREDENTIALS
                command:
                  - "bash"
                  - "-c"
                  - "git clone https://github.com/allenai/OLMo-core.git && cd OLMo-core && git checkout ${{ env.COMMIT_SHA }} && pip install -e .[all] && ${{ matrix.task.run }}"
                result:
                  path: /unused
          token: ${{ env.BEAKER_TOKEN }}
          workspace: ${{ env.BEAKER_WORKSPACE }}

  # Release: runs only for tag refs and only after the CPU checks succeed.
  # Publishes the built distribution to PyPI and creates the GitHub release.
  release:
    name: Release
    runs-on: ubuntu-latest
    needs: [checks]
    if: startsWith(github.ref, 'refs/tags/')
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python environment
        uses: ./.github/actions/setup-venv
        with:
          python-version: '3.10'
          cache-prefix: ${{ env.CACHE_PREFIX }}

      # RELEASE_VERSION is the tag without its "v" prefix; TAG is the full tag name.
      - name: Prepare environment
        run: |
          echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_ENV"
          echo "TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"

      # Fetches the "package" artifact uploaded by the Build task of the checks job.
      - name: Download package distribution files
        uses: actions/download-artifact@v4
        with:
          name: package
          path: dist

      # The notes are written next to the workspace directory
      # (<workspace>-RELEASE_NOTES.md), not inside the checkout.
      - name: Generate release notes
        run: |
          . .venv/bin/activate
          python src/scripts/release/release_notes.py > "${{ github.workspace }}-RELEASE_NOTES.md"

      # Twine reads its credentials from TWINE_USERNAME / TWINE_PASSWORD, so the
      # token is never interpolated into the script text.
      - name: Publish package to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          . .venv/bin/activate
          twine upload dist/*

      # Tags containing "rc" are published as pre-releases.
      - name: Publish GitHub release
        uses: softprops/action-gh-release@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          body_path: ${{ github.workspace }}-RELEASE_NOTES.md
          prerelease: ${{ contains(env.TAG, 'rc') }}
          files: |
            dist/*

      - name: Add PR comments on release
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          ./src/scripts/release/add_pr_comments_on_release.sh

      # The webhook URL goes through an environment variable and is still
      # handed to the script as its first argument.
      - name: Send Slack notifications about release
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
        run: |
          . .venv/bin/activate
          python ./src/scripts/release/slack_notification.py "$SLACK_WEBHOOK_URL"