Skip to content

Nfs server module and storage/nfs server with Slurm integration #252

Nfs server module and storage/nfs server with Slurm integration

Nfs server module and storage/nfs server with Slurm integration #252

Workflow file for this run

# Terraform CI: for every solution directory in the matrix, run init / fmt /
# validate / plan / test, publish the JUnit results as an Allure report on the
# gh-pages branch, and force-delete leftover cloud resources when the matrix
# did not finish successfully.
name: 'Terraform'

on:
  pull_request:
    paths-ignore:
      - 'soperator/**'
      - '.github/workflows/soperator.yml'
  # schedule:
  #   - cron: '30 * * * *'

# The "Publish test report" step pushes to the gh-pages branch.
permissions:
  contents: write

# Every run shares one concurrency group named after the target project.
concurrency:
  group: project-e00pjzzrtk1fs3yavy

env:
  TF_VAR_parent_id: project-e00pjzzrtk1fs3yavy

jobs:
  terraform:
    name: 'Terraform ${{ matrix.solution.name }}'
    environment:
      name: project-e00pjzzrtk1fs3yavy
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 2
      matrix:
        solution:
          - name: k8s-inference
          - name: k8s-training
          - name: slurm
          - name: wireguard
    defaults:
      run:
        shell: bash
        # Every `run` step executes inside the solution's own directory.
        working-directory: ${{ matrix.solution.name }}
    env:
      TF_VAR_subnet_id: vpcsubnet-e00dgdntmhgkeej1z3
      TF_VAR_loki_access_key_id: ${{ secrets.SA_ACCESS_KEY_ID }}
      TF_VAR_loki_secret_key: ${{ secrets.SA_SECRET_KEY }}
    steps:
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout
        uses: actions/checkout@v4

      # Generate a throwaway key pair and export the agent's socket/pid so
      # that later steps can reach the same ssh-agent.
      - name: Setup SSH keys and agent
        run: |
          ssh-keygen -t rsa -f ~/.ssh/id_rsa -N ''
          eval "$(ssh-agent -s)"
          echo "SSH_AUTH_SOCK=$SSH_AUTH_SOCK" >> "$GITHUB_ENV"
          echo "SSH_AGENT_PID=$SSH_AGENT_PID" >> "$GITHUB_ENV"
          ssh-add

      # Credentials are handed over through the environment instead of being
      # interpolated into the script text.
      - name: Setup s3cmd
        env:
          S3_ACCESS_KEY: ${{ secrets.SA_ACCESS_KEY_ID }}
          S3_SECRET_KEY: ${{ secrets.SA_SECRET_KEY }}
        run: |
          pip3 install s3cmd --no-cache --quiet
          s3cmd --dump-config \
            --access_key="$S3_ACCESS_KEY" \
            --secret_key="$S3_SECRET_KEY" \
            --host="storage.eu-north1.nebius.cloud:443" \
            --host-bucket="%(bucket)s.storage.eu-north1.nebius.cloud" \
            > ~/.s3cfg
          mkdir -p tests/reports

      # Refresh the package index first so the install does not fail on a
      # stale runner image.
      - name: Install XMLStarlet
        run: |
          sudo apt-get update
          sudo apt-get install -y xmlstarlet

      # -f makes curl fail on an HTTP error instead of piping the error page
      # into bash.
      - name: Install Nebius CLI
        run: |
          curl -fsSL https://storage.ai.nebius.cloud/nebius/install.sh | bash
          echo "${HOME}/.nebius/bin" >> "$GITHUB_PATH"

      # Create a CLI profile for the service account and expose a fresh IAM
      # token to the following steps (also as the Terraform variable
      # `iam_token`).
      - name: Nebius CLI init
        env:
          SA_ID: ${{ vars.TERRAFORM_SA_ID }}
          SA_PUBLIC_KEY_ID: ${{ secrets.TERRAFORM_SA_PUBLIC_KEY_ID }}
          SA_PRIVATE_KEY: ${{ secrets.TERRAFORM_SA_PRIVATE_KEY }}
        run: |
          # Write the key verbatim (no shell expansion) and owner-readable only.
          (umask 077 && printf '%s\n' "$SA_PRIVATE_KEY" > /tmp/sa.pem)
          nebius profile create "$SA_ID" --skip-auth \
            --endpoint api.eu-north1.nebius.cloud \
            --service-account-id "$SA_ID" \
            --public-key-id "$SA_PUBLIC_KEY_ID" \
            --parent-id "$TF_VAR_parent_id" \
            --private-key-file /tmp/sa.pem
          # Plain assignment (not `export VAR=$(...)`) so that a failing
          # token request aborts the step instead of exporting an empty token.
          NEBIUS_IAM_TOKEN="$(nebius iam get-access-token)"
          echo "::add-mask::$NEBIUS_IAM_TOKEN"
          {
            echo "NEBIUS_IAM_TOKEN=$NEBIUS_IAM_TOKEN"
            echo "TF_VAR_iam_token=$NEBIUS_IAM_TOKEN"
          } >> "$GITHUB_ENV"

      # Install the pinned version of the Terraform CLI
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: "v1.10.0-alpha20240911"

      # Initialize a new or existing Terraform working directory by creating
      # initial files, loading any remote state, downloading modules, etc.
      - name: Terraform Init
        run: terraform init

      # Checks that all Terraform configuration files adhere to a canonical format
      - name: Terraform Format
        run: terraform fmt -check -recursive . ../modules

      # Validate terraform code
      - name: Terraform Validate
        run: terraform validate

      # Generates an execution plan for Terraform
      - name: Terraform Plan
        run: terraform plan -input=false

      # Run Terraform Tests
      - name: Terraform Test
        run: terraform test -junit-xml=tests/reports/TEST-result-${{ github.run_id }}.xml

      # Stamp the JUnit report with the current time. Runs even after a failed
      # test step so that the report published by the `always()` steps below
      # carries timestamps too; does nothing when no report was produced.
      - name: Set date in report
        if: always()
        run: |
          report="tests/reports/TEST-result-${{ github.run_id }}.xml"
          if [[ ! -f "$report" ]]; then
            echo "No test report at $report; nothing to timestamp."
            exit 0
          fi
          # Evaluate the clock once so every element gets the same value.
          now="$(date --iso-8601=seconds)"
          xmlstarlet ed \
            --inplace \
            -i '/testsuites' -t attr -n timestamp -v "$now" \
            -i '/testsuites/testsuite[*]' -t attr -n timestamp -v "$now" \
            -i '/testsuites/testsuite[*]/testcase' -t attr -n timestamp -v "$now" \
            "$report"

      - name: Upload test results
        if: always()
        run: s3cmd sync tests/reports s3://terraform-test-reports/${{ matrix.solution.name }}/

      # Check out the gh-pages branch into ./gh-pages; a failure here is
      # tolerated (continue-on-error).
      - name: Load test report history
        uses: actions/checkout@v4
        if: always()
        continue-on-error: true
        with:
          ref: gh-pages
          path: gh-pages

      - name: Build test report
        uses: simple-elf/[email protected]
        if: always()
        with:
          gh_pages: gh-pages
          keep_reports: 100
          subfolder: ${{ matrix.solution.name }}
          allure_results: ${{ matrix.solution.name }}/tests/reports

      - name: Publish test report
        uses: peaceiris/actions-gh-pages@v4
        if: always()
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_branch: gh-pages
          publish_dir: allure-history
          keep_files: true

  # Runs whenever the terraform matrix did not end in 'success' and deletes
  # every listed resource kind that the CLI profile can see.
  cleanup-infra:
    name: 'Cleanup Infra'
    environment:
      name: project-e00pjzzrtk1fs3yavy
    runs-on: ubuntu-latest
    needs: terraform
    if: always() && needs.terraform.result != 'success'
    steps:
      - name: Status of the tf tests
        run: echo TF Matrix Job result is ${{ needs.terraform.result }}

      # -f makes curl fail on an HTTP error instead of piping the error page
      # into bash.
      - name: Install Nebius CLI
        run: |
          curl -fsSL https://storage.ai.nebius.cloud/nebius/install.sh | bash
          echo "${HOME}/.nebius/bin" >> "$GITHUB_PATH"

      - name: Nebius CLI init
        env:
          SA_ID: ${{ vars.TERRAFORM_SA_ID }}
          SA_PUBLIC_KEY_ID: ${{ secrets.TERRAFORM_SA_PUBLIC_KEY_ID }}
          SA_PRIVATE_KEY: ${{ secrets.TERRAFORM_SA_PRIVATE_KEY }}
        run: |
          # Write the key verbatim (no shell expansion) and owner-readable only.
          (umask 077 && printf '%s\n' "$SA_PRIVATE_KEY" > /tmp/sa.pem)
          nebius profile create "$SA_ID" --skip-auth \
            --endpoint api.eu-north1.nebius.cloud \
            --service-account-id "$SA_ID" \
            --public-key-id "$SA_PUBLIC_KEY_ID" \
            --parent-id "$TF_VAR_parent_id" \
            --private-key-file /tmp/sa.pem

      # For each resource kind: list it, pull out the ids, delete them one by
      # one. A failed delete no longer stops the remaining kinds from being
      # cleaned up; the step still ends non-zero if anything failed.
      - name: Perform forced cleanup
        run: |
          failed=0
          for resource in \
            "mk8s v1 cluster" \
            "compute v1 instance" \
            "compute v1 filesystem" \
            "compute v1 disk" \
            "compute v1 gpu-cluster" \
            "vpc v1alpha1 allocation" \
            "storage bucket" \
          ; do
            echo "Deleting all $resource"
            # Split "service version kind" into separate CLI arguments.
            read -r -a kind <<< "$resource"
            nebius --format json "${kind[@]}" list \
              | jq -r 'try .items[] | .metadata.id' \
              | xargs -r -n 1 nebius "${kind[@]}" delete --id \
              || failed=1
          done
          exit "$failed"