# .github/workflows/github-actions.yml

# Single-job workflow: the `build` job below runs once per matrix entry.
name: Python package

# Trigger on every push event (no branch or path filters are configured).
on: [push]

jobs:
  build:

    # NOTE(review): presumably pinned to ubuntu-20.04 because newer hosted
    # images do not provide Python 3.6 builds — confirm before changing.
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        # Quoted so YAML keeps the version a string; an unquoted entry such
        # as 3.10 would be parsed as the float 3.1.
        python-version: ["3.6"]

    steps:
      # Check out the repository into the runner workspace.
      # v4 replaces v2, which was built for the retired Node.js 12 runtime.
      - uses: actions/checkout@v4
      # Install the Python interpreter selected by the current matrix entry.
      # v5 replaces v2 for the same runtime-deprecation reason; the
      # `python-version` input is unchanged.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # Install the packages listed in requirements.txt with pip.
      - name: Install dependencies
        run: pip install -r requirements.txt
      # End-to-end smoke test: the step invokes the project scripts directly
      # (pytest is not used). Each stage preprocesses a corpus into
      # dataset.pt, runs pretrain.py for 10 steps with a checkpoint saved at
      # step 10, and renames the resulting `<output>-10` file to `<output>`.
      # Every stage reuses the same dataset.pt path, so the stages must run
      # in order. Of the pretrained models, only models/bert_model.bin is
      # consumed later in this step (by the fine-tuning command).
      - name: Run pretraining, fine-tuning, and inference smoke tests
        run: |
          # bert data processor, BERT mini config
          python preprocess.py --corpus_path corpora/CLUECorpusSmall_bert_sampled.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --seq_length 64 --data_processor bert
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/bert/mini_config.json --output_model_path models/bert_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2
          mv models/bert_model.bin-10 models/bert_model.bin
          # mlm data processor with dynamic masking (roberta_model)
          python preprocess.py --corpus_path corpora/CLUECorpusSmall_sampled.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --dynamic_masking --seq_length 64 --data_processor mlm
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/bert/mini_config.json --output_model_path models/roberta_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2 --data_processor mlm --target mlm
          mv models/roberta_model.bin-10 models/roberta_model.bin
          # albert data processor, ALBERT base config
          python preprocess.py --corpus_path corpora/CLUECorpusSmall_bert_sampled.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --seq_length 64 --data_processor albert
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/albert/base_config.json --output_model_path models/albert_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2
          mv models/albert_model.bin-10 models/albert_model.bin
          # lm data processor, GPT-2 config
          python preprocess.py --corpus_path corpora/CLUECorpusSmall_sampled.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --seq_length 64 --data_processor lm
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/gpt2/config.json --output_model_path models/gpt2_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2
          mv models/gpt2_model.bin-10 models/gpt2_model.bin
          # mlm data processor with dynamic and span masking (spanbert_model)
          python preprocess.py --corpus_path corpora/CLUECorpusSmall_sampled.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --dynamic_masking --span_masking --seq_length 64 --data_processor mlm
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/bert/mini_config.json --output_model_path models/spanbert_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2 --data_processor mlm --target mlm
          mv models/spanbert_model.bin-10 models/spanbert_model.bin
          # cls data processor and target, two labels
          python preprocess.py --corpus_path corpora/book_review_cls.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --seq_length 64 --data_processor cls
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/bert/mini_config.json --output_model_path models/cls_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2 --labels_num 2 --data_processor cls --target cls
          mv models/cls_model.bin-10 models/cls_model.bin
          # gsg data processor, PEGASUS base config
          python preprocess.py --corpus_path corpora/CLUECorpusSmall_bert_sampled.txt --vocab_path models/google_zh_vocab.txt --dataset_path dataset.pt --processes_num 8 --seq_length 128 --tgt_seq_length 128 --dup_factor 1 --sentence_selection_strategy random --data_processor gsg
          python pretrain.py --dataset_path dataset.pt --vocab_path models/google_zh_vocab.txt --config_path models/pegasus/base_config.json --output_model_path models/pegasus_model.bin --total_steps 10 --save_checkpoint_steps 10 --report_steps 2 --batch_size 2
          mv models/pegasus_model.bin-10 models/pegasus_model.bin
          # Fine-tune a classifier from the BERT checkpoint, then run inference with it
          python finetune/run_classifier.py --pretrained_model_path models/bert_model.bin --vocab_path models/google_zh_vocab.txt --config_path models/bert/mini_config.json --output_model_path models/classifier_model.bin --train_path datasets/test_data/book_review/train.tsv --dev_path datasets/test_data/book_review/dev.tsv --epochs_num 3 --batch_size 2
          python inference/run_classifier_infer.py --load_model_path models/classifier_model.bin --vocab_path models/google_zh_vocab.txt --config_path models/bert/mini_config.json --test_path datasets/test_data/book_review/test_nolabel.tsv --prediction_path datasets/test_data/book_review/prediction.tsv --labels_num 2