Skip to content

Commit

Permalink
Adding new cmds
Browse files Browse the repository at this point in the history
  • Loading branch information
proserve committed Dec 20, 2023
1 parent 6a659ad commit aa64610
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 45 deletions.
131 changes: 88 additions & 43 deletions start.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import os
import argparse
from typing import Optional

import torch
import transformers
from peft import AutoPeftModelForCausalLM
import logging
from transformers import AutoTokenizer
from huggingface_hub import login


Expand All @@ -17,7 +13,12 @@ def parse_arge():
parser.add_argument(
"--model_id",
type=str,
help="Model id to use for training.",
help="Huggingface Model id or s3 path.",
)
parser.add_argument(
"--peft_model_id",
type=str,
help="peft model huggingface id or s3 path",
)
parser.add_argument(
"--hf_token",
Expand Down Expand Up @@ -54,61 +55,105 @@ def parse_arge():
return args


if __name__ == "__main__":
def run_vllm(model_id_or_path, tasks):
    """Run lm_eval against a model using the vLLM backend.

    The model is loaded with a fixed configuration: tensor parallelism over
    8 devices, automatic dtype, 90% GPU memory utilization, remote code
    trusted and AWQ quantization. Results are written to /opt/ml/model/.

    Args:
        model_id_or_path: Hugging Face model id or local directory, passed to
            lm_eval as ``pretrained``.
        tasks: Comma-separated lm_eval task names.

    Returns:
        The status returned by ``os.system`` for the lm_eval command
        (0 on success).
    """
    import shlex  # local import: only needed to quote the shell arguments

    model_args = {
        "pretrained": model_id_or_path,  # required: taken from UI, no default value
        "tensor_parallel_size": 8,
        "dtype": "auto",
        "gpu_memory_utilization": 0.90,
        "trust_remote_code": True,
        "quantization": "awq",
    }
    model_args_str = make_model_args_str(model_args)
    # Quote the user-supplied pieces so spaces or shell metacharacters in the
    # model path / task list cannot split or inject into the command.
    # shlex.quote leaves ordinary ids, paths and task lists unchanged.
    cmd = (
        "lm_eval --model=vllm "
        f"--model_args={shlex.quote(model_args_str)} "
        f"--tasks={shlex.quote(str(tasks))} "
        "--batch_size=auto "
        "--output_path=/opt/ml/model/"
    )
    print(f"Running command: {cmd}")
    return os.system(cmd)


def run_hf(model_id_path, peft_model_id_or_path, tasks):
    """Run lm_eval against a base model plus PEFT adapter using the hf backend.

    The model is loaded with ``parallelize=True`` and remote code trusted.
    Results are written to /opt/ml/model/.

    Args:
        model_id_path: Hugging Face model id or local directory of the base
            model, passed to lm_eval as ``pretrained``.
        peft_model_id_or_path: Hugging Face id or local directory of the PEFT
            adapter, passed to lm_eval as ``peft``.
        tasks: Comma-separated lm_eval task names.

    Returns:
        The status returned by ``os.system`` for the lm_eval command
        (0 on success).
    """
    import shlex  # local import: only needed to quote the shell arguments

    model_args = {
        "pretrained": model_id_path,  # required: taken from UI, no default value
        "peft": peft_model_id_or_path,
        "parallelize": True,
        "trust_remote_code": True,
    }
    model_args_str = make_model_args_str(model_args)
    # Quote the user-supplied pieces so spaces or shell metacharacters in the
    # model paths / task list cannot split or inject into the command.
    # shlex.quote leaves ordinary ids, paths and task lists unchanged.
    cmd = (
        "lm_eval --model hf "
        f"--model_args {shlex.quote(model_args_str)} "
        f"--tasks {shlex.quote(str(tasks))} "
        "--batch_size=auto "
        "--output_path=/opt/ml/model/"
    )
    print(f"Running command: {cmd}")
    return os.system(cmd)


def make_model_args_str(model_args):
    """Serialize a mapping into the ``key=value,key=value`` form lm_eval expects.

    Args:
        model_args: Mapping of argument names to values; values are rendered
            with their default string formatting, in the mapping's order.

    Returns:
        A single comma-separated string, empty when the mapping is empty.
    """
    pairs = []
    for key, value in model_args.items():
        pairs.append(f"{key}={value}")
    return ",".join(pairs)


def _sync_s3_prefix(s3_uri, local_dir):
    """Download everything under an S3 prefix into a local directory via s5cmd.

    Args:
        s3_uri: Source ``s3://`` prefix, with or without a trailing slash.
        local_dir: Destination directory on the local filesystem.

    Returns:
        ``local_dir``, so the caller can use it as the new model path.

    Raises:
        RuntimeError: If the s5cmd command exits with a non-zero status.
    """
    import shlex  # local import: only needed to quote the shell arguments

    # s5cmd needs a trailing wildcard; make sure exactly one "/" precedes it.
    prefix = s3_uri if s3_uri.endswith("/") else s3_uri + "/"
    # Quote the wildcard so it reaches s5cmd verbatim instead of being
    # glob-expanded (or otherwise interpreted) by the shell.
    cmd = f"s5cmd sync {shlex.quote(prefix + '*')} {shlex.quote(local_dir)}"
    if os.system(cmd) != 0:
        raise RuntimeError(f"Failed to download {s3_uri} to {local_dir}")
    return local_dir


def main():
    """Parse CLI arguments, fetch the model(s) if needed and run lm_eval.

    When a PEFT adapter is supplied the evaluation runs through the hf
    backend (``run_hf``); otherwise it runs through vLLM (``run_vllm``).
    Models given as ``s3://`` paths are first downloaded to /tmp.

    Raises:
        ValueError: If no model id was supplied.
        RuntimeError: If an S3 download fails.
        Exception: If the lm_eval command exits with a non-zero status.
    """
    logging.getLogger(__name__).setLevel(transformers.logging.INFO)
    transformers.logging.set_verbosity(transformers.logging.INFO)
    transformers.logging.enable_default_handler()
    transformers.logging.enable_explicit_format()

    # parse arguments
    script_args = parse_arge()
    if script_args.hf_token:
        # Never echo any part of the token: job logs are not a safe place
        # for credentials.
        print("Logging into the Hugging Face Hub...")
        login(token=script_args.hf_token)

    model_id = script_args.model_id
    if not model_id:
        raise ValueError("--model_id is required")
    # if it is an s3 path, download the model to /tmp/model using s5cmd
    if model_id.startswith("s3://"):
        model_id = _sync_s3_prefix(model_id, "/tmp/model")

    peft_model_id = script_args.peft_model_id
    if peft_model_id and peft_model_id.startswith("s3://"):
        peft_model_id = _sync_s3_prefix(peft_model_id, "/tmp/peft_model")

    # NOTE: the former "is_lora" merge-and-upload path is disabled; adapters
    # are now evaluated directly through the hf backend instead of being
    # merged into the base model first.
    if peft_model_id:
        code = run_hf(model_id, peft_model_id, script_args.tasks)
    else:
        code = run_vllm(model_id_or_path=model_id, tasks=script_args.tasks)

    if code != 0:
        raise Exception("Evaluation job has failed")


if __name__ == "__main__":
main()


4 changes: 2 additions & 2 deletions start.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Environment for the evaluation job.
export HUGGINGFACE_HUB_CACHE=/tmp/.cache
export HF_HUB_ENABLE_HF_TRANSFER=1
export NUMEXPR_MAX_THREADS=96
# Install this package in editable mode together with its vllm extra.
pip install -e ".[vllm]"
# Log the arguments the job was started with, then hand them to start.py.
echo "$@"
# Example of the kind of command start.py ends up running:
# lm_eval --model=vllm --model_args=pretrained=epfl-llm/meditron-7b,tensor_parallel_size=8,dtype=auto,gpu_memory_utilization=0.9,trust_remote_code=True,download_dir=/home/ec2-user/SageMaker/huggingface --tasks=mmlu_clinical_knowledge,mmlu_college_biology,mmlu_college_medicine,mmlu_medical_genetics --batch_size=auto
python start.py "$@"

0 comments on commit aa64610

Please sign in to comment.