From aa64610a16261c5e11c116f0c81aea461fcfe5b9 Mon Sep 17 00:00:00 2001
From: Khalid Ghiboub
Date: Sun, 17 Dec 2023 17:08:36 +0400
Subject: [PATCH] Adding new cmds

---
 start.py | 131 +++++++++++++++++++++++++++++++++++++------------------
 start.sh |   4 +-
 2 files changed, 90 insertions(+), 45 deletions(-)

diff --git a/start.py b/start.py
index a8c4bf72ea..16efdf4ac3 100644
--- a/start.py
+++ b/start.py
@@ -1,12 +1,8 @@
 import os
 import argparse
-from typing import Optional
 
-import torch
 import transformers
-from peft import AutoPeftModelForCausalLM
 import logging
-from transformers import AutoTokenizer
 
 from huggingface_hub import login
 
@@ -17,7 +13,12 @@ def parse_arge():
     parser.add_argument(
         "--model_id",
         type=str,
-        help="Model id to use for training.",
+        help="Huggingface Model id or s3 path.",
+    )
+    parser.add_argument(
+        "--peft_model_id",
+        type=str,
+        help="peft model huggingface id or s3 path",
     )
     parser.add_argument(
         "--hf_token",
@@ -54,20 +55,60 @@ def parse_arge():
     return args
 
 
-if __name__ == "__main__":
+def run_vllm(model_id_or_path, tasks):
+    model_args = {
+        "pretrained": model_id_or_path,  # required: taken from UI, no default value
+        "tensor_parallel_size": 8,
+        "dtype": 'auto',
+        "gpu_memory_utilization": 0.90,
+        "trust_remote_code": True,
+        "quantization": "awq"
+    }
+    model_args_str = make_model_args_str(model_args)
+    cmd = f"lm_eval --model=vllm \
+        --model_args={model_args_str} \
+        --tasks={tasks} \
+        --batch_size=auto \
+        --output_path=/opt/ml/model/"
+    print(f"Running command: {cmd}")
+    return os.system(cmd)
+
+
+def run_hf(model_id_path, peft_model_id_or_path, tasks):
+    model_args = {
+        "pretrained": model_id_path,  # required: taken from UI, no default value
+        "peft": peft_model_id_or_path,
+        "parallelize": True,
+        "trust_remote_code": True
+    }
+    model_args_str = make_model_args_str(model_args)
+    cmd = f"lm_eval --model hf \
+        --model_args {model_args_str} \
+        --tasks {tasks} \
+        --batch_size=auto \
+        --output_path=/opt/ml/model/"
+    print(f"Running command: {cmd}")
+    return os.system(cmd)
+
+
+def make_model_args_str(model_args):
+    model_args_str = ",".join([f"{k}={v}" for k, v in model_args.items()])
+    return model_args_str
+
+
+def main():
     logger = logging.getLogger(__name__)
     logger.setLevel(transformers.logging.INFO)
     transformers.logging.set_verbosity(transformers.logging.INFO)
     transformers.logging.enable_default_handler()
     transformers.logging.enable_explicit_format()
-
     # parse arguments
     script_args = parse_arge()
     if script_args.hf_token:
         print(f"Logging into the Hugging Face Hub with token {script_args.hf_token[:10]}...")
         login(token=script_args.hf_token)
-
     model_id = script_args.model_id
+    peft_model_id = script_args.peft_model_id
     # if is an s3 path, download the model to /tmp/model using s5cmd
     if model_id.startswith("s3://"):
         # add /* to model id but make sure it doesn't already have / at the end
@@ -75,40 +116,44 @@ def parse_arge():
             model_id += "/"
         os.system(f"s5cmd sync {model_id}* /tmp/model")
         model_id = "/tmp/model"
-    if script_args.is_lora:
-        # merge the model
-        model = AutoPeftModelForCausalLM.from_pretrained(
-            model_id,
-            low_cpu_mem_usage=True,
-            torch_dtype=torch.float16,
-            use_auth_token=True,
-        )
-        model = model.merge_and_unload()
-        merged_model_path = "/tmp/merged_model"
-        model.save_pretrained(merged_model_path, safe_serialization=True, max_shard_size="10GB")
-        # tokenizer = AutoTokenizer.from_pretrained(model_id)
-        # tokenizer.save_pretrained(merged_model_path)
-        if script_args.repository_id is not None and len(script_args.repository_id) > 0:
-            print("uploading to hub")
-            from huggingface_hub import HfApi
-
-            api = HfApi()
-            future = api.upload_folder(folder_path=merged_model_path, repo_id=script_args.repository_id,
-                                       repo_type="model", run_as_future=True)
-            future.add_done_callback(lambda p: print(f"Uploaded to {p.result()}"))
-            model_id = merged_model_path
-
-    model_args = {
-        "pretrained": model_id,  # required: taken from UI, no default value
-        "tensor_parallel_size": 8,
-        "dtype": 'auto',
-        "gpu_memory_utilization": 0.90,
-        "trust_remote_code": True
-    }
+    if peft_model_id is not None and peft_model_id.startswith("s3://"):
+        # add /* to model id but make sure it doesn't already have / at the end
+        if peft_model_id[-1] != "/":
+            peft_model_id += "/"
+        os.system(f"s5cmd sync {peft_model_id}* /tmp/peft_model")
+        peft_model_id = "/tmp/peft_model"
+    # if script_args.is_lora:
+    #     # merge the model
+    #     model = AutoPeftModelForCausalLM.from_pretrained(
+    #         model_id,
+    #         low_cpu_mem_usage=True,
+    #         torch_dtype=torch.float16,
+    #         use_auth_token=True,
+    #     )
+    #     model = model.merge_and_unload()
+    #     merged_model_path = "/tmp/merged_model"
+    #     model.save_pretrained(merged_model_path, safe_serialization=True, max_shard_size="10GB")
+    #     # tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #     # tokenizer.save_pretrained(merged_model_path)
+    #     if script_args.repository_id is not None and len(script_args.repository_id) > 0:
+    #         print("uploading to hub")
+    #         from huggingface_hub import HfApi
+    #
+    #         api = HfApi()
+    #         future = api.upload_folder(folder_path=merged_model_path, repo_id=script_args.repository_id,
+    #                                    repo_type="model", run_as_future=True)
+    #         future.add_done_callback(lambda p: print(f"Uploaded to {p.result()}"))
+    #         model_id = merged_model_path
+    if peft_model_id is not None and len(peft_model_id) > 0:
+        code = run_hf(model_id, peft_model_id, script_args.tasks)
+    else:
+        code = run_vllm(model_id_or_path=model_id, tasks=script_args.tasks)
 
-    model_args_str = ",".join([f"{k}={v}" for k, v in model_args.items()])
-    cmd = f"lm_eval --model=vllm --model_args={model_args_str} --tasks={script_args.tasks} --batch_size=auto --output_path=/opt/ml/model/"
-    print(f"Running command: {cmd}")
-    code = os.system(cmd)
     if code != 0:
-        raise Exception("lm_eval failed")
+        raise Exception("Evaluation job has failed")
+
+
+if __name__ == "__main__":
+    main()
+
+
diff --git a/start.sh b/start.sh
index b8e109cca7..e78c5884bd 100644
--- a/start.sh
+++ b/start.sh
@@ -1,7 +1,7 @@
 export HUGGINGFACE_HUB_CACHE=/tmp/.cache
 export HF_HUB_ENABLE_HF_TRANSFER=1
+export NUMEXPR_MAX_THREADS=96
 pip install -e .
 pip install -e ".[vllm]"
 echo "$@"
-python start.py "$@"
-# lm_eval --model=vllm --model_args=pretrained=epfl-llm/meditron-7b,tensor_parallel_size=8,dtype=auto,gpu_memory_utilization=0.9,trust_remote_code=True,download_dir=/home/ec2-user/SageMaker/huggingface --tasks=mmlu_clinical_knowledge,mmlu_college_biology,mmlu_college_medicine,mmlu_medical_genetics --batch_size=auto
\ No newline at end of file
+python start.py "$@"
\ No newline at end of file