The script I ran is the official example script:

```python
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
from swift.llm import (
get_model_tokenizer, load_dataset, get_template, EncodePreprocessor, get_model_arch,
get_multimodal_target_regex, LazyLLMDataset
)
from swift.utils import get_logger, get_model_parameter_info, plot_images, seed_everything
from swift.tuners import Swift, LoraConfig
from swift.trainers import Seq2SeqTrainer, Seq2SeqTrainingArguments
from functools import partial
logger = get_logger()
seed_everything(42)
# Hyperparameters for training
# model
model_id_or_path = '/home/jdn/.cache/modelscope/hub/models/deepseek-ai/deepseek-vl2-tiny'
system = None # Using the default system defined in the template.
output_dir = '/home/jdn/deepseek/output'
# dataset
dataset = '/home/jdn/train_CT_and_Xray_last_500.json'  # dataset_id or dataset_path
data_seed = 42
max_length = 2048
split_dataset_ratio = 0.01  # fraction of the data held out as the validation set
num_proc = 4 # The number of processes for data loading.
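
# For reference: load_dataset() reads this local JSON directly. Each record should be in
# ms-swift's messages format; roughly (a hypothetical sample, field layout per the
# ms-swift custom-dataset docs, which may vary by version):
# {"messages": [{"role": "user", "content": "<image>Describe this scan."},
#               {"role": "assistant", "content": "..."}],
#  "images": ["/path/to/scan.png"]}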
# lora
lora_rank = 8
lora_alpha = 32
freeze_llm = False
freeze_vit = True
freeze_aligner = True
# training_args
training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-4,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_checkpointing=True,
    weight_decay=0.1,
    lr_scheduler_type='cosine',
    warmup_ratio=0.05,
    report_to=['tensorboard'],
    logging_first_step=True,
    save_strategy='steps',
    save_steps=50,
    eval_strategy='steps',
    eval_steps=50,
    gradient_accumulation_steps=16,
    # To observe the training results more quickly, this is set to 1 here.
    # Under normal circumstances, a larger number should be used.
    num_train_epochs=1,
    metric_for_best_model='loss',
    save_total_limit=5,
    logging_steps=5,
    dataloader_num_workers=4,
    data_seed=data_seed,
    remove_unused_columns=False,
)
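
# Note: effective batch size per optimizer step = per_device_train_batch_size (1)
# * gradient_accumulation_steps (16) = 16 samples on the single visible GPU.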
output_dir = os.path.abspath(os.path.expanduser(output_dir))
logger.info(f'output_dir: {output_dir}')
# Obtain the model and template
model, processor = get_model_tokenizer(model_id_or_path)
# model.half()  # modified by jdn
logger.info(f'model_info: {model.model_info}')
template = get_template(model.model_meta.template, processor, default_system=system, max_length=max_length)
template.set_mode('train')
if template.use_model:
    template.model = model
# Get target_modules and add trainable LoRA modules to the model.
target_modules = get_multimodal_target_regex(model, freeze_llm=freeze_llm, freeze_vit=freeze_vit,
                                             freeze_aligner=freeze_aligner)
lora_config = LoraConfig(task_type='CAUSAL_LM', r=lora_rank, lora_alpha=lora_alpha,
                         target_modules=target_modules)
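# With r=8 and lora_alpha=32, the LoRA update is scaled by lora_alpha / r = 32 / 8 = 4.0
# before being added to the frozen base weights.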
model = Swift.prepare_model(model, lora_config)
logger.info(f'lora_config: {lora_config}')
# Print model structure and trainable parameters.
logger.info(f'model: {model}')
model_parameter_info = get_model_parameter_info(model)
logger.info(f'model_parameter_info: {model_parameter_info}')
# Download and load the dataset, split it into a training set and a validation set,
# and encode the text data into tokens.
train_dataset, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,
                                          seed=data_seed)
logger.info(f'train_dataset: {train_dataset}')
logger.info(f'val_dataset: {val_dataset}')
logger.info(f'train_dataset[0]: {train_dataset[0]}')
train_dataset = LazyLLMDataset(train_dataset, template.encode, random_state=data_seed)
val_dataset = LazyLLMDataset(val_dataset, template.encode, random_state=data_seed)
data = train_dataset[0]
logger.info(f'encoded_train_dataset[0]: {data}')
template.print_inputs(data)
# Get the trainer and start the training.
model.enable_input_require_grads() # Compatible with gradient checkpointing
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=template.data_collator,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    template=template,
)
trainer.train()
last_model_checkpoint = trainer.state.last_model_checkpoint
logger.info(f'last_model_checkpoint: {last_model_checkpoint}')
# Visualize the training loss.
# You can also use the TensorBoard visualization interface during training by entering
# `tensorboard --logdir '{output_dir}/runs'` at the command line.
images_dir = os.path.join(output_dir, 'images')
logger.info(f'images_dir: {images_dir}')
plot_images(images_dir, training_args.logging_dir, ['train/loss'], 0.9) # save images
# Read and display the image.
# The light yellow line represents the actual loss values, while the yellow line
# represents the loss smoothed with a smoothing factor of 0.9.
from IPython.display import display
from PIL import Image
image = Image.open(os.path.join(images_dir, 'train_loss.png'))
display(image)
```
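
For reference, a minimal sketch of running inference with the resulting LoRA checkpoint, assuming ms-swift's `PtEngine` / `InferRequest` API (not part of the script above; the message text and image path are placeholders):

```python
from swift.llm import PtEngine, RequestConfig, InferRequest

# Load the base model together with the LoRA adapter saved by the trainer.
engine = PtEngine(model_id_or_path, adapters=[last_model_checkpoint])
request = InferRequest(messages=[{'role': 'user', 'content': '<image>Describe this scan.'}],
                       images=['/path/to/scan.png'])
resp_list = engine.infer([request], RequestConfig(max_tokens=512, temperature=0))
print(resp_list[0].choices[0].message.content)
```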

The error reported is: