
bf16 not supported error #4036


Open
jfy1016 opened this issue Apr 29, 2025 · 1 comment

Comments


jfy1016 commented Apr 29, 2025

The script I'm running is the official example script:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

from swift.llm import (
get_model_tokenizer, load_dataset, get_template, EncodePreprocessor, get_model_arch,
get_multimodal_target_regex, LazyLLMDataset
)
from swift.utils import get_logger, get_model_parameter_info, plot_images, seed_everything
from swift.tuners import Swift, LoraConfig
from swift.trainers import Seq2SeqTrainer, Seq2SeqTrainingArguments
from functools import partial

logger = get_logger()
seed_everything(42)

# Hyperparameters for training

# model

model_id_or_path = '/home/jdn/.cache/modelscope/hub/models/deepseek-ai/deepseek-vl2-tiny'
system = None # Using the default system defined in the template.
output_dir = '/home/jdn/deepseek/output'

# dataset

dataset = '/home/jdn/train_CT_and_Xray_last_500.json' # dataset_id or dataset_path. Sampling 20000 data points
data_seed = 42
max_length = 2048
split_dataset_ratio = 0.01 # Split validation set
num_proc = 4 # The number of processes for data loading.

# lora

lora_rank = 8
lora_alpha = 32
freeze_llm = False
freeze_vit = True
freeze_aligner = True

# training_args

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-4,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_checkpointing=True,
    weight_decay=0.1,
    lr_scheduler_type='cosine',
    warmup_ratio=0.05,
    report_to=['tensorboard'],
    logging_first_step=True,
    save_strategy='steps',
    save_steps=50,
    eval_strategy='steps',
    eval_steps=50,
    gradient_accumulation_steps=16,
    # To observe the training results more quickly, this is set to 1 here.
    # Under normal circumstances, a larger number should be used.
    num_train_epochs=1,
    metric_for_best_model='loss',
    save_total_limit=5,
    logging_steps=5,
    dataloader_num_workers=4,
    data_seed=data_seed,
    remove_unused_columns=False,
)

output_dir = os.path.abspath(os.path.expanduser(output_dir))
logger.info(f'output_dir: {output_dir}')

# Obtain the model and template

model, processor = get_model_tokenizer(model_id_or_path)
# model.half()  # modified by jdn
logger.info(f'model_info: {model.model_info}')
template = get_template(model.model_meta.template, processor, default_system=system, max_length=max_length)
template.set_mode('train')
if template.use_model:
    template.model = model

# Get target_modules and add trainable LoRA modules to the model.

target_modules = get_multimodal_target_regex(model, freeze_llm=freeze_llm, freeze_vit=freeze_vit,
                                             freeze_aligner=freeze_aligner)
lora_config = LoraConfig(task_type='CAUSAL_LM', r=lora_rank, lora_alpha=lora_alpha,
                         target_modules=target_modules)
model = Swift.prepare_model(model, lora_config)
logger.info(f'lora_config: {lora_config}')

# Print model structure and trainable parameters.

logger.info(f'model: {model}')
model_parameter_info = get_model_parameter_info(model)
logger.info(f'model_parameter_info: {model_parameter_info}')

# Download and load the dataset, split it into a training set and a validation set,
# and encode the text data into tokens.
train_dataset, val_dataset = load_dataset(dataset, split_dataset_ratio=split_dataset_ratio, num_proc=num_proc,
                                          seed=data_seed)

logger.info(f'train_dataset: {train_dataset}')
logger.info(f'val_dataset: {val_dataset}')
logger.info(f'train_dataset[0]: {train_dataset[0]}')

train_dataset = LazyLLMDataset(train_dataset, template.encode, random_state=data_seed)
val_dataset = LazyLLMDataset(val_dataset, template.encode, random_state=data_seed)
data = train_dataset[0]
logger.info(f'encoded_train_dataset[0]: {data}')

template.print_inputs(data)

# Get the trainer and start the training.

model.enable_input_require_grads() # Compatible with gradient checkpointing
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=template.data_collator,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    template=template,
)
trainer.train()

last_model_checkpoint = trainer.state.last_model_checkpoint
logger.info(f'last_model_checkpoint: {last_model_checkpoint}')

# Visualize the training loss.
# You can also use the TensorBoard visualization interface during training by entering
# `tensorboard --logdir '{output_dir}/runs'` at the command line.

images_dir = os.path.join(output_dir, 'images')
logger.info(f'images_dir: {images_dir}')
plot_images(images_dir, training_args.logging_dir, ['train/loss'], 0.9) # save images

# Read and display the image.
# The light yellow line represents the actual loss value,
# while the yellow line represents the loss value smoothed with a smoothing factor of 0.9.

from IPython.display import display
from PIL import Image
image = Image.open(os.path.join(images_dir, 'train_loss.png'))
display(image)

The error I get is:

[screenshot of the error showing that bf16 is not supported]

How can I modify the code to switch from bf16 to float16?
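For reference, a minimal sketch of one way to try forcing float16 instead of bf16 in the script above. This assumes that `get_model_tokenizer` forwards a `torch_dtype` argument to model loading and that `Seq2SeqTrainingArguments` accepts the standard transformers `fp16`/`bf16` flags; neither is verified against this exact ms-swift version.

import torch

# Assumption: get_model_tokenizer passes torch_dtype through to the underlying
# from_pretrained call, so the weights are loaded directly in float16 rather than bf16.
model, processor = get_model_tokenizer(model_id_or_path, torch_dtype=torch.float16)

# Assumption: Seq2SeqTrainingArguments inherits the transformers mixed-precision
# flags, so fp16 can be selected explicitly and bf16 disabled.
training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    fp16=True,    # use float16 mixed precision
    bf16=False,   # do not let bf16 be picked automatically
    # ... keep the remaining arguments from the script above ...
)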

Jintao-Huang (Collaborator)

This is an issue of the device not supporting bf16; we suggest buying a newer device.
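As a quick diagnostic, the following generic PyTorch snippet (not part of the ms-swift example) reports whether the currently visible GPU supports bf16; bf16 generally requires an Ampere-or-newer GPU, i.e. compute capability 8.0 or higher.

import torch

# bf16 is only available on GPUs with compute capability >= 8.0 (Ampere or newer).
major, minor = torch.cuda.get_device_capability()
print(f'compute capability: {major}.{minor}')
print(f'bf16 supported: {torch.cuda.is_bf16_supported()}')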
