Describe the bug / 问题描述 (Mandatory / 必填)

Loading the model with mindnlp 0.4.1 fails with:

RuntimeError: Unsupported data type!
----------------------------------------------------
- C++ Call Stack: (For framework developers)
----------------------------------------------------
mindspore/ccsrc/pybind_api/ir/tensor_py.cc:361 MakeTensorOfNumpy
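The stack trace points at MakeTensorOfNumpy, i.e. the conversion of a numpy array into a mindspore.Tensor during model loading, which suggests a checkpoint weight arrives with a numpy dtype MindSpore cannot map. As a hedged diagnostic sketch (the file path is hypothetical, and it assumes the checkpoint was cached as a safetensors file), the documented safetensors layout (an 8-byte little-endian header length followed by a JSON table) can be read with only the stdlib to list each stored tensor's dtype:

# Hedged diagnostic sketch: print the dtype of every tensor in the cached
# checkpoint, using only the documented safetensors file layout
# (8-byte little-endian header length, then a JSON table of
# {tensor_name: {"dtype": ..., "shape": ..., "data_offsets": ...}}).
import json
import struct

path = "model.safetensors"  # hypothetical path; point at the cached checkpoint
with open(path, "rb") as f:
    header_len = struct.unpack("<Q", f.read(8))[0]
    header = json.loads(f.read(header_len))

for name, meta in header.items():
    if name != "__metadata__":  # optional metadata entry, not a tensor
        print(name, meta["dtype"], meta["shape"])

Any dtype listed here that MindSpore has no mapping for would be a candidate cause of the RuntimeError.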
- Hardware Environment / 硬件环境: Ascend
Ascend: 1*ascend-snt9b1 | ARM: 24 cores, 192 GB
- Software Environment / 软件环境 (Mandatory / 必填):
-- Python version: Python 3.9.10
-- MindSpore version: mindspore 2.3.1
-- MindNLP version: mindnlp 0.4.1
- Execute Mode / 执行模式 (Mandatory / 必填): PyNative
To Reproduce / 重现步骤 (Mandatory / 必填)

Run the following script; the error occurs while loading the model:
# The original notebook cell began with "!export HF_ENDPOINT=http://hf.co";
# a subshell export does not persist into the Python process, so the
# endpoint is set via os.environ below instead.
import os
import mindspore
from mindnlp.transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from mindnlp.peft import get_peft_config, get_peft_model, get_peft_model_state_dict, PromptTuningConfig, TaskType
from mindnlp.dataset import load_dataset, BaseMapFunction
from mindspore.dataset import GeneratorDataset
from mindnlp.core import ops, optim
import numpy as np
from threading import Lock
from mindnlp.transformers.optimization import get_linear_schedule_with_warmup
from tqdm import tqdm
from mindnlp.peft.tuners.prompt_tuning.config import PromptTuningInit

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_ENDPOINT"] = "http://hf-mirror.com"
if "RANK_TABLE_FILE" in os.environ:
    del os.environ["RANK_TABLE_FILE"]

# model_name_or_path = "t5-large"
# tokenizer_name_or_path = "t5-large"
model_name_or_path = "google-t5/t5-large"
tokenizer_name_or_path = "google-t5/t5-large"

checkpoint_name = "financial_sentiment_analysis_prompt_tuning_v1.ckpt"
text_column = "sentence"
label_column = "text_label"
max_length = 128
lr = 1e-3
num_epochs = 8
batch_size = 8

# Create the prompt-tuning PEFT configuration and model.
peft_config = PromptTuningConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=20,
    prompt_tuning_init_text="What is the sentiment of this article?\n",
    inference_mode=False,
    tokenizer_name_or_path=model_name_or_path,
)

model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)  # the RuntimeError above is raised during this load
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
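If the failure does happen while converting checkpoint weights to MindSpore tensors, one hedged workaround sketch is to pin the load dtype to one MindSpore certainly supports. ms_dtype is mindnlp's analogue of transformers' torch_dtype; whether it actually sidesteps this particular error is an assumption, not something verified here. The snippet reuses the imports and variables from the script above:

# Hedged workaround sketch, assuming the RuntimeError comes from an unmapped
# source dtype during weight conversion: force fp32 weights at load time.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name_or_path,
    ms_dtype=mindspore.float32,  # mindnlp's analogue of torch_dtype
)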
Expected behavior / 预期结果 (Mandatory / 必填)

For reference, the equivalent PyTorch/PEFT code loads successfully and prints the following model structure:
PeftModelForSeq2SeqLM(
  (base_model): T5ForConditionalGeneration(
    (shared): Embedding(32128, 1024)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32128, 1024)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=1024, bias=False)
                (k): Linear(in_features=1024, out_features=1024, bias=False)
                (v): Linear(in_features=1024, out_features=1024, bias=False)
                (o): Linear(in_features=1024, out_features=1024, bias=False)
                (relative_attention_bias): Embedding(32, 16)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=1024, out_features=4096, bias=False)
                (wo): Linear(in_features=4096, out_features=1024, bias=False)
                (dropout): Dropout(p=0.1, inplace=False)
                (act): ReLU()
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
        (1-23): 23 x T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=1024, bias=False)
                (k): Linear(in_features=1024, out_features=1024, bias=False)
                (v): Linear(in_features=1024, out_features=1024, bias=False)
                (o): Linear(in_features=1024, out_features=1024, bias=False)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=1024, out_features=4096, bias=False)
                (wo): Linear(in_features=4096, out_features=1024, bias=False)
                (dropout): Dropout(p=0.1, inplace=False)
                (act): ReLU()
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (final_layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (decoder): T5Stack(
      (embed_tokens): Embedding(32128, 1024)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=1024, bias=False)
                (k): Linear(in_features=1024, out_features=1024, bias=False)
                (v): Linear(in_features=1024, out_features=1024, bias=False)
                (o): Linear(in_features=1024, out_features=1024, bias=False)
                (relative_attention_bias): Embedding(32, 16)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerCrossAttention(
              (EncDecAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=1024, bias=False)
                (k): Linear(in_features=1024, out_features=1024, bias=False)
                (v): Linear(in_features=1024, out_features=1024, bias=False)
                (o): Linear(in_features=1024, out_features=1024, bias=False)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (2): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=1024, out_features=4096, bias=False)
                (wo): Linear(in_features=4096, out_features=1024, bias=False)
                (dropout): Dropout(p=0.1, inplace=False)
                (act): ReLU()
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
        (1-23): 23 x T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=1024, bias=False)
                (k): Linear(in_features=1024, out_features=1024, bias=False)
                (v): Linear(in_features=1024, out_features=1024, bias=False)
                (o): Linear(in_features=1024, out_features=1024, bias=False)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerCrossAttention(
              (EncDecAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=1024, bias=False)
                (k): Linear(in_features=1024, out_features=1024, bias=False)
                (v): Linear(in_features=1024, out_features=1024, bias=False)
                (o): Linear(in_features=1024, out_features=1024, bias=False)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (2): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=1024, out_features=4096, bias=False)
                (wo): Linear(in_features=4096, out_features=1024, bias=False)
                (dropout): Dropout(p=0.1, inplace=False)
                (act): ReLU()
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (final_layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (lm_head): Linear(in_features=1024, out_features=32128, bias=False)
  )
  (prompt_encoder): ModuleDict(
    (default): PromptEmbedding(
      (embedding): Embedding(40, 1024)
    )
  )
  (word_embeddings): Embedding(32128, 1024)
)
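For scale: the prompt encoder above is Embedding(40, 1024), since PEFT doubles the 20 configured virtual tokens across the encoder and decoder submodules for a SEQ_2_SEQ_LM task, so print_trainable_parameters() is expected to report 40 × 1024 = 40,960 trainable parameters out of t5-large's roughly 737M total.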
Screenshots / 日志 / 截图 (Mandatory / 必填)

The full error log is included in the description above.