Problem
When trying to load a logged (i.e. saved) peft model from the mlflow server, I get an "Error(s) in loading state_dict for PeftModelForCausalLM: size mismatch". When I load the very same model from a local file, it works :thinking:
I'm on mlflow 2.15.1 and transformers 4.42.4.
Any ideas on how to solve this?
Example
Logging a peft model to mlflow
<code>import mlflow
import torch
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer, setup_chat_format

train_dataset = ...
eval_dataset = ...

model_id = "LeoLM/leo-hessianai-7b-chat-bilingual"

# Load model and tokenizer (quantization settings elided)
bnb_config = BitsAndBytesConfig(...)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer_no_pad = AutoTokenizer.from_pretrained(model_id, add_bos_token=True)

# Adds the ChatML special tokens and resizes the model's embeddings
model, tokenizer = setup_chat_format(model, tokenizer)

peft_config = LoraConfig(...)
args = TrainingArguments(...)
peft_model = get_peft_model(model, peft_config)

# Define Trainer
trainer = SFTTrainer(
    model=peft_model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    packing=True,
)

# mlflow
mlflow.set_experiment("my_experiment")
with mlflow.start_run() as run:
    mlflow.transformers.autolog()
    trainer.train()
    trainer.save_model()
    components = {
        "model": trainer.model,
        "tokenizer": tokenizer_no_pad,
    }
    mlflow.transformers.log_model(
        transformers_model=components,
        artifact_path="model",
    )

del model
del trainer
torch.cuda.empty_cache()
</code>
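Side note: setup_chat_format adds the ChatML special tokens to the tokenizer and resizes the model's embedding matrix, so the vocab size after this step no longer matches the stock checkpoint; maybe that is related. A small diagnostic one could drop in right after the setup_chat_format call to record the sizes:
<code># Diagnostic sketch: record tokenizer and embedding sizes right after
# setup_chat_format, to compare against the numbers in the later
# size-mismatch error (32007 vs. 32128).
print("len(tokenizer)         :", len(tokenizer))
print("len(tokenizer_no_pad)  :", len(tokenizer_no_pad))
print("model.config.vocab_size:", model.config.vocab_size)
print("embed_tokens shape     :", tuple(model.get_input_embeddings().weight.shape))
</code>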
✔️ Loading the peft model from a local file — works
<code>import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

local_peft_model = "models/leo-hessianai-7b-chat-bilingual"

# Load Model with PEFT adapter
model = AutoPeftModelForCausalLM.from_pretrained(
    local_peft_model, device_map="auto", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(local_peft_model)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
</code>
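E.g. a quick smoke test of the locally loaded pipeline (the prompt is just a placeholder) runs without any complaints:
<code># Smoke test: the locally loaded pipeline generates without errors
out = pipe("Hello, how are you?", max_new_tokens=20)
print(out[0]["generated_text"])
</code>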
❌ Loading the peft model via mlflow.transformers.load_model() — does not work
<code>model_from_mlflow = mlflow.transformers.load_model(
    "mlflow-artifacts:/path/to/artifacts/peft_model",
)
</code>
Throws this error:
<code>Downloading artifacts: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [01:31<00:00, 9.15s/it]
Downloading artifacts: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 9.40it/s]
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00, 2.40s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/mlflow/utils/docstring_utils.py", line 423, in version_func
    return func(*args, **kwargs)
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/mlflow/transformers/__init__.py", line 1033, in load_model
    return _load_model(local_model_path, flavor_config, return_type, device, **kwargs)
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/mlflow/transformers/__init__.py", line 1221, in _load_model
    model_and_components[FlavorKey.MODEL] = get_model_with_peft_adapter(
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/mlflow/transformers/peft.py", line 50, in get_model_with_peft_adapter
    return PeftModel.from_pretrained(base_model, peft_adapter_path)
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/peft/peft_model.py", line 430, in from_pretrained
    model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs)
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/peft/peft_model.py", line 988, in load_adapter
    load_result = set_peft_model_state_dict(
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/peft/utils/save_and_load.py", line 353, in set_peft_model_state_dict
    load_result = model.load_state_dict(peft_model_state_dict, strict=False)
  File "/git_repos/myproject/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 2215, in load_state_dict
    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for PeftModelForCausalLM:
	size mismatch for base_model.model.model.embed_tokens.weight: copying a param with shape torch.Size([32007, 4096]) from checkpoint, the shape in current model is torch.Size([32128, 4096]).
	size mismatch for base_model.model.lm_head.weight: copying a param with shape torch.Size([32007, 4096]) from checkpoint, the shape in current model is torch.Size([32128, 4096]).
</code>
I honestly don't know where this size mismatch between the models comes from…
I have tried resizing the token embeddings to match the tokenizer's vocabulary size via model.resize_token_embeddings(len(tokenizer)), which did not help, and I have set max_seq_length=512 (a rather low value), which also did not help…
Any ideas on how to either fix the size problem or make mlflow.transformers.load_model behave the way local loading does?
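For now, a possible workaround sketch: download the logged artifacts and load them through the local code path that works, instead of mlflow.transformers.load_model(). This assumes the downloaded directory (or a subdirectory of it, the layout is mlflow's) contains the adapter_config.json that AutoPeftModelForCausalLM needs:
<code># Workaround sketch: bypass mlflow.transformers.load_model() and reuse the
# working local loading path on the downloaded artifacts. Point
# from_pretrained at whichever subdirectory holds adapter_config.json.
import mlflow
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

local_dir = mlflow.artifacts.download_artifacts(
    "mlflow-artifacts:/path/to/artifacts/peft_model"
)
model = AutoPeftModelForCausalLM.from_pretrained(
    local_dir, device_map="auto", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(local_dir)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
</code>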