Issues while fine-tuning an already-trained LayoutLMv3 classification model

I trained a LayoutLMv3 document classification model with PyTorch Lightning on a few document classes. After training, I uploaded the model weights to the Hugging Face Hub.
ISSUE: My model1 was trained on 4 classes, and now I want to fine-tune model1 on other data that has different classes. I want to reuse the same weights so that model2 is able to classify documents from both datasets (dataset1 classes + dataset2 classes). I'm having trouble removing the last layers.
Could anybody please tell me what changes I need to make in order to fine-tune model1 on different data?

Could anybody please tell me what changes I need to make in order to fine-tune model1 on different data?
Thanks!

I tried the code below, but it looks like the model weights are newly initialized.

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
<code>model = LayoutLMv3ForSequenceClassification.from_pretrained(
'user/mymodel1',
num_labels=len(DOCUMENT_CLASSES),
ignore_mismatched_sizes=True,
)
# warning: Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at 314e/mymodel1 and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
</code>
<code>model = LayoutLMv3ForSequenceClassification.from_pretrained( 'user/mymodel1', num_labels=len(DOCUMENT_CLASSES), ignore_mismatched_sizes=True, ) # warning: Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at 314e/mymodel1 and are newly initialized because the shapes did not match: - classifier.out_proj.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated - classifier.out_proj.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. </code>
# Load the previously fine-tuned checkpoint, but size the classification head
# for the current label set. ignore_mismatched_sizes=True lets loading proceed
# when a checkpoint tensor's shape differs from the instantiated model; the
# tensors whose shapes differ are then newly initialized (see the warning
# printed below).
model = LayoutLMv3ForSequenceClassification.from_pretrained(
            'user/mymodel1',
            num_labels=len(DOCUMENT_CLASSES),
            ignore_mismatched_sizes=True,
)

# warning: Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at 314e/mymodel1 and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Below is the code snippet that I used to train model1.

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
<code>DOCUMENT_CLASSES = sorted(list(map(
lambda p: p.name,
Path(TRAIN_DS_PATH).glob("*")
)))
train_images = list(TRAIN_DS_PATH.glob('*/*.jpg'))
test_images = list(TEST_DS_PATH.glob('*/*.jpg'))
val_images = list(VAL_DS_PATH.glob('*/*.jpg'))
def scale_bounding_box(box: List[int], width_scale : float = 1.0, height_scale : float = 1.0) -> List[int]:
return [
int(box[0] * width_scale),
int(box[1] * height_scale),
int(box[2] * width_scale),
int(box[3] * height_scale)
]
class DocumentClassificationDataset(Dataset):
def __init__(self, image_paths, processor):
self.image_paths = image_paths
self.processor = processor
def __len__(self):
return len(self.image_paths)
def __getitem__(self, item):
image_path = self.image_paths[item]
json_path = image_path.with_suffix(".json")
with json_path.open("r") as f:
ocr_result = json.load(f)
with Image.open(image_path).convert("RGB") as image:
width, height = image.size
width_scale = 1000 / width
height_scale = 1000 / height
words = []
boxes = []
for row in ocr_result:
boxes.append(scale_bounding_box(
row["bounding_box"],
width_scale,
height_scale
))
words.append(row["word"])
encoding = self.processor(
image,
words,
boxes=boxes,
max_length=512,
padding="max_length",
truncation=True,
return_tensors="pt"
)
label = DOCUMENT_CLASSES.index(image_path.parent.name)
return dict(
input_ids=encoding["input_ids"].flatten(),
attention_mask=encoding["attention_mask"].flatten(),
bbox=encoding["bbox"].flatten(end_dim=1),
pixel_values=encoding["pixel_values"].flatten(end_dim=1),
labels=torch.tensor(label, dtype=torch.long)
)
train_dataset = DocumentClassificationDataset(train_images, processor)
val_dataset = DocumentClassificationDataset(val_images, processor)
train_data_loader = DataLoader(
train_dataset,
batch_size=4,
shuffle=True,
num_workers=4
)
val_data_loader = DataLoader(
val_dataset,
batch_size=4,
shuffle=False,
num_workers=4
)
class ModelModule(pl.LightningModule):
def __init__(self, n_classes:int):
super().__init__()
self.model = LayoutLMv3ForSequenceClassification.from_pretrained(
"microsoft/layoutlmv3-base",
num_labels=n_classes
)
self.model.config.id2label = {k: v for k, v in enumerate(DOCUMENT_CLASSES)}
self.model.config.label2id = {v: k for k, v in enumerate(DOCUMENT_CLASSES)}
self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
self.val_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None):
return self.model(
input_ids,
attention_mask=attention_mask,
bbox=bbox,
pixel_values=pixel_values,
labels=labels
)
def training_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
bbox = batch["bbox"]
pixel_values = batch["pixel_values"]
labels = batch["labels"]
output = self(input_ids, attention_mask, bbox, pixel_values, labels)
self.log("train_loss", output.loss)
self.log(
"train_acc",
self.train_accuracy(output.logits, labels),
on_step=True,
on_epoch=True
)
return output.loss
def validation_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
bbox = batch["bbox"]
pixel_values = batch["pixel_values"]
labels = batch["labels"]
output = self(input_ids, attention_mask, bbox, pixel_values, labels)
self.log("val_loss", output.loss)
self.log(
"val_acc",
self.val_accuracy(output.logits, labels),
on_step=False,
on_epoch=True
)
return output.loss
def configure_optimizers(self):
optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001) #1e-5
return optimizer
model_module = ModelModule(len(DOCUMENT_CLASSES))
# Add early stopping
from pytorch_lightning.callbacks import EarlyStopping
early_stopping = EarlyStopping(
monitor='val_loss', # Monitoring validation loss
min_delta=0.00, # Minimum change to qualify as an improvement
patience=5, # Number of epochs with no improvement after which training will be stopped
verbose=True, # Whether to print logs to stdout
mode='min' # `min` mode means training will stop when the quantity monitored has stopped decreasing
)
model_checkpoint = ModelCheckpoint(
filename="{epoch}-{step}-{val_loss:.4f}", save_last=True, save_top_k=3, monitor="val_loss", mode="min"
)
trainer = pl.Trainer(
accelerator="gpu",
precision=16,
max_epochs=10,
callbacks=[
model_checkpoint,
early_stopping
]
)
trainer.fit(model_module, train_data_loader, val_data_loader)
</code>
<code>DOCUMENT_CLASSES = sorted(list(map( lambda p: p.name, Path(TRAIN_DS_PATH).glob("*") ))) train_images = list(TRAIN_DS_PATH.glob('*/*.jpg')) test_images = list(TEST_DS_PATH.glob('*/*.jpg')) val_images = list(VAL_DS_PATH.glob('*/*.jpg')) def scale_bounding_box(box: List[int], width_scale : float = 1.0, height_scale : float = 1.0) -> List[int]: return [ int(box[0] * width_scale), int(box[1] * height_scale), int(box[2] * width_scale), int(box[3] * height_scale) ] class DocumentClassificationDataset(Dataset): def __init__(self, image_paths, processor): self.image_paths = image_paths self.processor = processor def __len__(self): return len(self.image_paths) def __getitem__(self, item): image_path = self.image_paths[item] json_path = image_path.with_suffix(".json") with json_path.open("r") as f: ocr_result = json.load(f) with Image.open(image_path).convert("RGB") as image: width, height = image.size width_scale = 1000 / width height_scale = 1000 / height words = [] boxes = [] for row in ocr_result: boxes.append(scale_bounding_box( row["bounding_box"], width_scale, height_scale )) words.append(row["word"]) encoding = self.processor( image, words, boxes=boxes, max_length=512, padding="max_length", truncation=True, return_tensors="pt" ) label = DOCUMENT_CLASSES.index(image_path.parent.name) return dict( input_ids=encoding["input_ids"].flatten(), attention_mask=encoding["attention_mask"].flatten(), bbox=encoding["bbox"].flatten(end_dim=1), pixel_values=encoding["pixel_values"].flatten(end_dim=1), labels=torch.tensor(label, dtype=torch.long) ) train_dataset = DocumentClassificationDataset(train_images, processor) val_dataset = DocumentClassificationDataset(val_images, processor) train_data_loader = DataLoader( train_dataset, batch_size=4, shuffle=True, num_workers=4 ) val_data_loader = DataLoader( val_dataset, batch_size=4, shuffle=False, num_workers=4 ) class ModelModule(pl.LightningModule): def __init__(self, n_classes:int): super().__init__() self.model = 
LayoutLMv3ForSequenceClassification.from_pretrained( "microsoft/layoutlmv3-base", num_labels=n_classes ) self.model.config.id2label = {k: v for k, v in enumerate(DOCUMENT_CLASSES)} self.model.config.label2id = {v: k for k, v in enumerate(DOCUMENT_CLASSES)} self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes) self.val_accuracy = Accuracy(task="multiclass", num_classes=n_classes) def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None): return self.model( input_ids, attention_mask=attention_mask, bbox=bbox, pixel_values=pixel_values, labels=labels ) def training_step(self, batch, batch_idx): input_ids = batch["input_ids"] attention_mask = batch["attention_mask"] bbox = batch["bbox"] pixel_values = batch["pixel_values"] labels = batch["labels"] output = self(input_ids, attention_mask, bbox, pixel_values, labels) self.log("train_loss", output.loss) self.log( "train_acc", self.train_accuracy(output.logits, labels), on_step=True, on_epoch=True ) return output.loss def validation_step(self, batch, batch_idx): input_ids = batch["input_ids"] attention_mask = batch["attention_mask"] bbox = batch["bbox"] pixel_values = batch["pixel_values"] labels = batch["labels"] output = self(input_ids, attention_mask, bbox, pixel_values, labels) self.log("val_loss", output.loss) self.log( "val_acc", self.val_accuracy(output.logits, labels), on_step=False, on_epoch=True ) return output.loss def configure_optimizers(self): optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001) #1e-5 return optimizer model_module = ModelModule(len(DOCUMENT_CLASSES)) # Add early stopping from pytorch_lightning.callbacks import EarlyStopping early_stopping = EarlyStopping( monitor='val_loss', # Monitoring validation loss min_delta=0.00, # Minimum change to qualify as an improvement patience=5, # Number of epochs with no improvement after which training will be stopped verbose=True, # Whether to print logs to stdout mode='min' # `min` mode means training will 
stop when the quantity monitored has stopped decreasing ) model_checkpoint = ModelCheckpoint( filename="{epoch}-{step}-{val_loss:.4f}", save_last=True, save_top_k=3, monitor="val_loss", mode="min" ) trainer = pl.Trainer( accelerator="gpu", precision=16, max_epochs=10, callbacks=[ model_checkpoint, early_stopping ] ) trainer.fit(model_module, train_data_loader, val_data_loader) </code>
# Class names are the names of the sub-directories of the training split,
# sorted so that a class's integer index is stable between runs.
# Only directories are kept: a stray file in the split root (for example
# ".DS_Store" or a README) would otherwise be counted as an extra class and
# silently enlarge the classification head.
DOCUMENT_CLASSES = sorted(
    p.name for p in Path(TRAIN_DS_PATH).glob("*") if p.is_dir()
)

# Images are collected exactly one directory level below each split root;
# the parent directory name is later used as the class label.
train_images = list(TRAIN_DS_PATH.glob('*/*.jpg'))
test_images = list(TEST_DS_PATH.glob('*/*.jpg'))
val_images = list(VAL_DS_PATH.glob('*/*.jpg'))

def scale_bounding_box(box: List[int], width_scale : float = 1.0, height_scale : float = 1.0) -> List[int]:
    """Return a copy of *box* with each coordinate scaled and truncated to int.

    Elements 0 and 2 of *box* are multiplied by ``width_scale`` and elements
    1 and 3 by ``height_scale``; each product is passed through ``int()``,
    which truncates toward zero.  Only the first four elements are read.
    """
    # One factor per coordinate, in the same order as the box elements.
    factors = (width_scale, height_scale, width_scale, height_scale)
    return [int(box[idx] * factor) for idx, factor in enumerate(factors)]

class DocumentClassificationDataset(Dataset):
    """Dataset that turns one page image plus its OCR sidecar into model inputs.

    For every image path, a JSON file with the same name but a ``.json``
    suffix is read.  It must contain an iterable of rows, each providing a
    ``"word"`` entry and a ``"bounding_box"`` entry whose first four elements
    are coordinates in image pixels.  The label is the position of the
    image's parent directory name in the module-level ``DOCUMENT_CLASSES``.
    """

    def __init__(self, image_paths, processor):
        """Store the inputs; nothing is loaded until an item is requested.

        Args:
            image_paths: Indexable collection of path objects supporting
                ``with_suffix`` and ``parent`` (e.g. ``pathlib.Path``).
            processor: Callable invoked as
                ``processor(image, words, boxes=..., ...)`` that returns a
                mapping with ``input_ids``, ``attention_mask``, ``bbox`` and
                ``pixel_values`` tensors (presumably a LayoutLMv3 processor
                with its own OCR disabled -- confirm against the caller).
        """
        self.image_paths = image_paths
        self.processor = processor

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, encode and label the document at position *item*.

        Returns:
            dict with ``input_ids``, ``attention_mask``, ``bbox``,
            ``pixel_values`` and ``labels`` tensors for a single example.

        Raises:
            ValueError: if the image's parent directory name is not listed in
                ``DOCUMENT_CLASSES``.
        """
        image_path = self.image_paths[item]

        # JSON text is UTF-8; being explicit avoids depending on the platform
        # default encoding.  The file is closed again before the (slower)
        # image decoding and encoding work starts.
        with image_path.with_suffix(".json").open("r", encoding="utf-8") as f:
            ocr_result = json.load(f)

        # Close the opened image file deterministically; convert() returns an
        # independent in-memory image that stays valid afterwards.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Rescale pixel coordinates onto a 0-1000 grid in both directions
        # (NOTE(review): this is the box range LayoutLM-family models are
        # documented to expect -- confirm against the processor in use).
        width, height = image.size
        width_scale = 1000 / width
        height_scale = 1000 / height

        words = [row["word"] for row in ocr_result]
        boxes = [
            scale_bounding_box(row["bounding_box"], width_scale, height_scale)
            for row in ocr_result
        ]

        # Fixed-length encoding so that examples can be stacked into batches.
        encoding = self.processor(
            image,
            words,
            boxes=boxes,
            max_length=512,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        label = DOCUMENT_CLASSES.index(image_path.parent.name)

        # The flatten calls collapse the leading dimension(s) of the returned
        # tensors (presumably a batch dimension of size 1 added by
        # return_tensors="pt" -- confirm), leaving per-example tensors.
        return dict(
            input_ids=encoding["input_ids"].flatten(),
            attention_mask=encoding["attention_mask"].flatten(),
            bbox=encoding["bbox"].flatten(end_dim=1),
            pixel_values=encoding["pixel_values"].flatten(end_dim=1),
            labels=torch.tensor(label, dtype=torch.long)
        )

# Training split: dataset and its loader (shuffle=True).
train_dataset = DocumentClassificationDataset(train_images, processor)
train_data_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)

# Validation split: same batch size and worker count, but shuffle=False.
val_dataset = DocumentClassificationDataset(val_images, processor)
val_data_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4)

class ModelModule(pl.LightningModule):
    """Lightning wrapper around ``LayoutLMv3ForSequenceClassification``.

    The module loads a pretrained checkpoint, attaches the class-name
    mappings from the module-level ``DOCUMENT_CLASSES`` to the model config,
    and logs loss and multiclass accuracy for training and validation.

    To continue training from an already fine-tuned checkpoint whose
    classification head has a different number of labels, pass that
    checkpoint as ``pretrained_model_name_or_path`` together with
    ``ignore_mismatched_sizes=True``.  Tensors whose shapes match are loaded;
    the mismatched head tensors are newly initialized by the library and
    therefore have to be trained again.
    """

    def __init__(
        self,
        n_classes: int,
        pretrained_model_name_or_path: str = "microsoft/layoutlmv3-base",
        ignore_mismatched_sizes: bool = False,
    ):
        """Build the model and the accuracy metrics.

        Args:
            n_classes: Number of output labels of the classification head.
            pretrained_model_name_or_path: Checkpoint handed to
                ``from_pretrained``.  The default keeps the original
                behaviour of starting from the base LayoutLMv3 weights.
            ignore_mismatched_sizes: Forwarded to ``from_pretrained``; set to
                ``True`` when the checkpoint's head size differs from
                ``n_classes``.
        """
        super().__init__()
        self.model = LayoutLMv3ForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path,
            num_labels=n_classes,
            ignore_mismatched_sizes=ignore_mismatched_sizes,
        )
        # Store human-readable class names alongside the weights.
        self.model.config.id2label = {k: v for k, v in enumerate(DOCUMENT_CLASSES)}
        self.model.config.label2id = {v: k for k, v in enumerate(DOCUMENT_CLASSES)}
        self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
        self.val_accuracy = Accuracy(task="multiclass", num_classes=n_classes)

    def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None):
        """Run the wrapped model and return its output object.

        When ``labels`` is given it is forwarded to the model; the step
        methods then read ``loss`` and ``logits`` from the returned output.
        """
        return self.model(
            input_ids,
            attention_mask=attention_mask,
            bbox=bbox,
            pixel_values=pixel_values,
            labels=labels
        )

    def _forward_batch(self, batch):
        """Unpack a batch dict, run the model, and return ``(output, labels)``."""
        labels = batch["labels"]
        output = self(
            batch["input_ids"],
            batch["attention_mask"],
            batch["bbox"],
            batch["pixel_values"],
            labels,
        )
        return output, labels

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log ``train_loss`` / ``train_acc``."""
        output, labels = self._forward_batch(batch)
        self.log("train_loss", output.loss)
        self.log(
            "train_acc",
            self.train_accuracy(output.logits, labels),
            on_step=True,
            on_epoch=True
        )
        return output.loss

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and log ``val_loss`` / ``val_acc``."""
        output, labels = self._forward_batch(batch)
        self.log("val_loss", output.loss)
        # Validation accuracy is only reported per epoch, not per step.
        self.log(
            "val_acc",
            self.val_accuracy(output.logits, labels),
            on_step=False,
            on_epoch=True
        )
        return output.loss

    def configure_optimizers(self):
        """Return an Adam optimizer over the wrapped model's parameters."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001) #1e-5
        return optimizer

# One output label per discovered document class.
model_module = ModelModule(len(DOCUMENT_CLASSES))

# Early stopping on the validation loss.
from pytorch_lightning.callbacks import EarlyStopping

# mode='min': lower val_loss is better.  With min_delta=0.00 any decrease
# counts as an improvement; training stops after patience=5 checks without one.
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.00, patience=5, verbose=True, mode='min')

# Checkpoints are ranked by val_loss (lower is better); save_top_k=3 and
# save_last=True control which ones are written.
model_checkpoint = ModelCheckpoint(
    filename="{epoch}-{step}-{val_loss:.4f}",
    save_last=True,
    save_top_k=3,
    monitor="val_loss",
    mode="min",
)

# GPU training with precision=16, capped at 10 epochs.
trainer = pl.Trainer(
    accelerator="gpu",
    precision=16,
    max_epochs=10,
    callbacks=[model_checkpoint, early_stopping],
)

trainer.fit(model_module, train_data_loader, val_data_loader)

Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa Dịch vụ tổ chức sự kiện 5 sao Thông tin về chúng tôi Dịch vụ sinh nhật bé trai Dịch vụ sinh nhật bé gái Sự kiện trọn gói Các tiết mục giải trí Dịch vụ bổ trợ Tiệc cưới sang trọng Dịch vụ khai trương Tư vấn tổ chức sự kiện Hình ảnh sự kiện Cập nhật tin tức Liên hệ ngay Thuê chú hề chuyên nghiệp Tiệc tất niên cho công ty Trang trí tiệc cuối năm Tiệc tất niên độc đáo Sinh nhật bé Hải Đăng Sinh nhật đáng yêu bé Khánh Vân Sinh nhật sang trọng Bích Ngân Tiệc sinh nhật bé Thanh Trang Dịch vụ ông già Noel Xiếc thú vui nhộn Biểu diễn xiếc quay đĩa Dịch vụ tổ chức tiệc uy tín Khám phá dịch vụ của chúng tôi Tiệc sinh nhật cho bé trai Trang trí tiệc cho bé gái Gói sự kiện chuyên nghiệp Chương trình giải trí hấp dẫn Dịch vụ hỗ trợ sự kiện Trang trí tiệc cưới đẹp Khởi đầu thành công với khai trương Chuyên gia tư vấn sự kiện Xem ảnh các sự kiện đẹp Tin mới về sự kiện Kết nối với đội ngũ chuyên gia Chú hề vui nhộn cho tiệc sinh nhật Ý tưởng tiệc cuối năm Tất niên độc đáo Trang trí tiệc hiện đại Tổ chức sinh nhật cho Hải Đăng Sinh nhật độc quyền Khánh Vân Phong cách tiệc Bích Ngân Trang trí tiệc bé Thanh Trang Thuê dịch vụ ông già Noel chuyên nghiệp Xem xiếc khỉ đặc sắc Xiếc quay đĩa thú vị
Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa
Thiết kế website Thiết kế website Thiết kế website Cách kháng tài khoản quảng cáo Mua bán Fanpage Facebook Dịch vụ SEO Tổ chức sinh nhật