Thiết kế website giá rẻ

Question

I trained a LayoutLMv3 document classification model using pytorch lightning for few document classes. After training I uploaded model weights to huggingface hub.
ISSUE: My model1 was trained on 4 classes, and now I want to fine-tune model1 on other data that has different classes. I want to reuse the same weights so that model2 is able to classify documents from both datasets (dataset1 classes + dataset2 classes). I’m facing issues removing the last layers.
Could anybody please help me understand what changes I need to make in order to fine-tune model1 on different data?

Could anybody please help me understand what changes I need to make in order to fine-tune model1 on different data?
Thanks!

I tried the code below, but it looks like the model weights are newly initialized.

<code>model = LayoutLMv3ForSequenceClassification.from_pretrained(

'user/mymodel1',

num_labels=len(DOCUMENT_CLASSES),

ignore_mismatched_sizes=True,

)

# warning: Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at 314e/mymodel1 and are newly initialized because the shapes did not match:

- classifier.out_proj.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated

- classifier.out_proj.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated

You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

</code>

<code>model = LayoutLMv3ForSequenceClassification.from_pretrained( 'user/mymodel1', num_labels=len(DOCUMENT_CLASSES), ignore_mismatched_sizes=True, ) # warning: Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at 314e/mymodel1 and are newly initialized because the shapes did not match: - classifier.out_proj.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated - classifier.out_proj.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. </code>

# Load model1's checkpoint with a classification head sized for the new label
# set. The head shape differs from the one stored in the checkpoint, so
# ignore_mismatched_sizes=True makes loading re-initialize the mismatched
# tensors (classifier.out_proj weight and bias) instead of failing.
import torch

model = LayoutLMv3ForSequenceClassification.from_pretrained(
            'user/mymodel1',
            num_labels=len(DOCUMENT_CLASSES),
            ignore_mismatched_sizes=True,
)

# Carry over what model1 already learned for its own classes: copy each old
# output row into the row that the same class name occupies in the new head.
# Rows for classes that only exist in the new data keep their fresh
# initialization and are learned during fine-tuning.
# NOTE(review): assumes the uploaded checkpoint's config.id2label holds the real
# class names used when model1 was trained, and that DOCUMENT_CLASSES now lists
# the old and the new classes together -- confirm before relying on this.
previous_model = LayoutLMv3ForSequenceClassification.from_pretrained('user/mymodel1')
new_index_by_name = {name: index for index, name in enumerate(DOCUMENT_CLASSES)}
with torch.no_grad():
    for old_index, class_name in previous_model.config.id2label.items():
        new_index = new_index_by_name.get(class_name)
        if new_index is None:
            # Class of model1 that is absent from the new label set: nothing to copy.
            continue
        model.classifier.out_proj.weight[new_index].copy_(
            previous_model.classifier.out_proj.weight[int(old_index)]
        )
        model.classifier.out_proj.bias[new_index].copy_(
            previous_model.classifier.out_proj.bias[int(old_index)]
        )

# Store readable label names on the config, mirroring what the training code
# does for model1.
model.config.id2label = {index: name for index, name in enumerate(DOCUMENT_CLASSES)}
model.config.label2id = {name: index for index, name in enumerate(DOCUMENT_CLASSES)}

# The old model was only needed as a source for the head rows.
del previous_model

# warning: Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at 314e/mymodel1 and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Below is the code snippet that I used to train model1.

<code>DOCUMENT_CLASSES = sorted(list(map(

lambda p: p.name,

Path(TRAIN_DS_PATH).glob("*")

)))

train_images = list(TRAIN_DS_PATH.glob('*/*.jpg'))

test_images = list(TEST_DS_PATH.glob('*/*.jpg'))

val_images = list(VAL_DS_PATH.glob('*/*.jpg'))

def scale_bounding_box(box: List[int], width_scale : float = 1.0, height_scale : float = 1.0) -> List[int]:

return [

int(box[0] * width_scale),

int(box[1] * height_scale),

int(box[2] * width_scale),

int(box[3] * height_scale)

]

class DocumentClassificationDataset(Dataset):

def __init__(self, image_paths, processor):

self.image_paths = image_paths

self.processor = processor

def __len__(self):

return len(self.image_paths)

def __getitem__(self, item):

image_path = self.image_paths[item]

json_path = image_path.with_suffix(".json")

with json_path.open("r") as f:

ocr_result = json.load(f)

with Image.open(image_path).convert("RGB") as image:

width, height = image.size

width_scale = 1000 / width

height_scale = 1000 / height

words = []

boxes = []

for row in ocr_result:

boxes.append(scale_bounding_box(

row["bounding_box"],

width_scale,

height_scale

))

words.append(row["word"])

encoding = self.processor(

image,

words,

boxes=boxes,

max_length=512,

padding="max_length",

truncation=True,

return_tensors="pt"

)

label = DOCUMENT_CLASSES.index(image_path.parent.name)

return dict(

input_ids=encoding["input_ids"].flatten(),

attention_mask=encoding["attention_mask"].flatten(),

bbox=encoding["bbox"].flatten(end_dim=1),

pixel_values=encoding["pixel_values"].flatten(end_dim=1),

labels=torch.tensor(label, dtype=torch.long)

)

train_dataset = DocumentClassificationDataset(train_images, processor)

val_dataset = DocumentClassificationDataset(val_images, processor)

train_data_loader = DataLoader(

train_dataset,

batch_size=4,

shuffle=True,

num_workers=4

)

val_data_loader = DataLoader(

val_dataset,

batch_size=4,

shuffle=False,

num_workers=4

)

class ModelModule(pl.LightningModule):

def __init__(self, n_classes:int):

super().__init__()

self.model = LayoutLMv3ForSequenceClassification.from_pretrained(

"microsoft/layoutlmv3-base",

num_labels=n_classes

)

self.model.config.id2label = {k: v for k, v in enumerate(DOCUMENT_CLASSES)}

self.model.config.label2id = {v: k for k, v in enumerate(DOCUMENT_CLASSES)}

self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes)

self.val_accuracy = Accuracy(task="multiclass", num_classes=n_classes)

def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None):

return self.model(

input_ids,

attention_mask=attention_mask,

bbox=bbox,

pixel_values=pixel_values,

labels=labels

)

def training_step(self, batch, batch_idx):

input_ids = batch["input_ids"]

attention_mask = batch["attention_mask"]

bbox = batch["bbox"]

pixel_values = batch["pixel_values"]

labels = batch["labels"]

output = self(input_ids, attention_mask, bbox, pixel_values, labels)

self.log("train_loss", output.loss)

self.log(

"train_acc",

self.train_accuracy(output.logits, labels),

on_step=True,

on_epoch=True

)

return output.loss

def validation_step(self, batch, batch_idx):

input_ids = batch["input_ids"]

attention_mask = batch["attention_mask"]

bbox = batch["bbox"]

pixel_values = batch["pixel_values"]

labels = batch["labels"]

output = self(input_ids, attention_mask, bbox, pixel_values, labels)

self.log("val_loss", output.loss)

self.log(

"val_acc",

self.val_accuracy(output.logits, labels),

on_step=False,

on_epoch=True

)

return output.loss

def configure_optimizers(self):

optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001) #1e-5

return optimizer

model_module = ModelModule(len(DOCUMENT_CLASSES))

# Add early stopping

from pytorch_lightning.callbacks import EarlyStopping

early_stopping = EarlyStopping(

monitor='val_loss', # Monitoring validation loss

min_delta=0.00, # Minimum change to qualify as an improvement

patience=5, # Number of epochs with no improvement after which training will be stopped

verbose=True, # Whether to print logs to stdout

mode='min' # `min` mode means training will stop when the quantity monitored has stopped decreasing

)

model_checkpoint = ModelCheckpoint(

filename="{epoch}-{step}-{val_loss:.4f}", save_last=True, save_top_k=3, monitor="val_loss", mode="min"

)

trainer = pl.Trainer(

accelerator="gpu",

precision=16,

max_epochs=10,

callbacks=[

model_checkpoint,

early_stopping

]

)

trainer.fit(model_module, train_data_loader, val_data_loader)

</code>

<code>DOCUMENT_CLASSES = sorted(list(map( lambda p: p.name, Path(TRAIN_DS_PATH).glob("*") ))) train_images = list(TRAIN_DS_PATH.glob('*/*.jpg')) test_images = list(TEST_DS_PATH.glob('*/*.jpg')) val_images = list(VAL_DS_PATH.glob('*/*.jpg')) def scale_bounding_box(box: List[int], width_scale : float = 1.0, height_scale : float = 1.0) -> List[int]: return [ int(box[0] * width_scale), int(box[1] * height_scale), int(box[2] * width_scale), int(box[3] * height_scale) ] class DocumentClassificationDataset(Dataset): def __init__(self, image_paths, processor): self.image_paths = image_paths self.processor = processor def __len__(self): return len(self.image_paths) def __getitem__(self, item): image_path = self.image_paths[item] json_path = image_path.with_suffix(".json") with json_path.open("r") as f: ocr_result = json.load(f) with Image.open(image_path).convert("RGB") as image: width, height = image.size width_scale = 1000 / width height_scale = 1000 / height words = [] boxes = [] for row in ocr_result: boxes.append(scale_bounding_box( row["bounding_box"], width_scale, height_scale )) words.append(row["word"]) encoding = self.processor( image, words, boxes=boxes, max_length=512, padding="max_length", truncation=True, return_tensors="pt" ) label = DOCUMENT_CLASSES.index(image_path.parent.name) return dict( input_ids=encoding["input_ids"].flatten(), attention_mask=encoding["attention_mask"].flatten(), bbox=encoding["bbox"].flatten(end_dim=1), pixel_values=encoding["pixel_values"].flatten(end_dim=1), labels=torch.tensor(label, dtype=torch.long) ) train_dataset = DocumentClassificationDataset(train_images, processor) val_dataset = DocumentClassificationDataset(val_images, processor) train_data_loader = DataLoader( train_dataset, batch_size=4, shuffle=True, num_workers=4 ) val_data_loader = DataLoader( val_dataset, batch_size=4, shuffle=False, num_workers=4 ) class ModelModule(pl.LightningModule): def __init__(self, n_classes:int): super().__init__() self.model = 
LayoutLMv3ForSequenceClassification.from_pretrained( "microsoft/layoutlmv3-base", num_labels=n_classes ) self.model.config.id2label = {k: v for k, v in enumerate(DOCUMENT_CLASSES)} self.model.config.label2id = {v: k for k, v in enumerate(DOCUMENT_CLASSES)} self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes) self.val_accuracy = Accuracy(task="multiclass", num_classes=n_classes) def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None): return self.model( input_ids, attention_mask=attention_mask, bbox=bbox, pixel_values=pixel_values, labels=labels ) def training_step(self, batch, batch_idx): input_ids = batch["input_ids"] attention_mask = batch["attention_mask"] bbox = batch["bbox"] pixel_values = batch["pixel_values"] labels = batch["labels"] output = self(input_ids, attention_mask, bbox, pixel_values, labels) self.log("train_loss", output.loss) self.log( "train_acc", self.train_accuracy(output.logits, labels), on_step=True, on_epoch=True ) return output.loss def validation_step(self, batch, batch_idx): input_ids = batch["input_ids"] attention_mask = batch["attention_mask"] bbox = batch["bbox"] pixel_values = batch["pixel_values"] labels = batch["labels"] output = self(input_ids, attention_mask, bbox, pixel_values, labels) self.log("val_loss", output.loss) self.log( "val_acc", self.val_accuracy(output.logits, labels), on_step=False, on_epoch=True ) return output.loss def configure_optimizers(self): optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001) #1e-5 return optimizer model_module = ModelModule(len(DOCUMENT_CLASSES)) # Add early stopping from pytorch_lightning.callbacks import EarlyStopping early_stopping = EarlyStopping( monitor='val_loss', # Monitoring validation loss min_delta=0.00, # Minimum change to qualify as an improvement patience=5, # Number of epochs with no improvement after which training will be stopped verbose=True, # Whether to print logs to stdout mode='min' # `min` mode means training will 
stop when the quantity monitored has stopped decreasing ) model_checkpoint = ModelCheckpoint( filename="{epoch}-{step}-{val_loss:.4f}", save_last=True, save_top_k=3, monitor="val_loss", mode="min" ) trainer = pl.Trainer( accelerator="gpu", precision=16, max_epochs=10, callbacks=[ model_checkpoint, early_stopping ] ) trainer.fit(model_module, train_data_loader, val_data_loader) </code>

# Class names are the names of the sub-directories of the training split,
# sorted so the name -> index mapping does not depend on directory listing
# order. Only directories are kept: a stray file at the dataset root (for
# example .DS_Store or a README) is not a class and must not inflate the label
# count passed to the model.
DOCUMENT_CLASSES = sorted(
    entry.name for entry in Path(TRAIN_DS_PATH).glob("*") if entry.is_dir()
)

# Image paths per split; the '*/*.jpg' pattern matches JPEG files exactly one
# directory level below the split root.
train_images = list(TRAIN_DS_PATH.glob('*/*.jpg'))
test_images = list(TEST_DS_PATH.glob('*/*.jpg'))
val_images = list(VAL_DS_PATH.glob('*/*.jpg'))

def scale_bounding_box(box: List[int], width_scale : float = 1.0, height_scale : float = 1.0) -> List[int]:
    """Return a new 4-item box with each coordinate scaled and truncated to int.

    Items 0 and 2 of ``box`` are multiplied by ``width_scale``; items 1 and 3
    are multiplied by ``height_scale``. Each product is passed through
    ``int()``, which truncates toward zero. Only the first four items of
    ``box`` are read, and ``box`` itself is not modified.
    """
    # Scale factor to apply at each of the four positions, in order.
    per_position_scale = (width_scale, height_scale, width_scale, height_scale)
    return [
        int(box[position] * factor)
        for position, factor in enumerate(per_position_scale)
    ]

class DocumentClassificationDataset(Dataset):
    """Map-style dataset pairing each document image with its OCR JSON file.

    Every image path is expected to have a sibling file with the same name and
    a ``.json`` suffix containing a list of rows, each with a ``"word"`` and a
    ``"bounding_box"`` entry. The class label is the index of the image's
    parent directory name in the module-level ``DOCUMENT_CLASSES`` list.
    """

    def __init__(self, image_paths, processor):
        """Store the image paths and the processor used to encode each sample.

        Args:
            image_paths: Sequence of path objects (``with_suffix``, ``parent``
                and ``open`` are used on them) pointing at the images.
            processor: Callable invoked as
                ``processor(image, words, boxes=..., ...)`` that returns a
                mapping with ``input_ids``, ``attention_mask``, ``bbox`` and
                ``pixel_values`` tensors.
        """
        self.image_paths = image_paths
        self.processor = processor

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, encode and label the sample at position ``item``.

        Returns:
            A dict with ``input_ids``, ``attention_mask``, ``bbox``,
            ``pixel_values`` and ``labels`` tensors for one document.

        Raises:
            ValueError: If the image's parent directory name is not in
                ``DOCUMENT_CLASSES``.
        """
        image_path = self.image_paths[item]

        # The OCR result sits next to the image with a .json suffix. JSON text
        # is read as UTF-8 explicitly so that non-ASCII words do not depend on
        # the platform's default encoding.
        with image_path.with_suffix(".json").open("r", encoding="utf-8") as f:
            ocr_result = json.load(f)

        # Manage the file-backed image itself so its handle is released as soon
        # as the in-memory RGB copy exists.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Rescale pixel coordinates so the full image width/height maps to 1000,
        # the coordinate grid LayoutLMv3 expects for bounding boxes.
        width, height = image.size
        width_scale = 1000 / width
        height_scale = 1000 / height

        words = [row["word"] for row in ocr_result]
        boxes = [
            scale_bounding_box(row["bounding_box"], width_scale, height_scale)
            for row in ocr_result
        ]

        # Pad/truncate every sample to 512 tokens so samples can be batched.
        encoding = self.processor(
            image,
            words,
            boxes=boxes,
            max_length=512,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        label = DOCUMENT_CLASSES.index(image_path.parent.name)

        # flatten() / flatten(end_dim=1) collapse the leading axes of the
        # processor output -- presumably dropping the batch axis of size 1 that
        # return_tensors="pt" adds; verify against the processor in use.
        return dict(
            input_ids=encoding["input_ids"].flatten(),
            attention_mask=encoding["attention_mask"].flatten(),
            bbox=encoding["bbox"].flatten(end_dim=1),
            pixel_values=encoding["pixel_values"].flatten(end_dim=1),
            labels=torch.tensor(label, dtype=torch.long)
        )

# Wrap the training and validation image lists in datasets that share the same
# processor.
train_dataset, val_dataset = (
    DocumentClassificationDataset(paths, processor)
    for paths in (train_images, val_images)
)

# Batches of 4 samples loaded by 4 worker processes; only the training data is
# reshuffled, validation keeps its order.
train_data_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=4, num_workers=4)
val_data_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=4, num_workers=4)

class ModelModule(pl.LightningModule):
    """Lightning wrapper around ``LayoutLMv3ForSequenceClassification``.

    Logs loss and multiclass accuracy for training and validation, and
    optimizes all model parameters with Adam.
    """

    def __init__(
        self,
        n_classes:int,
        pretrained_model_name: str = "microsoft/layoutlmv3-base",
        ignore_mismatched_sizes: bool = False,
    ):
        """Load the pretrained model and create the accuracy metrics.

        Args:
            n_classes: Number of output classes for the classification head.
            pretrained_model_name: Checkpoint passed to ``from_pretrained``.
                Defaults to the base model; pass a previously fine-tuned
                checkpoint to continue training from it.
            ignore_mismatched_sizes: Forwarded to ``from_pretrained``. Set it to
                ``True`` when the checkpoint's head was trained with a different
                number of classes than ``n_classes``.
        """
        super().__init__()
        self.model = LayoutLMv3ForSequenceClassification.from_pretrained(
            pretrained_model_name,
            num_labels=n_classes,
            ignore_mismatched_sizes=ignore_mismatched_sizes,
        )
        # Store readable class names on the config so they are saved with the model.
        self.model.config.id2label = {k: v for k, v in enumerate(DOCUMENT_CLASSES)}
        self.model.config.label2id = {v: k for k, v in enumerate(DOCUMENT_CLASSES)}
        # Separate metric objects per stage so their internal state never mixes.
        self.train_accuracy = Accuracy(task="multiclass", num_classes=n_classes)
        self.val_accuracy = Accuracy(task="multiclass", num_classes=n_classes)

    def forward(self, input_ids, attention_mask, bbox, pixel_values, labels=None):
        """Run the wrapped model; the output carries a loss when ``labels`` is given."""
        return self.model(
            input_ids,
            attention_mask=attention_mask,
            bbox=bbox,
            pixel_values=pixel_values,
            labels=labels
        )

    def _shared_step(self, batch, stage, accuracy, on_step):
        """Run one batch, log ``<stage>_loss`` and ``<stage>_acc``, return the loss."""
        labels = batch["labels"]
        output = self(
            batch["input_ids"],
            batch["attention_mask"],
            batch["bbox"],
            batch["pixel_values"],
            labels,
        )
        self.log(f"{stage}_loss", output.loss)
        self.log(
            f"{stage}_acc",
            accuracy(output.logits, labels),
            on_step=on_step,
            on_epoch=True
        )
        return output.loss

    def training_step(self, batch, batch_idx):
        """Compute the training loss; accuracy is logged per step and per epoch."""
        return self._shared_step(batch, "train", self.train_accuracy, on_step=True)

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss; accuracy is logged per epoch only."""
        return self._shared_step(batch, "val", self.val_accuracy, on_step=False)

    def configure_optimizers(self):
        """Return an Adam optimizer over all model parameters (lr = 1e-5)."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001) #1e-5
        return optimizer

# Add early stopping
from pytorch_lightning.callbacks import EarlyStopping

# Lightning module with one output per document class.
model_module = ModelModule(n_classes=len(DOCUMENT_CLASSES))

# Stop training once the monitored validation loss has stopped decreasing:
# any decrease counts as an improvement (min_delta=0.00), and training ends
# after 5 checks without one. verbose=True prints the decisions to stdout.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.00,
    patience=5,
    verbose=True,
)

# Keep the 3 checkpoints with the lowest val_loss, plus the most recent one.
model_checkpoint = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=3,
    save_last=True,
    filename="{epoch}-{step}-{val_loss:.4f}",
)

# GPU training with precision=16, capped at 10 epochs.
trainer = pl.Trainer(
    accelerator="gpu",
    precision=16,
    max_epochs=10,
    callbacks=[model_checkpoint, early_stopping],
)

trainer.fit(model_module, train_data_loader, val_data_loader)

Thiết kế website giá rẻ

Danh mục

Facing Issues while fine-tuning already trained LayoutLMv3 classification model