# Flair_NER/train_2.py
# Train a Flair NER sequence tagger on a column-formatted dataset using a
# fine-tuned Persian longformer as the embedding backbone.

import flair
from flair.data import Corpus
from flair.datasets import ColumnCorpus, ColumnDataset
from flair.embeddings import TransformerDocumentEmbeddings, TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
# Hugging Face model id of the fine-tuned Persian longformer backbone.
model = 'zedfum/arman-longformer-8k-finetuned-ensani'

# Column format of the CoNLL-style data file: column 0 = token, column 1 = NER tag.
columns = {0: 'text', 1: 'ner'}

# Directory where the training data resides.
data_folder = './data/'

# Initialize the corpus from the column-formatted training file.
corpus = ColumnCorpus(data_folder, columns, train_file='DATASET_3.txt')

# NOTE(fix): a SequenceTagger labels individual tokens, so it needs one
# embedding per *token*. TransformerDocumentEmbeddings (used before) yields a
# single vector per sentence and cannot drive a sequence-labelling head.
embeddings = TransformerWordEmbeddings(model, layers='-1')

# Tag type to predict.
tag_type = 'ner'

# Build the label dictionary once and reuse it below (the original rebuilt it
# via the deprecated corpus.make_tag_dictionary()).
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)
print(tag_dictionary)
print('#' * 50)

# Sequence-labelling head (BiLSTM-CRF by default) on top of the embeddings.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
)
print('SequenceTagger')
print('#' * 50)

# Initialize the trainer.
trainer = ModelTrainer(tagger, corpus)
print('ModelTrainer')
print('#' * 50)

# Train the model; checkpoints and logs are written to the output directory.
trainer.train(
    './jokar/Flair_NER/trained',
    learning_rate=0.65e-4,
    mini_batch_size=8,
    max_epochs=10,
)
print('trained!')