# Flair_NER/train_2.py
# Fine-tune a Flair SequenceTagger for Persian NER on a column-formatted dataset.
import flair
from flair.data import Corpus
from flair.datasets import ColumnCorpus, ColumnDataset
from flair.embeddings import TransformerDocumentEmbeddings, TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
# --- configuration -----------------------------------------------------------
# Hugging Face model id used as the transformer backbone.
model = 'zedfum/arman-longformer-8k-finetuned-ensani'
# Directory where the training data resides.
data_folder = './data/'
# Column format of the CoNLL-style file: column 0 = token, column 1 = NER tag.
columns = {0: 'text', 1: 'ner'}
# Tag type to predict.
tag_type = 'ner'


def _separator():
    """Print a three-line visual separator in the training log."""
    for _ in range(3):
        print('#' * 50)


# Initialize the corpus from the column-formatted dataset.
corpus = ColumnCorpus(data_folder, columns,
                      train_file='DATASET_3.txt')

# Build the label dictionary ONCE from the corpus and reuse it below.
# (The original recomputed it via the deprecated make_tag_dictionary.)
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)
print(tag_dictionary)
_separator()

# BUG FIX: SequenceTagger requires token-level embeddings (TokenEmbeddings).
# The original passed TransformerDocumentEmbeddings, which produces a single
# vector per document and is incompatible with per-token sequence tagging.
embeddings = TransformerWordEmbeddings(model, layers='-1')

# Create a SequenceTagger model for NER.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
)
print('SequenceTagger')
_separator()

# Initialize the trainer.
trainer = ModelTrainer(tagger, corpus)
print('ModelTrainer')
_separator()

# Train the model; checkpoints and logs go to the output directory.
trainer.train('./jokar/Flair_NER/trained',
              learning_rate=0.65e-4,
              mini_batch_size=8,
              max_epochs=10)
print('trained!')