# Named-entity recognition (NER) example using a locally stored token-classification model.
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

# Local directory that holds the fine-tuned checkpoint.
model_path = "./jokar/Models/HooshvareLab-bert-fa-base-uncased-finetuned-2-pt"

# Load the model and tokenizer from that directory. local_files_only=True
# restricts from_pretrained to files already on disk (no download attempt).
model = AutoModelForTokenClassification.from_pretrained(model_path, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

# Define a function to perform NER on a given text
def ner(text):
    """Tag every token of ``text`` with its predicted label.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        text: The input string to analyse.

    Returns:
        A list of ``(token_text, label_name)`` tuples, one per input id
        produced by the tokenizer, in tokenizer order. Each id is decoded
        on its own with ``skip_special_tokens=True``, so positions that hold
        special tokens are still present in the list (presumably with an
        empty ``token_text`` -- confirm against the tokenizer in use).
    """
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # First (only) sequence in the batch. Each row of logits belongs to one
    # token position; argmax over the last axis picks its best label id.
    token_ids = inputs["input_ids"][0].tolist()
    label_ids = outputs.logits[0].argmax(dim=-1).tolist()

    id2label = model.config.id2label
    return [
        (tokenizer.decode(token_id, skip_special_tokens=True), id2label[label_id])
        for token_id, label_id in zip(token_ids, label_ids)
    ]


# Test the NER function on a sample sentence and print the
# (token, label) pairs it returns.
text = "علی پسر خوبی است بر اساس قانون جوانی جمعیت که در تاریخ 12/ 8/ 1401 تصویب شده است او می تواند وام ازدواج بگیرد"
entities = ner(text)
print(entities)