Compare commits
No commits in common. "a0df99bc0eef80d1d2d510ae77640247dcdb5ff5" and "3141a929a05711829272c9b7fd7985c844245a51" have entirely different histories.
a0df99bc0e
...
3141a929a0
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,6 +3,3 @@ final-model.pt
|
||||||
pytorch_model.bin
|
pytorch_model.bin
|
||||||
sentencepiece.bpe.model
|
sentencepiece.bpe.model
|
||||||
tokenizer.json
|
tokenizer.json
|
||||||
/__pycache__
|
|
||||||
.gitignore
|
|
||||||
log
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -3516,125 +3516,3 @@ Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
Span[224:225]: "مشهد"/LOC2 /%/ 1.0
|
Span[224:225]: "مشهد"/LOC2 /%/ 1.0
|
||||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||||
|
|
||||||
|
|
||||||
**************************************************
|
|
||||||
############################################################
|
|
||||||
Model Name: HooshvareLab--bert-base-parsbert-armanner-uncased--2025-7-20--23-45-26
|
|
||||||
Found Entity Count: 18
|
|
||||||
inference time: 2025-07-21 17:03:32.727174
|
|
||||||
############################################################
|
|
||||||
Span[3:6]: "کلیه دستگاههای اجرائی"/ORG2 /%/ 1.0
|
|
||||||
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
|
||||||
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
|
||||||
Span[47:49]: "نیروهای مسلح"/ORG /%/ 1.0
|
|
||||||
Span[49:50]: "جمهوری"/LOC /%/ 0.92
|
|
||||||
Span[50:52]: "اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[137:147]: "نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 0.98
|
|
||||||
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
|
||||||
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
|
||||||
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[183:187]: "سازمان جوانان هلال احمر"/ORG /%/ 1.0
|
|
||||||
Span[199:202]: "قانون هوای پاک"/REF /%/ 1.0
|
|
||||||
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
|
||||||
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[220:222]: "محمد سرمدی"/PER /%/ 1.0
|
|
||||||
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
|
||||||
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
|
||||||
**************************************************
|
|
||||||
|
|
||||||
**************************************************
|
|
||||||
############################################################
|
|
||||||
Model Name: HooshvareLab--bert-base-parsbert-armanner-uncased--2025-7-20--23-45-26
|
|
||||||
Found Entity Count: 18
|
|
||||||
inference time: 2025-07-21 17:37:56.924980
|
|
||||||
############################################################
|
|
||||||
Span[3:6]: "کلیه دستگاههای اجرائی"/ORG2 /%/ 1.0
|
|
||||||
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
|
||||||
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
|
||||||
Span[47:49]: "نیروهای مسلح"/ORG /%/ 1.0
|
|
||||||
Span[49:50]: "جمهوری"/LOC /%/ 0.92
|
|
||||||
Span[50:52]: "اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[137:147]: "نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 0.98
|
|
||||||
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
|
||||||
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
|
||||||
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[183:187]: "سازمان جوانان هلال احمر"/ORG /%/ 1.0
|
|
||||||
Span[199:202]: "قانون هوای پاک"/REF /%/ 1.0
|
|
||||||
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
|
||||||
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[220:222]: "محمد سرمدی"/PER /%/ 1.0
|
|
||||||
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
|
||||||
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
|
||||||
**************************************************
|
|
||||||
|
|
||||||
**************************************************
|
|
||||||
############################################################
|
|
||||||
Model Name: HooshvareLab--bert-base-parsbert-peymaner-uncased--2025-7-21--0-40-1
|
|
||||||
Found Entity Count: 15
|
|
||||||
inference time: 2025-07-21 17:39:02.757138
|
|
||||||
############################################################
|
|
||||||
Span[3:6]: "کلیه دستگاههای اجرائی"/ORG2 /%/ 1.0
|
|
||||||
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
|
||||||
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
|
||||||
Span[47:52]: "نیروهای مسلح جمهوری اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[136:138]: "روز نوزدهم"/EVENT /%/ 0.89
|
|
||||||
Span[144:145]: "نود"/EVENT /%/ 0.92
|
|
||||||
Span[146:147]: "سه"/EVENT /%/ 0.78
|
|
||||||
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
|
||||||
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
|
||||||
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
|
||||||
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
|
||||||
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
|
||||||
**************************************************
|
|
||||||
|
|
||||||
**************************************************
|
|
||||||
############################################################
|
|
||||||
Model Name: HooshvareLab--bert-fa-base-uncased-ner-arman--2025-7-21--0-12-43
|
|
||||||
Found Entity Count: 17
|
|
||||||
inference time: 2025-07-21 17:40:54.675849
|
|
||||||
############################################################
|
|
||||||
Span[5:6]: "اجرائی"/ORG2 /%/ 1.0
|
|
||||||
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
|
||||||
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
|
||||||
Span[47:49]: "نیروهای مسلح"/ORG /%/ 1.0
|
|
||||||
Span[49:52]: "جمهوری اسلامی ایران"/LOC /%/ 0.98
|
|
||||||
Span[61:64]: "جمهوری اسلامی ایران"/LOC /%/ 1.0
|
|
||||||
Span[136:147]: "روز نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 0.94
|
|
||||||
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
|
||||||
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
|
||||||
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[183:187]: "سازمان جوانان هلال احمر"/ORG /%/ 1.0
|
|
||||||
Span[199:200]: "قانون"/REF /%/ 1.0
|
|
||||||
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
|
||||||
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[220:222]: "محمد سرمدی"/PER /%/ 1.0
|
|
||||||
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
|
||||||
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
|
||||||
**************************************************
|
|
||||||
|
|
||||||
**************************************************
|
|
||||||
############################################################
|
|
||||||
Model Name: HooshvareLab--bert-fa-base-uncased-ner-peyma--2025-7-20--23-18-3
|
|
||||||
Found Entity Count: 14
|
|
||||||
inference time: 2025-07-21 17:42:01.419288
|
|
||||||
############################################################
|
|
||||||
Span[4:6]: "دستگاههای اجرائی"/ORG2 /%/ 1.0
|
|
||||||
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
|
||||||
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
|
||||||
Span[47:52]: "نیروهای مسلح جمهوری اسلامی ایران"/ORG /%/ 1.0
|
|
||||||
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 0.95
|
|
||||||
Span[120:124]: "قانون خانواده و جوانی"/REF /%/ 0.98
|
|
||||||
Span[136:147]: "روز نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 1.0
|
|
||||||
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
|
||||||
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
|
||||||
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
|
||||||
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
|
||||||
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
|
||||||
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
|
||||||
**************************************************
|
|
||||||
|
|
|
@ -22,7 +22,5 @@ Detailed Results: {result.detailed_results}
|
||||||
return result_text
|
return result_text
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
model_folder = "2025-07-21--17-51-49--HooshvareLab--bert-fa-base-uncased-ner-peyma"
|
result = do_evaluate()
|
||||||
model_path = f"./taggers/{model_folder}/final-model.pt"
|
print(do_evaluate())
|
||||||
result = do_evaluate(model_path)
|
|
||||||
print(result)
|
|
||||||
|
|
10
inference.py
10
inference.py
|
@ -237,15 +237,7 @@ def inference_main(trained_model,input_sentence):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("do inf ... ")
|
print("do inf ... ")
|
||||||
result = inference_main('HooshvareLab--bert-fa-base-uncased-ner-peyma--2025-7-20--23-18-3',read_file())
|
result = inference_main('',read_file())
|
||||||
print(result)
|
print(result)
|
||||||
|
|
||||||
"""
|
|
||||||
HooshvareLab--bert-base-parsbert-armanner-uncased--2025-7-20--23-45-26
|
|
||||||
HooshvareLab--bert-base-parsbert-ner-uncased--2025-7-20--22-51-20
|
|
||||||
HooshvareLab--bert-base-parsbert-peymaner-uncased--2025-7-21--0-40-1
|
|
||||||
HooshvareLab--bert-fa-base-uncased-ner-arman--2025-7-21--0-12-43
|
|
||||||
HooshvareLab--bert-fa-base-uncased-ner-peyma--2025-7-20--23-18-3
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
|
|
40
train.py
40
train.py
|
@ -33,11 +33,6 @@ from flair.embeddings import TransformerDocumentEmbeddings
|
||||||
#model = "HooshvareLab/bert-base-parsbert-peymaner-uncased" # جالب نبود!
|
#model = "HooshvareLab/bert-base-parsbert-peymaner-uncased" # جالب نبود!
|
||||||
#model = "HooshvareLab/bert-base-parsbert-armanner-uncased" # جالب نبود!
|
#model = "HooshvareLab/bert-base-parsbert-armanner-uncased" # جالب نبود!
|
||||||
|
|
||||||
def digit_correct(input_num):
|
|
||||||
if input_num <10:
|
|
||||||
return f'0{input_num}'
|
|
||||||
return str(input_num)
|
|
||||||
|
|
||||||
def main_train(model):
|
def main_train(model):
|
||||||
"""
|
"""
|
||||||
آموزش مدل برای تسک NER
|
آموزش مدل برای تسک NER
|
||||||
|
@ -46,7 +41,7 @@ def main_train(model):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
time = datetime.datetime.now()
|
time = datetime.datetime.now()
|
||||||
model_title = f"{time.year}-{digit_correct(time.month)}-{digit_correct(time.day)}--{digit_correct(time.hour)}-{digit_correct(time.minute)}-{digit_correct(time.second)}--{model}".replace('/','--')
|
model_title = f"{model}--{time.year}-{time.month}-{time.day}--{time.hour}-{time.minute}-{time.second}".replace('/','--')
|
||||||
print(f'\nMODEL:: {model}\n')
|
print(f'\nMODEL:: {model}\n')
|
||||||
|
|
||||||
#!pip install 'flair==0.10'
|
#!pip install 'flair==0.10'
|
||||||
|
@ -59,11 +54,11 @@ def main_train(model):
|
||||||
#اسم دیتاست اینجا تنظیم شود
|
#اسم دیتاست اینجا تنظیم شود
|
||||||
corpus = ColumnCorpus(data_folder, columns,
|
corpus = ColumnCorpus(data_folder, columns,
|
||||||
#train_file = 'peyma_train.txt')
|
#train_file = 'peyma_train.txt')
|
||||||
train_file = 'DATASET140402_no_aref2.txt', # qavanin 36K tokens
|
train_file = 'DATASET140402_no_aref.txt', # qavanin 36K tokens
|
||||||
# test_file = 'test_ds_new.txt', # test 110 sections - 6.7K
|
test_file = 'test_ds_new.txt',) # test 110 sections - 6.7K
|
||||||
#dev_file = 'dev split 2.txt'
|
#dev_file = 'dev split 2.txt'
|
||||||
#max_sentence_length=500
|
#max_sentence_length=500
|
||||||
)
|
#)
|
||||||
|
|
||||||
# tag to predict
|
# tag to predict
|
||||||
tag_type = 'ner'
|
tag_type = 'ner'
|
||||||
|
@ -164,18 +159,16 @@ def main_train(model):
|
||||||
F1 Score: {result}
|
F1 Score: {result}
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'''
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'''
|
||||||
|
|
||||||
hyperparameters = f"""learning_rate: {learning_rate} - mini_batch_size: {mini_batch_size} - max_epochs: {max_epochs}"""
|
# final_result = f"""Model Name: {model}
|
||||||
|
# Fine-Tune Parameters: {hyperparameters}
|
||||||
final_result = f"""Model Name: {model}
|
# {train_result}
|
||||||
Fine-Tune Parameters: {hyperparameters}
|
# {evaluate_result}\n
|
||||||
{train_result}
|
# Fine_Tune time: {operation_time}
|
||||||
{evaluate_result}\n
|
# ------------------------------------------------------------------------------------
|
||||||
Fine_Tune time: {operation_time}
|
# ------------------------------------------------------------------------------------\n
|
||||||
------------------------------------------------------------------------------------
|
# """
|
||||||
------------------------------------------------------------------------------------\n
|
# save_to_file_by_address('./data/train_log.txt', final_result)
|
||||||
"""
|
# print(' Saving results finished! ')
|
||||||
save_to_file_by_address('./data/train_log.txt', final_result)
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
models = """
|
models = """
|
||||||
|
@ -184,11 +177,8 @@ HooshvareLab/bert-fa-base-uncased-ner-peyma
|
||||||
HooshvareLab/bert-base-parsbert-armanner-uncased
|
HooshvareLab/bert-base-parsbert-armanner-uncased
|
||||||
HooshvareLab/bert-fa-base-uncased-ner-arman
|
HooshvareLab/bert-fa-base-uncased-ner-arman
|
||||||
HooshvareLab/bert-base-parsbert-peymaner-uncased
|
HooshvareLab/bert-base-parsbert-peymaner-uncased
|
||||||
|
HooshvareLab/distilbert-fa-zwnj-base-ner
|
||||||
"""
|
"""
|
||||||
models = """
|
|
||||||
HooshvareLab/bert-fa-base-uncased-ner-peyma
|
|
||||||
"""
|
|
||||||
# HooshvareLab/distilbert-fa-zwnj-base-ner
|
|
||||||
models_with_error= """
|
models_with_error= """
|
||||||
nicolauduran45/affilgood-ner-multilingual-v2 - error
|
nicolauduran45/affilgood-ner-multilingual-v2 - error
|
||||||
Amirmerfan/bert-base-uncased-persian-ner-50k-base - error
|
Amirmerfan/bert-base-uncased-persian-ner-50k-base - error
|
||||||
|
|
Loading…
Reference in New Issue
Block a user