data_processes/do_nlp_processes.py
2025-08-10 18:14:42 +03:30

51 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
سورس اجرای پردازش های مختلف روی اجزای قانونی
شامل: کلاسیفیکیشن، تشخیص موجودیت های نامدار، استخراج بردار کلمات، استخراج کلیدواژه ها و ساده‌سازی(بازنمایی) متن
"""
from p1_classifier import do_classify
from p2_ner_recognizer import do_ner_recognize
# from p3_words_embedder import do_word_embedder
# from p4_keyword_extractor import do_keyword_extract
# from p5_simplifier import do_simplify
from elastic_helper import ElasticHelper
def get_sections(sections_path="/home/gpu/data_11/14040423/mj_qa_section.zip"):
    """Load the sections that the NLP processes will run on.

    Args:
        sections_path: Path of the sections archive passed to
            ``ElasticHelper.iterateJsonFile``. The default is the path that
            used to be hard-coded in this function, so existing calls with
            no arguments keep reading the same file.

    Returns:
        Whatever ``ElasticHelper.iterateJsonFile(sections_path, True)``
        returns.
    """
    helper = ElasticHelper()
    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by ElasticHelper.iterateJsonFile - confirm against that helper.
    return helper.iterateJsonFile(sections_path, True)
def main():
    """Run the enabled NLP stages, in order, over the loaded sections.

    The sections returned by ``get_sections`` are passed to the classifier,
    and the classifier's return value is passed to the named-entity
    recognizer. A completion message is printed at the end. The function
    returns nothing.
    """
    # Input for the whole pipeline.
    raw_sections = get_sections()

    # Debug aid (disabled): restrict the run to a single section by its id.
    # single_section = {}
    # for item in raw_sections:
    #     if not item['id'] == 'qs2180272':
    #         continue
    #     single_section[item['id']] = item['source']
    #     break
    # raw_sections = single_section

    # Stage 1: classification.
    classified = do_classify(raw_sections)

    # Stage 2: named-entity recognition, fed with the classifier's output.
    sections = do_ner_recognize(classified)

    # Stages 3-5 are currently disabled (their imports are commented out too).
    # Stage 3: word embedding.
    # sections = do_word_embedder(sections)
    # Stage 4: keyword extraction.
    # result_kw = do_keyword_extract(sections)
    # Stage 5: simplification.
    # result_simp = do_simplify(sections)

    print('all nlp processes finished successfully!')
# Start the pipeline.
# NOTE(review): this call is unconditional (no `if __name__ == "__main__":`
# guard), so the pipeline would also run if this module is imported - confirm
# that is intended.
main()