data_processes/do_nlp_processes.py

43 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
سورس اجرای پردازش های مختلف روی اجزای قانونی
شامل: کلاسیفیکیشن، تشخیص موجودیت های نامدار، استخراج بردار کلمات، استخراج کلیدواژه ها و ساده‌سازی متن
"""
from p1_classifier import do_classify
from p2_ner_recognizer import do_ner_recognize
from p3_words_embedder import do_word_embedder
from p4_keyword_extractor import do_keyword_extract
from p5_simplifier import do_simplify
from elastic_helper import ElasticHelper
def get_sections(sections_path="/home/gpu/data_11/14040423/mj_qa_section.zip"):
    """Load the sections that the NLP stages will process.

    Args:
        sections_path: Location of the sections archive handed to
            ``ElasticHelper.iterateJsonFile``. Defaults to the path this
            script has always used, so existing ``get_sections()`` calls
            behave exactly as before; pass another path to run the
            pipeline on a different dump or on another machine.

    Returns:
        Whatever ``ElasticHelper.iterateJsonFile`` returns for the given
        path (its exact type is defined by ``elastic_helper``).
    """
    eh_obj = ElasticHelper()
    # NOTE(review): the second positional argument (True) is forwarded
    # unchanged; its meaning is defined by ElasticHelper.iterateJsonFile --
    # presumably it signals that the input is a zip archive; confirm there.
    return eh_obj.iterateJsonFile(sections_path, True)
def main():
    """Run every NLP stage, in order, over the loaded sections.

    The sections are obtained from ``get_sections()``. Each stage receives
    the value returned by the previous stage, and a completion message is
    printed once the last stage has returned.
    """
    # Input for the first stage.
    current = get_sections()

    # Stages listed in execution order.
    pipeline = (
        do_classify,          # 1. classification
        do_ner_recognize,     # 2. named-entity recognition
        do_word_embedder,     # 3. word embedding
        do_keyword_extract,   # 4. keyword extraction
        do_simplify,          # 5. simplification
    )
    for stage in pipeline:
        current = stage(current)

    print('all nlp processes finished successfully!')