"""Annotate every dataset entry with POS tags from parsivar and hazm.

Reads ``main_classes_dataset_POS_03.json`` (a mapping whose values are lists
of dict entries, each with a ``"content"`` text field), runs both taggers on
the normalized, tokenized text, stores the stringified tagger outputs under
the ``"parsivar"`` and ``"hazm"`` keys of each entry, and writes the enriched
mapping to ``hazm_parsivar_added.json``.
"""

import json

import hazm
import parsivar

# Read explicitly as UTF-8 (matching how the output file is written below)
# instead of relying on the platform's default locale encoding.
with open("main_classes_dataset_POS_03.json", "r", encoding="utf-8") as f:
    qs = json.load(f)

parsivar_normalizer = parsivar.Normalizer()
parsivar_tokenizer = parsivar.Tokenizer()
# tagging_model = "wapiti" or "stanford". "wapiti" is faster than "stanford"
parsivar_tagger = parsivar.POSTagger(tagging_model="wapiti")

hazm_normalizer = hazm.Normalizer()
hazm_tagger = hazm.POSTagger(model="pos_tagger.model", universal_tag=False)

for cls_list in qs.values():
    for q in cls_list:
        text = q["content"]

        # Each library gets its own normalize -> tokenize -> tag pipeline so
        # the two outputs can be compared side by side.
        parsivar_tokens = parsivar_tokenizer.tokenize_words(
            parsivar_normalizer.normalize(text)
        )
        hazm_tokens = hazm.word_tokenize(hazm_normalizer.normalize(text))

        # Tagger results are stored as their str() representation, not as
        # structured JSON lists.
        q["parsivar"] = str(parsivar_tagger.parse(parsivar_tokens))
        q["hazm"] = str(hazm_tagger.tag(tokens=hazm_tokens))

# ensure_ascii=False keeps non-ASCII text readable in the output file.
with open("hazm_parsivar_added.json", "w", encoding="utf-8") as f:
    json.dump(qs, f, indent=4, ensure_ascii=False)