import os
import re
import time
import traceback
import uuid
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Union

import orjson
from dotenv import load_dotenv
from elasticsearch.helpers import scan

from llm_helper import AsyncCore
from es_helper import ElasticHelper
from base_model import MnMeet

# load_dotenv() runs before the os.getenv() calls below so that values
# provided through a .env file are visible to them.
load_dotenv()

ES_URL = os.getenv("ES_URL")
ES_USER_NAME = os.getenv("ES_USER_NAME")
ES_PASSWORD = os.getenv("ES_PASSWORD")
LLM_URL = os.getenv("LLM_URL")


def save_orjson(path, data):
    """Serialize *data* with orjson and write the resulting bytes to *path*.

    The dump uses ``OPT_INDENT_2`` (two-space indentation) together with
    ``OPT_NON_STR_KEYS`` (dict keys that are not ``str`` are accepted).
    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(path, "wb") as out:
        payload = orjson.dumps(
            data,
            option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS,
        )
        out.write(payload)


def load_orjson(path: str | Path):
    """Read the file at *path* and return its content decoded by orjson."""
    # The file must be opened in binary mode: orjson.loads gets the raw bytes.
    with Path(path).open("rb") as src:
        raw = src.read()
        return orjson.loads(raw)


# --------------------------- flow
term_index_name = "mn_term"
meet_index_name = "mn_meet"
ment_index_name = "mn_meet_entity"
sections_index_name = ""

dash = "-" * 25

es_helper = ElasticHelper(
    es_url=ES_URL,
    es_user=ES_USER_NAME,
    es_pass=ES_PASSWORD,
)

############ DELETE INDEXES
# es_helper.deleteIndex(index_name=term_index_name)
# es_helper.deleteIndex(index_name=meet_index_name)
# es_helper.deleteIndex(index_name=ment_index_name)

############ CREATE INDEXES
# es_helper.createIndexIfNotExist(index_name_o=term_index_name)
# es_helper.createIndexIfNotExist(index_name_o=meet_index_name)
# es_helper.createIndexIfNotExist(index_name_o=ment_index_name)

# Underlying client object exposed by the helper; handed to scan() below.
es = es_helper.es

# Document fields requested via "_source" in the scan query.
# fields = list(MnMeet.model_fields.keys())
fields = [
    "id",
    "sanad_id",
    "main_type",
    "title",
    "author",
    "content",
]

# Earlier small-sample variant, kept for reference:
# old_data = es_helper.search(
#     index=old_index_name, _source=fields, query={"match_all": {}}, size=3
# )
# old_data = old_data["hits"]["hits"]  # don't use in scan

################### for all data
# Only documents whose "main_type" equals the given term are requested;
# swap the "query" value for {"match_all": {}} to fetch every document.
_meet_scan_body = {
    "_source": fields,
    "query": {"term": {"main_type": "جلسه علمی"}},
}

# scan() yields hits lazily; list() collects them all so they can be counted.
old_data = list(scan(es, index=meet_index_name, query=_meet_scan_body))

print(f"--- old_data {len(old_data)}")
# Write the hits gathered in old_data to a JSON file; the path is relative
# to the current working directory.
save_orjson(
    path="./data_content_1.json",
    data=old_data,
)