first step

init_mahdi 2025-11-27 20:31:12 +00:00
commit 1472bf0e9f
11 changed files with 2286 additions and 0 deletions

4
monir/.env Normal file

@@ -0,0 +1,4 @@
ES_URL = 'http://192.168.23.60:9200'
ES_USER_NAME = 'elastic'
ES_PASSWORD = '1234'
LLM_URL = 'http://2.188.15.102:8001/v1/'
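
These four settings are consumed at startup via python-dotenv; monir/main.py below reads them exactly this way (a minimal sketch, assuming the .env file sits in the working directory):

from dotenv import load_dotenv
import os

load_dotenv()  # picks up monir/.env from the current working directory
ES_URL = os.getenv("ES_URL")              # Elasticsearch endpoint
ES_USER_NAME = os.getenv("ES_USER_NAME")  # Elasticsearch credentials
ES_PASSWORD = os.getenv("ES_PASSWORD")
LLM_URL = os.getenv("LLM_URL")            # OpenAI-compatible LLM endpoint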

0
monir/__init__.py Normal file

3 binary files not shown.

339
monir/base_model.py Normal file

@@ -0,0 +1,339 @@
from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Union, Any
# Required field:
#   str = Field(..., description=...)
# Optional field:
#   Optional[str] = Field("", description=...)
class Properties(BaseModel):
"""
Standard Form Of Foreign Key To Other Index
"""
    id: str = Field(..., description="unique identifier")
    title: Optional[str] = Field("", description="title")
    author: Optional[str] = Field("", description="creator")
    sub_type: Optional[str] = Field("", description="secondary type")
class TreeInfo(BaseModel):
title: str
parent_id: str = Field(..., description="")
child_order: int
level: int
full_path: Optional[str] = Field("", description="")
path_headings: str
class NlpParses(BaseModel):
main_type: str
nlp_type: str
begin: int
end: int
text: str
referes: str
dependency_infos: str # nested
class Embeddings(BaseModel):
    type: Optional[str] = Field("", description="")
    dims: Optional[int] = Field(1024, description="")
    index: Optional[bool] = Field(True, description="")
    similarity: Optional[str] = Field("", description="")
class FileLinks(BaseModel):
title: str
link: str
type: str
description: str
class UserLogs(BaseModel):
id: str
user_id: int
username: str
time_edit: int
property: str
class MnSection(BaseModel):
id: str
old_id: str
main_type: str
html: str
content: str
meet_info: Properties
term_info: Properties
tree_info: TreeInfo
content_len: int
word_len: int
tags: str
keywords: str
sort_date_timestamp: int
nlp_parses: NlpParses
embeddings: Embeddings
file_links: FileLinks
time_edit: int
user_edit: int
user_logs: UserLogs
class MnTerm(BaseModel):
id: str
author: str
sub_type: Optional[str] = ""
title: Optional[str] = ""
    begin_date: Optional[int] = None
    end_date: Optional[int] = None
    begin_year: Optional[int] = None
main_type: Optional[str] = ""
content: Optional[str] = ""
tags: Optional[str] = ""
keywords: Optional[str] = ""
class MnMeetEntity(BaseModel):
id: str
main_type: str
sub_type: str
title: str
content: str
permit_tags: str
search_state: str
user_create: str
time_create: int
time_edit: int
file_links: FileLinks
meet_info: Properties
term_info: Properties
class Subjects(BaseModel):
id: int
title: str
class ReportInfo(BaseModel):
films: int
sounds: int
photos: int
class MnMeet(BaseModel):
"""
    Monir Meet standard field format
"""
id: str
sanad_id: str
main_type: str
sub_type: Optional[str] = ""
person_code: Optional[str] = ""
research_code: str
meet_code: str
old_meet_id: int
title: str
meet_no: int
author: str
    term_info: Optional[Properties] = Field(None, description="foreign key to the term index")
subtitle: str
subjects: Optional[List[Subjects]] = []
allwords: str
tags: Optional[List[str]] = []
keywords: str
verb: str
    sanad_year: Optional[int] = None
    sanad_date: Optional[int] = None
amplify: str
ralation: str
city: str
place: str
address: str
audience: str
attendees: str
report_info: ReportInfo
    mindex: Optional[str] = Field("", description="mindex: table of contents")
    mintro: Optional[str] = Field("", description="mintro: summary")
content: str
completion: Optional[str] = Field("", description="type: completion")
sort_date_timestamp: int
permit_tags: str
resource_info: str
class MnSanadLink(BaseModel):
text: Optional[str] = ""
link: str
in_search: bool
title: str
class TreeProperties(BaseModel):
    parent_id: str
child_order: int
level: int
full_path: str
title: str
path_headings: str
class NlpParses(BaseModel):
main_type: str
nlp_type: str
begin: int
end: int
text: str
referes: str
dependency_infos: Dict
class MNSection(BaseModel):
    main_type: str
    id: str
    html: str
    content: str
    meet_info: Properties
    term_info: Properties
    tree_info: TreeProperties
    content_len: int
    word_len: int
    tags: str
    keywords: str
    sort_date_timestamp: int
    nlp_parses: NlpParses
    embeddings: Embeddings
    file_links: FileLinks
    time_edit: int
    user_edit: int
    user_logs: UserLogs
class MnSanadVersionInfo(BaseModel):
timestamp: int
number: int
title: str
class SoundLinks(BaseModel):
link: str
title: str
class MnSanad(BaseModel):
    id: Optional[str] = ""
    sort_date_timestamp: Optional[int] = None
    title: Optional[str] = ""
    subtitle: Optional[str] = ""
    research_code: Optional[int] = None
    content: Optional[str] = ""
    version_info: Optional[MnSanadVersionInfo] = None
    meet_lid: Optional[int] = None
    meet_id: Optional[int] = None
    meet_no: Optional[int] = None
    meet_code: Optional[int] = None
    allwords: Optional[str] = ""
    keywords: Optional[str] = ""
    person_code: Optional[str] = ""
    subject: Optional[List] = []
    city: Optional[str] = ""
    author: str
    begin_year: Optional[int] = None
    begin_date: Optional[int] = None
    end_date: Optional[int] = None
    branch: Optional[str] = ""
    ralation: Optional[str] = ""
    research_id: Optional[int] = None
    mintro: Optional[str] = ""
    mindex: Optional[str] = ""
    RowNum: Optional[int] = None
    resource_info: Optional[str] = ""
    in_tadvin: Optional[bool] = None
    format: Optional[str] = ""
    verb: Optional[str] = ""
    address: Optional[str] = ""
    attendees: Optional[str] = ""
    amplify: Optional[str] = ""
    audience: Optional[str] = ""
    place: Optional[str] = ""
    permit_tags: Optional[str] = ""
    photos: Optional[int] = None
    tags: Optional[List[str]] = []
    films: Optional[int] = None
    sounds: Optional[int] = None
    file_links: Optional[List[MnSanadLink]] = []
    sound_links: Optional[List[SoundLinks]] = []
    video_links: Optional[List[SoundLinks]] = []
    photo_links: Optional[List[SoundLinks]] = []
    # model_config = {"exclude_": True}  # original config key is garbled; intent unclear
class MnSanad2Meet(BaseModel):
id: str
    sort_date_timestamp: Optional[int] = None
title: str
subtitle: str
research_code: str
format: str
content: str
# version_info: Optional[List|dict] = ""
meet_lid: Optional[str] = ""
meet_id: Optional[int] = ""
meet_no: Optional[int] = ""
meet_code: Optional[str] = ""
allwords: Optional[str] = ""
keywords: Optional[str] = ""
person_code: Optional[str] = ""
subject: Optional[List] = []
city: Optional[str] = ""
author: str
begin_year: Optional[int] = ""
begin_date: Optional[str] = ""
end_date: Optional[str] = ""
branch: str
ralation: Optional[str] = ""
research_id: int
mintro: Optional[str] = ""
mindex: Optional[str] = ""
# RowNum: Optional[int]= ""
resource_info: Optional[str] = ""
# in_tadvin: Optional[bool] = ""
verb: Optional[str] = ""
address: Optional[str] = ""
attendees: Optional[str] = ""
amplify: Optional[str] = ""
audience: Optional[str] = ""
place: Optional[str] = ""
permit_tags: Optional[str] = ""
photos: Optional[int] = 0
tags: Optional[List[str]] = []
films: Optional[int] = 0
sounds: Optional[int] = 0
    file_links: Optional[List[MnSanadLink]] = []
    sound_links: Optional[List[SoundLinks]] = []
    video_links: Optional[List[SoundLinks]] = []
    photo_links: Optional[List[SoundLinks]] = []
# model_config = {
# "exclude_""": True
# }
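
A minimal validation sketch for these models, assuming Pydantic v2 (consistent with the model_fields / model_config usage elsewhere in this commit). The raw_hit values are hypothetical; its keys are the required (non-Optional) fields of MnSanad2Meet:

raw_hit = {  # hypothetical _source of an Elasticsearch hit
    "id": "sanad-001",
    "title": "...",
    "subtitle": "",
    "research_code": "RC-01",
    "format": "text",
    "content": "...",
    "author": "unknown",
    "branch": "",
    "research_id": 0,
}
doc = MnSanad2Meet.model_validate(raw_hit)  # optional fields fall back to their defaults
payload = doc.model_dump()                  # plain dict, ready to index back into Elasticsearch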

349
monir/doc_type.py Normal file

@@ -0,0 +1,349 @@
type_count = [
{
"key": "جلسه علمی",
"doc_count": 7332
},
{
"key": "منبر",
"doc_count": 3970
},
{
"key": "درس خارج",
"doc_count": 2450
},
{
"key": "تدریس",
"doc_count": 1401
},
{
"key": "سخنرانی",
"doc_count": 1221
},
{
"key": "-",
"doc_count": 992
},
{
"key": "مشاوره",
"doc_count": 858
},
{
"key": "مدیریت",
"doc_count": 652
},
{
"key": "مصاحبه",
"doc_count": 628
},
{
"key": "مباحثه و گفتگو",
"doc_count": 587
},
{
"key": "جزوه",
"doc_count": 583
},
{
"key": "مناظره و گفتگو",
"doc_count": 496
},
{
"key": "گزارش",
"doc_count": 395
},
{
"key": "--",
"doc_count": 250
},
{
"key": "جدول",
"doc_count": 208
},
{
"key": "کتاب",
"doc_count": 173
},
{
"key": "صورت جلسه",
"doc_count": 126
},
{
"key": "فهرست",
"doc_count": 124
},
{
"key": "ارائه",
"doc_count": 90
},
{
"key": "بازدید (دیدار)",
"doc_count": 83
},
{
"key": "مقاله",
"doc_count": 81
},
{
"key": "طرح",
"doc_count": 74
},
{
"key": "کلیپ",
"doc_count": 68
},
{
"key": "گزارش رصد",
"doc_count": 64
},
{
"key": "نامه",
"doc_count": 48
},
{
"key": "نمودار",
"doc_count": 45
},
{
"key": "آیین نامه",
"doc_count": 36
},
{
"key": "تقریر",
"doc_count": 35
},
{
"key": "خلاصه",
"doc_count": 25
},
{
"key": "پیش نویس",
"doc_count": 25
},
{
"key": "نقد",
"doc_count": 23
},
{
"key": "چکیده",
"doc_count": 16
},
{
"key": "یادبود",
"doc_count": 13
},
{
"key": "فرم",
"doc_count": 12
},
{
"key": "فرم فیش",
"doc_count": 11
},
{
"key": "مناجات",
"doc_count": 10
},
{
"key": "دعا و مناجات",
"doc_count": 8
},
{
"key": "فیش",
"doc_count": 6
},
{
"key": "پرسش و پاسخ",
"doc_count": 6
},
{
"key": "بیانیه",
"doc_count": 5
},
{
"key": "اجلاسیه",
"doc_count": 4
},
{
"key": "پایان نامه",
"doc_count": 4
},
{
"key": "",
"doc_count": 3
},
{
"key": "آئین نامه",
"doc_count": 3
},
{
"key": "رزومه",
"doc_count": 3
},
{
"key": "قرارداد",
"doc_count": 3
},
{
"key": "مصوبه",
"doc_count": 3
},
{
"key": "نمونه سوال",
"doc_count": 3
},
{
"key": "همایش",
"doc_count": 3
},
{
"key": "پژوهش",
"doc_count": 3
},
{
"key": "چارت",
"doc_count": 3
},
{
"key": "کنفرانس",
"doc_count": 3
},
{
"key": "تلخیص",
"doc_count": 2
},
{
"key": "قرائت زیارت",
"doc_count": 2
},
{
"key": "لیست",
"doc_count": 2
    },
    {
"key": "متن جلسه",
"doc_count": 2
},
{
"key": "مستند تلوزیونی",
"doc_count": 2
},
{
"key": "نشست علمی",
"doc_count": 2
},
{
"key": "کتاب داخلی",
"doc_count": 2
},
{
"key": "گزارش جلسه",
"doc_count": 2
},
{
"key": "برنامه",
"doc_count": 1
},
{
"key": "بروشور",
"doc_count": 1
},
{
"key": "بزرگداشت",
"doc_count": 1
},
{
"key": "جزو",
"doc_count": 1
},
{
"key": "خطبه عقد(دائم)",
"doc_count": 1
},
{
"key": "روضه",
"doc_count": 1
},
{
"key": "زندگی نامه",
"doc_count": 1
},
{
"key": "زیارتنامه",
"doc_count": 1
},
{
"key": "سائر",
"doc_count": 1
},
{
"key": "سالگرد",
"doc_count": 1
},
{
"key": "سایر",
"doc_count": 1
},
{
"key": "طرج",
"doc_count": 1
},
{
"key": "عقد",
"doc_count": 1
},
{
"key": "ماتریس",
"doc_count": 1
},
{
"key": "مدل",
"doc_count": 1
},
{
"key": "مراحل دستیابی و به کارگیری الگوی پیشرفت اسلامی ـ ",
"doc_count": 1
},
{
"key": "مقالات",
"doc_count": 1
},
{
"key": "مقاله و ارائه",
"doc_count": 1
},
{
"key": "نماه",
"doc_count": 1
},
{
"key": "نمایه",
"doc_count": 1
},
{
"key": "هرم",
"doc_count": 1
},
{
"key": "پرسشنامه",
"doc_count": 1
},
{
"key": "پروژه",
"doc_count": 1
},
{
"key": "پیش نشست",
"doc_count": 1
},
{
"key": "کاربرگ",
"doc_count": 1
},
{
"key": "کتاب سایت",
"doc_count": 1
},
{
"key": "کمیسیون خبرگان",
"doc_count": 1
}
]
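
type_count reads like a frozen terms aggregation over main_type; a small summary sketch, assuming nothing beyond the structure above:

total_docs = sum(t["doc_count"] for t in type_count)
top5 = sorted(type_count, key=lambda t: t["doc_count"], reverse=True)[:5]
print(f"{len(type_count)} distinct types, {total_docs} documents in total")
for t in top5:
    print(f'{t["key"]}: {t["doc_count"]}')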

1134
monir/es_helper.py Normal file

File diff suppressed because it is too large

368
monir/llm_helper.py Normal file

@@ -0,0 +1,368 @@
from typing import List, Optional
from pathlib import Path
import os, orjson, time, json, re, asyncio, traceback
from openai import AsyncOpenAI
# --------------------------------------------------------------------
# ------------------------------ API processing ------------------------------
class AsyncCore:
def __init__(
self,
model_name,
task_name,
data_path,
output_schema,
api_url,
reasoning_effort='low',
top_p=1,
temperature=0.0,
max_token=128000,
output_path=None,
ai_code_version=None,
        request_timeout=30,  # seconds
api_key="EMPTY",
save_number=2,
):
self.save_number = save_number
# json file of data
self.data_path = data_path
self.task_name = task_name
if output_path is None:
output_path = f"./{task_name}"
self.output_path = Path(output_path)
self._temp_path = self.output_path / "batch_data"
self._temp_processed_id_path = self._temp_path / "processed_id.json"
# Create output directory and subdirectories if they don't exist
self.output_path.mkdir(parents=True, exist_ok=True)
self._temp_path.mkdir(parents=True, exist_ok=True)
# self._temp_processed_id_path.mkdir(parents=True, exist_ok=True)
self.request_timeout = request_timeout
self.model_name = model_name
self.api_key = api_key
self.output_schema = output_schema
self.api_url = api_url
self.reasoning_effort = reasoning_effort
self.top_p = top_p
self.temperature = temperature
self.max_token = max_token
if ai_code_version is None:
ai_code_version = f"{model_name}_{reasoning_effort}"
self.ai_code_version = ai_code_version
self.PRIMARY_KEY = {"system_prompt", "user_prompt", "id"}
try:
self.data = self.__data_process()
print(f"📦 Loaded {len(self.data)} words")
except Exception as e:
raise ValueError(
f"Data loading/validation failed: {e}\n{traceback.format_exc()}"
)
def __validate_item(self, item, idx):
# Mandatory fields
for key in self.PRIMARY_KEY:
if key not in item:
raise ValueError(f"Missing mandatory key '{key}' in item #{idx}")
if not isinstance(item[key], str):
raise TypeError(
f"Item #{idx}: '{key}' must be a string, got {type(item[key]).__name__}"
)
# Optional field: assistant_prompt
if "assistant_prompt" not in item or item["assistant_prompt"] is None:
item["assistant_prompt"] = None
else:
if not isinstance(item["assistant_prompt"], str):
raise TypeError(
f"Item #{idx}: 'assistant_prompt' must be a string or absent, got {type(item['assistant_prompt']).__name__}"
)
return item # now normalized
def __data_process(self):
raw_data = self.__load_orjson(self.data_path)
if not isinstance(raw_data, list):
raise ValueError("Data must be a list of dictionaries.")
processed_data = []
for idx, item in enumerate(raw_data):
if not isinstance(item, dict):
raise ValueError(f"Item #{idx} is not a dictionary.")
validated_item = self.__validate_item(item, idx)
processed_data.append(validated_item)
return processed_data
def __get_max_number_file(self, directory):
        # Pattern to match filenames like output_1.json, output_25.json, etc.
pattern = re.compile(r"output_(\d+)\.json$")
max_num = 0
for filename in os.listdir(directory):
match = pattern.match(filename)
if match:
num = int(match.group(1))
if num > max_num:
max_num = num
return max_num + 1
def __load_orjson(self, path: str | Path):
path = Path(path)
        with path.open("rb") as f:  # orjson needs the file opened in binary mode
return orjson.loads(f.read())
def __save_orjson(self, path, data):
with open(path, "wb") as f:
f.write(
orjson.dumps(data, option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS)
)
def merge_json_dir(self, input_path, output_path):
directory = Path(input_path)
if not directory.is_dir():
raise ValueError(f"Not valid PATH: {input_path}")
        seen_ids = set()  # fast lookup of ids we have already seen
        unique_data = []  # unique items only
failed_files = []
json_files = list(directory.glob("*.json"))
if not json_files:
print("⚠️ NO JSON File Found In This PATH")
return
for json_file in json_files:
try:
data = self.__load_orjson(json_file)
                if not data:  # empty or None
failed_files.append(json_file.name)
continue
if isinstance(data, list) and isinstance(data[0], dict):
for item in data:
item_id = item.get("id")
if item_id is None:
                            # no id: we assume only items with a valid id matter, so skip it
continue
if item_id not in seen_ids:
seen_ids.add(item_id)
unique_data.append(item)
else:
raise ValueError(f"no list available in this json -> {json_file}")
except (
json.JSONDecodeError,
ValueError,
OSError,
KeyError,
TypeError,
) as e:
# print(f"❌ Failed in process '{json_file.name}': {e}")
failed_files.append(json_file.name)
# گزارش خطاها
if failed_files:
print("\n❌ We lose this file:")
for name in failed_files:
print(f" - {name}")
else:
print("\n✅ All JSON added")
        # save the merged output
try:
self.__save_orjson(data=unique_data, path=output_path)
print(
f"\n💾 Final file saved: {output_path} (Total unique items: {len(unique_data)})"
)
except Exception as e:
print(f"❌ Error in saving final file: {e}")
    def make_new_processed_ids_from_file(self, json_in, out_path):
        data = self.__load_orjson(json_in)
        final_ids = []
        for d in data:
            if d["id"]:
                final_ids.append(d["id"])
        final_ids = list(set(final_ids))
        print(f"-- len ids {len(final_ids)}")
        self.__save_orjson(data=final_ids, path=out_path)
# ------------------------------ Main ------------------------------
async def __process_item(self, client, item):
try:
messages = [
{"role": "system", "content": item["system_prompt"]},
{"role": "user", "content": item["user_prompt"]},
]
if item.get("assistant_prompt"):
messages.append(
{"role": "assistant", "content": item["assistant_prompt"]}
)
response = await client.chat.completions.parse(
model=self.model_name,
messages=messages,
temperature=self.temperature,
top_p=self.top_p,
reasoning_effort=self.reasoning_effort,
max_tokens=self.max_token,
stop=None,
response_format=self.output_schema,
)
parsed = (
response.choices[0].message.parsed
if response and response.choices and response.choices[0].message.parsed
else {"raw_text": str(response)}
)
parsed = self.output_schema.model_validate(parsed)
parsed = dict(parsed)
parsed["ai_code_version"] = self.ai_code_version
parsed["id"] = item["id"]
return parsed, 200
except asyncio.TimeoutError:
print(f"⏳ Timeout on item {item['id']}")
return None, 408
        except Exception as e:
            print(f"⚠️ Error in __process_item {item['id']}: {e}")
            traceback.print_exc()
            return None, 400
    def async_eval(self, processed_id: Optional[List] = None):
        try:
            asyncio.run(self.__async_eval(processed_id or []))
except KeyboardInterrupt:
print("\n🛑 Interrupted by user.")
traceback.print_exc()
async def __async_eval(self, processed_id: List):
"""
اجرای اصلی تکهستهای و async برای تولید خروجی نهایی.
"""
print("🔹 Starting async data processing...")
        # ------------------ Step 1: recover previously processed ids ------------------
if not processed_id:
try:
processed_id = self.__load_orjson(self._temp_processed_id_path)
print(
f"📂 Loaded existing processed_id from {self._temp_processed_id_path}"
)
except Exception:
print("⚠️ No valid processed_id found. Starting fresh.")
processed_id = []
        # ------------------ Step 2: prepare the data ------------------
all_processed_id = set(processed_id)
all_results = []
total_time = []
data = [item for item in self.data if item.get("id") not in all_processed_id]
print(
f" Total items: {len(self.data)} - {len(all_processed_id)} = {len(data)}"
)
        # nothing left to process
if not data:
print("✅ Nothing new to process. All items are already done.")
return
        # ------------------ Step 3: start processing ------------------
print(f"🤖 Model: {self.model_name} | Reasoning: {self.reasoning_effort}")
async with AsyncOpenAI(base_url=self.api_url, api_key=self.api_key) as client:
semaphore = asyncio.Semaphore(5)
async def limited_process(item):
async with semaphore:
return await self.__process_item(client, item)
tasks = [asyncio.create_task(limited_process(item)) for item in data]
total_i = 0
            # ✅ process tasks in completion order (not list order)
for i, task in enumerate(asyncio.as_completed(tasks), start=1):
start = time.time()
try:
                    parsed, status_code = await asyncio.wait_for(
                        task, timeout=self.request_timeout
                    )  # ⏱ cap the wait at request_timeout
except asyncio.TimeoutError:
print(f"⏳ Task {i} timed out completely")
parsed, status_code = None, 408
total_time.append(time.time() - start)
if status_code == 200:
all_results.append(parsed)
all_processed_id.add(parsed.get("id"))
else:
print(f"⚠️ Skipped item {parsed.get('id')} (status={status_code})")
total_i += 1
                # ✅ checkpoint every save_number items
if total_i >= self.save_number:
print(f"total_i {total_i}")
print(f"self.save_number {self.save_number}")
total_i = 0
self.__save_orjson(
data=list(all_processed_id),
path=self._temp_processed_id_path,
)
print(f"💾 Auto-saved processed ids: {len(all_processed_id)}")
number = self.__get_max_number_file(self._temp_path)
print(f"number {number}")
temp_output_path = self._temp_path / f"output_{number}.json"
self.__save_orjson(data=list(all_results), path=temp_output_path)
print(f"💾 Auto-saved partial data: {len(all_results)}")
all_results.clear()
            # ✅ after all tasks finish, save any remaining data
if total_i > 0 or len(all_results) > 0:
print("💾 Final save of remaining data...")
self.__save_orjson(
data=list(all_processed_id),
path=self._temp_processed_id_path,
)
print(f"💾 Auto-saved processed ids: {len(all_processed_id)}")
number = self.__get_max_number_file(self._temp_path)
print(f"number {number}")
temp_output_path = self._temp_path / f"output_{number}.json"
self.__save_orjson(data=list(all_results), path=temp_output_path)
print(f"💾 Auto-saved partial data: {len(all_results)}")
all_results.clear()
        # ------------------ Step 4: save the output ------------------
final_data_path = self.output_path / f"final_data_{self.task_name}.json"
processed_id_path = self.output_path / "processed_id.json"
self.merge_json_dir(input_path=self._temp_path, output_path=final_data_path)
all_results = self.__load_orjson(final_data_path)
        # make_new_processed_ids_from_file()
self.__save_orjson(data=list(all_processed_id), path=processed_id_path)
self.__save_orjson(data=all_results, path=final_data_path)
avg_time = (sum(total_time) / len(total_time)) if total_time else 0
print(
f"\n✅ Processing completed!\n"
f"📊 Total-Data: {len(data)} | "
f"⭕ Ignored-Data: {len(processed_id)} | "
f"📦 Proccessed-Data: {len(all_results)} | "
f"❌ Loss-Data: {len(data)-len(all_results)} | "
f"🕒 Avg Time: {avg_time:.2f}'s per item | "
f"🕒 Total Time: {sum(total_time):.4f}'s | "
f"💾 Results saved to: {final_data_path}"
)
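
A minimal driver sketch for AsyncCore. The Summary schema, model name, and input file are hypothetical; the constructor arguments and the async_eval entry point come from the class above:

from pydantic import BaseModel
from llm_helper import AsyncCore

class Summary(BaseModel):  # hypothetical output schema
    summary: str

core = AsyncCore(
    model_name="my-model",                    # hypothetical model name
    task_name="summarize",
    data_path="./summary_input.json",         # list of {id, system_prompt, user_prompt} items
    output_schema=Summary,
    api_url="http://2.188.15.102:8001/v1/",   # LLM_URL from monir/.env
)
core.async_eval()  # resumes from batch_data/processed_id.json when present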

88
monir/main.py Normal file

@@ -0,0 +1,88 @@
from dotenv import load_dotenv
import os
from llm_helper import AsyncCore
from es_helper import ElasticHelper
from base_model import MnMeet
import time, traceback, uuid, orjson, re
from datetime import datetime, timezone
from elasticsearch.helpers import scan
from typing import List, Union
from pathlib import Path
from collections import defaultdict
load_dotenv()
ES_URL = os.getenv("ES_URL")
ES_USER_NAME = os.getenv("ES_USER_NAME")
ES_PASSWORD = os.getenv("ES_PASSWORD")
LLM_URL = os.getenv("LLM_URL")
def save_orjson(path, data):
with open(path, "wb") as f:
f.write(
orjson.dumps(data, option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS)
)
def load_orjson(path: str | Path):
path = Path(path)
    with path.open("rb") as f:  # orjson needs the file opened in binary mode
return orjson.loads(f.read())
# --------------------------- flow
term_index_name = "mn_term"
meet_index_name = "mn_meet"
ment_index_name = "mn_meet_entity"
sections_index_name = ""
dash = "-" * 25
es_helper = ElasticHelper(
es_url=ES_URL,
es_user=ES_USER_NAME,
es_pass=ES_PASSWORD,
)
############ DELETE INDEXES
# es_helper.deleteIndex(index_name=term_index_name)
# es_helper.deleteIndex(index_name=meet_index_name)
# es_helper.deleteIndex(index_name=ment_index_name)
############ CREATE INDEXES
# es_helper.createIndexIfNotExist(index_name_o=term_index_name)
# es_helper.createIndexIfNotExist(index_name_o=meet_index_name)
# es_helper.createIndexIfNotExist(index_name_o=ment_index_name)
es = es_helper.es
# fields = list(MnMeet.model_fields.keys())
fields = [
"id",
"sanad_id",
"main_type",
"title",
"author",
"content",
]
# old_data = es_helper.search(
# index=old_index_name, _source=fields, query={"match_all": {}}, size=3
# )
# old_data = old_data["hits"]["hits"] # don't use in scan
################### for all data
old_data = list(
    scan(
        es,
        index=meet_index_name,
        query={
            "_source": fields,
            "query": {"term": {"main_type": "جلسه علمی"}},
            # "query": {"match_all": {}},
        },
    )
)
print(f'--- old_data {len(old_data)}')
save_orjson(
data=old_data,
path='./data_content_1.json'
)
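
The saved hits keep their Elasticsearch envelopes (_id, _source); a hedged sketch of reshaping data_content_1.json into the {id, system_prompt, user_prompt} items that AsyncCore validates against PRIMARY_KEY (the prompt strings are hypothetical placeholders):

hits = load_orjson("./data_content_1.json")
items = [
    {
        "id": h["_id"],
        "system_prompt": "You are a careful summarizer.",  # hypothetical prompt
        "user_prompt": h["_source"].get("content", ""),
    }
    for h in hits
]
save_orjson(path="./summary_input.json", data=items)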

4
monir/requirements.txt Normal file

@@ -0,0 +1,4 @@
python-dotenv
openai
elasticsearch==8.13.0
orjson