first step
This commit is contained in: commit 1472bf0e9f
4 monir/.env Normal file
@@ -0,0 +1,4 @@
ES_URL = 'http://192.168.23.60/9200'
ES_USER_NAME = 'elastic'
ES_PASSWORD = '1234'
LLM_URL = 'http://2.188.15.102:8001/v1/'
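These variables are read at startup with python-dotenv (see monir/main.py below). A minimal sketch of the consumption pattern, assuming the script runs from the monir/ directory so `load_dotenv()` finds this file:

from dotenv import load_dotenv
import os

load_dotenv()  # reads .env from the working directory (or a parent)
ES_URL = os.getenv("ES_URL")    # Elasticsearch endpoint from the file above
LLM_URL = os.getenv("LLM_URL")  # OpenAI-compatible LLM endpoint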
0 monir/__init__.py Normal file
BIN monir/__pycache__/base_model.cpython-310.pyc Normal file
Binary file not shown.
BIN monir/__pycache__/es_helper.cpython-310.pyc Normal file
Binary file not shown.
BIN monir/__pycache__/llm_helper.cpython-310.pyc Normal file
Binary file not shown.
339 monir/base_model.py Normal file
@@ -0,0 +1,339 @@
from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Union, Any


# Mandatory field:
# str = Field(..., description=)
# Optional field:
# Optional[str] = Field("", description=)


class Properties(BaseModel):
    """
    Standard form of foreign key to another index.
    """

    id: str = Field(..., description="unique identifier")
    title: Optional[str] = Field("", description="title")
    author: Optional[str] = Field("", description="creator")
    sub_type: Optional[str] = Field("", description="secondary type")


class TreeInfo(BaseModel):
    title: str
    parent_id: str = Field(..., description="")
    child_order: int
    level: int
    full_path: Optional[str] = Field("", description="")
    path_headings: str


class NlpParses(BaseModel):
    main_type: str
    nlp_type: str
    begin: int
    end: int
    text: str
    referes: str
    dependency_infos: str  # nested


class Embeddings(BaseModel):
    # The original assignments were wrapped in one-element tuples
    # (stray trailing commas), which breaks validation; plain Field
    # defaults are used instead.
    type: Optional[str] = Field("", description="")
    dims: Optional[int] = Field(1024, description="")
    index: Optional[bool] = Field(True, description="")
    similarity: Optional[str] = Field("", description="")


class FileLinks(BaseModel):
    title: str
    link: str
    type: str
    description: str


class UserLogs(BaseModel):
    id: str
    user_id: int
    username: str
    time_edit: int
    property: str


class MnSection(BaseModel):
    id: str
    old_id: str
    main_type: str
    html: str
    content: str
    meet_info: Properties
    term_info: Properties
    tree_info: TreeInfo
    content_len: int
    word_len: int
    tags: str
    keywords: str
    sort_date_timestamp: int
    nlp_parses: NlpParses
    embeddings: Embeddings
    file_links: FileLinks
    time_edit: int
    user_edit: int
    user_logs: UserLogs


class MnTerm(BaseModel):
    id: str
    author: str
    sub_type: Optional[str] = ""
    title: Optional[str] = ""
    # Integer fields default to None rather than "" so the declared
    # types and the defaults agree.
    begin_date: Optional[int] = None
    end_date: Optional[int] = None
    begin_year: Optional[int] = None
    main_type: Optional[str] = ""
    content: Optional[str] = ""
    tags: Optional[str] = ""
    keywords: Optional[str] = ""


class MnMeetEntity(BaseModel):
    id: str
    main_type: str
    sub_type: str
    title: str
    content: str
    permit_tags: str
    search_state: str
    user_create: str
    time_create: int
    time_edit: int
    file_links: FileLinks
    meet_info: Properties
    term_info: Properties


class Subjects(BaseModel):
    id: int
    title: str


class ReportInfo(BaseModel):
    films: int
    sounds: int
    photos: int


class MnMeet(BaseModel):
    """
    Monir Meet standard field format.
    """

    id: str
    sanad_id: str
    main_type: str
    sub_type: Optional[str] = ""
    person_code: Optional[str] = ""
    research_code: str
    meet_code: str
    old_meet_id: int
    title: str
    meet_no: int
    author: str
    term_info: Optional[Properties] = Field(None, description="join key to the term index")
    subtitle: str
    subjects: Optional[List[Subjects]] = []
    allwords: str
    tags: Optional[List[str]] = []
    keywords: str
    verb: str
    sanad_year: Optional[int] = None
    sanad_date: Optional[int] = None
    amplify: str
    ralation: str
    city: str
    place: str
    address: str
    audience: str
    attendees: str
    report_info: ReportInfo
    mindex: Optional[str] = Field("", description="mindex: table-of-contents index")
    mintro: Optional[str] = Field("", description="mintro: summary")
    content: str
    completion: Optional[str] = Field("", description="type: completion")
    sort_date_timestamp: int
    permit_tags: str
    resource_info: str


class MnSanadLink(BaseModel):
    text: Optional[str] = ""
    link: str
    in_search: bool
    title: str


# NOTE: the definitions below redefine (and shadow) the Properties,
# NlpParses, and UserLogs classes declared earlier in this module.
class Properties(BaseModel):
    """
    Standard form of foreign key to another index.
    """

    id: str = Field(..., description="unique identifier")
    title: Optional[str] = Field("", description="title")
    author: Optional[str] = Field("", description="creator")
    sub_type: Optional[str] = Field("", description="secondary type")


class TreeProperties(BaseModel):
    parent_id: str
    child_order: int
    level: int
    full_path: str
    title: str
    path_headings: str


class NlpParses(BaseModel):
    main_type: str
    nlp_type: str
    begin: int
    end: int
    text: str
    referes: str
    dependency_infos: Dict


class UserLogs(BaseModel):
    id: str
    user_id: int
    username: str
    time_edit: int
    property: str


class MNSection(BaseModel):
    main_type: str
    id: str
    html: str
    content: str
    meet_info: Properties
    term_info: Properties
    tree_info: TreeProperties
    content_len: int
    word_len: int
    tags: str
    keywords: str
    sort_date_timestamp: int
    nlp_parses: NlpParses
    embeddings: Embeddings
    file_links: FileLinks
    time_edit: int
    user_edit: int
    user_logs: UserLogs


class MnSanadVersionInfo(BaseModel):
    timestamp: int
    number: int
    title: str


class SoundLinks(BaseModel):
    link: str
    title: str


class MnSanad(BaseModel):
    # Optional fields whose type is not str default to None (or an
    # empty container) instead of "" so defaults match the annotations.
    id: Optional[str] = ""
    sort_date_timestamp: Optional[int] = None
    title: Optional[str] = ""
    subtitle: Optional[str] = ""
    research_code: Optional[int] = None
    content: Optional[str] = ""
    version_info: Optional[MnSanadVersionInfo] = None
    meet_lid: Optional[int] = None
    meet_id: Optional[int] = None
    meet_no: Optional[int] = None
    meet_code: Optional[int] = None
    allwords: Optional[str] = ""
    keywords: Optional[str] = ""
    person_code: Optional[str] = ""
    subject: Optional[List] = []
    city: Optional[str] = ""
    author: str
    begin_year: Optional[int] = None
    begin_date: Optional[int] = None
    end_date: Optional[int] = None
    branch: Optional[str] = ""
    ralation: Optional[str] = ""
    research_id: Optional[int] = None
    mintro: Optional[str] = ""
    mindex: Optional[str] = ""
    RowNum: Optional[int] = None
    resource_info: Optional[str] = ""
    in_tadvin: Optional[bool] = None
    format: Optional[str] = ""
    verb: Optional[str] = ""
    address: Optional[str] = ""
    attendees: Optional[str] = ""
    amplify: Optional[str] = ""
    audience: Optional[str] = ""
    place: Optional[str] = ""
    permit_tags: Optional[str] = ""
    photos: Optional[int] = None
    tags: Optional[List[str]] = []
    films: Optional[int] = None
    sounds: Optional[int] = None
    file_links: Optional[List[MnSanadLink]] = []
    sound_links: Optional[List[SoundLinks]] = []
    video_links: Optional[List[SoundLinks]] = []
    photo_links: Optional[List[SoundLinks]] = []

    model_config = {"exclude_": True}  # "exclude_" is not a recognized pydantic config key


class MnSanad2Meet(BaseModel):
    id: str
    sort_date_timestamp: Optional[int] = None
    title: str
    subtitle: str
    research_code: str
    format: str
    content: str
    # version_info: Optional[List | dict] = None
    meet_lid: Optional[str] = ""
    meet_id: Optional[int] = None
    meet_no: Optional[int] = None
    meet_code: Optional[str] = ""
    allwords: Optional[str] = ""
    keywords: Optional[str] = ""
    person_code: Optional[str] = ""
    subject: Optional[List] = []
    city: Optional[str] = ""
    author: str
    begin_year: Optional[int] = None
    begin_date: Optional[str] = ""
    end_date: Optional[str] = ""
    branch: str
    ralation: Optional[str] = ""
    research_id: int
    mintro: Optional[str] = ""
    mindex: Optional[str] = ""
    # RowNum: Optional[int] = None
    resource_info: Optional[str] = ""
    # in_tadvin: Optional[bool] = None
    verb: Optional[str] = ""
    address: Optional[str] = ""
    attendees: Optional[str] = ""
    amplify: Optional[str] = ""
    audience: Optional[str] = ""
    place: Optional[str] = ""
    permit_tags: Optional[str] = ""
    photos: Optional[int] = 0
    tags: Optional[List[str]] = []
    films: Optional[int] = 0
    sounds: Optional[int] = 0
    file_links: Optional[List[MnSanadLink]] = []
    sound_links: Optional[List[SoundLinks]] = []
    video_links: Optional[List[SoundLinks]] = []
    photo_links: Optional[List[SoundLinks]] = []

    # model_config = {
    #     "exclude_": True
    # }
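base_model.py targets pydantic v2: llm_helper.py calls model_validate on the output schema and main.py references MnMeet.model_fields. A minimal usage sketch with the small Properties model; the payload values here are hypothetical:

from base_model import MnMeet, Properties

doc = {"id": "term_42", "title": "sample", "author": "", "sub_type": ""}
props = Properties.model_validate(doc)   # raises ValidationError on bad input
print(props.model_dump())                # {'id': 'term_42', 'title': 'sample', ...}
print(list(MnMeet.model_fields.keys()))  # the field list main.py can derive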
349 monir/doc_type.py Normal file
@@ -0,0 +1,349 @@
type_count = [
    {"key": "جلسه علمی", "doc_count": 7332},
    {"key": "منبر", "doc_count": 3970},
    {"key": "درس خارج", "doc_count": 2450},
    {"key": "تدریس", "doc_count": 1401},
    {"key": "سخنرانی", "doc_count": 1221},
    {"key": "-", "doc_count": 992},
    {"key": "مشاوره", "doc_count": 858},
    {"key": "مدیریت", "doc_count": 652},
    {"key": "مصاحبه", "doc_count": 628},
    {"key": "مباحثه و گفتگو", "doc_count": 587},
    {"key": "جزوه", "doc_count": 583},
    {"key": "مناظره و گفتگو", "doc_count": 496},
    {"key": "گزارش", "doc_count": 395},
    {"key": "--", "doc_count": 250},
    {"key": "جدول", "doc_count": 208},
    {"key": "کتاب", "doc_count": 173},
    {"key": "صورت جلسه", "doc_count": 126},
    {"key": "فهرست", "doc_count": 124},
    {"key": "ارائه", "doc_count": 90},
    {"key": "بازدید (دیدار)", "doc_count": 83},
    {"key": "مقاله", "doc_count": 81},
    {"key": "طرح", "doc_count": 74},
    {"key": "کلیپ", "doc_count": 68},
    {"key": "گزارش رصد", "doc_count": 64},
    {"key": "نامه", "doc_count": 48},
    {"key": "نمودار", "doc_count": 45},
    {"key": "آیین نامه", "doc_count": 36},
    {"key": "تقریر", "doc_count": 35},
    {"key": "خلاصه", "doc_count": 25},
    {"key": "پیش نویس", "doc_count": 25},
    {"key": "نقد", "doc_count": 23},
    {"key": "چکیده", "doc_count": 16},
    {"key": "یادبود", "doc_count": 13},
    {"key": "فرم", "doc_count": 12},
    {"key": "فرم فیش", "doc_count": 11},
    {"key": "مناجات", "doc_count": 10},
    {"key": "دعا و مناجات", "doc_count": 8},
    {"key": "فیش", "doc_count": 6},
    {"key": "پرسش و پاسخ", "doc_count": 6},
    {"key": "بیانیه", "doc_count": 5},
    {"key": "اجلاسیه", "doc_count": 4},
    {"key": "پایان نامه", "doc_count": 4},
    {"key": "", "doc_count": 3},
    {"key": "آئین نامه", "doc_count": 3},
    {"key": "رزومه", "doc_count": 3},
    {"key": "قرارداد", "doc_count": 3},
    {"key": "مصوبه", "doc_count": 3},
    {"key": "نمونه سوال", "doc_count": 3},
    {"key": "همایش", "doc_count": 3},
    {"key": "پژوهش", "doc_count": 3},
    {"key": "چارت", "doc_count": 3},
    {"key": "کنفرانس", "doc_count": 3},
    {"key": "تلخیص", "doc_count": 2},
    {"key": "قرائت زیارت", "doc_count": 2},
    {"key": "لیست", "doc_count": 2},
    {"key": "متن جلسه", "doc_count": 2},
    {"key": "مستند تلوزیونی", "doc_count": 2},
    {"key": "نشست علمی", "doc_count": 2},
    {"key": "کتاب داخلی", "doc_count": 2},
    {"key": "گزارش جلسه", "doc_count": 2},
    {"key": "برنامه", "doc_count": 1},
    {"key": "بروشور", "doc_count": 1},
    {"key": "بزرگداشت", "doc_count": 1},
    {"key": "جزو", "doc_count": 1},
    {"key": "خطبه عقد(دائم)", "doc_count": 1},
    {"key": "روضه", "doc_count": 1},
    {"key": "زندگی نامه", "doc_count": 1},
    {"key": "زیارتنامه", "doc_count": 1},
    {"key": "سائر", "doc_count": 1},
    {"key": "سالگرد", "doc_count": 1},
    {"key": "سایر", "doc_count": 1},
    {"key": "طرج", "doc_count": 1},
    {"key": "عقد", "doc_count": 1},
    {"key": "ماتریس", "doc_count": 1},
    {"key": "مدل", "doc_count": 1},
    {"key": "مراحل دستیابی و به کارگیری الگوی پیشرفت اسلامی ـ ", "doc_count": 1},
    {"key": "مقالات", "doc_count": 1},
    {"key": "مقاله و ارائه", "doc_count": 1},
    {"key": "نماه", "doc_count": 1},
    {"key": "نمایه", "doc_count": 1},
    {"key": "هرم", "doc_count": 1},
    {"key": "پرسشنامه", "doc_count": 1},
    {"key": "پروژه", "doc_count": 1},
    {"key": "پیش نشست", "doc_count": 1},
    {"key": "کاربرگ", "doc_count": 1},
    {"key": "کتاب سایت", "doc_count": 1},
    {"key": "کمیسیون خبرگان", "doc_count": 1},
]
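type_count looks like a static snapshot of main_type buckets (apparently the output of an Elasticsearch terms aggregation). A small sketch of how it can be queried in code:

from doc_type import type_count

total = sum(t["doc_count"] for t in type_count)
top5 = sorted(type_count, key=lambda t: t["doc_count"], reverse=True)[:5]
print(total)  # total documents across all types
for t in top5:
    print(t["key"], t["doc_count"])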
1134 monir/es_helper.py Normal file
File diff suppressed because it is too large
368 monir/llm_helper.py Normal file
@@ -0,0 +1,368 @@
from typing import List
from pathlib import Path
import os, orjson, time, json, re, asyncio, traceback
from openai import AsyncOpenAI

# --------------------------------------------------------------------


# ------------------------------ API processing ------------------------------
class AsyncCore:
    def __init__(
        self,
        model_name,
        task_name,
        data_path,
        output_schema,
        api_url,
        reasoning_effort='low',
        top_p=1,
        temperature=0.0,
        max_token=128000,
        output_path=None,
        ai_code_version=None,
        request_timeout=30,  # seconds
        api_key="EMPTY",
        save_number=2,
    ):

        self.save_number = save_number
        # json file of data
        self.data_path = data_path

        self.task_name = task_name
        if output_path is None:
            output_path = f"./{task_name}"

        self.output_path = Path(output_path)
        self._temp_path = self.output_path / "batch_data"
        self._temp_processed_id_path = self._temp_path / "processed_id.json"

        # Create output directory and subdirectories if they don't exist
        self.output_path.mkdir(parents=True, exist_ok=True)
        self._temp_path.mkdir(parents=True, exist_ok=True)
        # self._temp_processed_id_path.mkdir(parents=True, exist_ok=True)

        self.request_timeout = request_timeout
        self.model_name = model_name
        self.api_key = api_key
        self.output_schema = output_schema
        self.api_url = api_url
        self.reasoning_effort = reasoning_effort
        self.top_p = top_p
        self.temperature = temperature
        self.max_token = max_token

        if ai_code_version is None:
            ai_code_version = f"{model_name}_{reasoning_effort}"
        self.ai_code_version = ai_code_version

        self.PRIMARY_KEY = {"system_prompt", "user_prompt", "id"}

        try:
            self.data = self.__data_process()
            print(f"📦 Loaded {len(self.data)} items")
        except Exception as e:
            raise ValueError(
                f"Data loading/validation failed: {e}\n{traceback.format_exc()}"
            )

    def __validate_item(self, item, idx):
        # Mandatory fields
        for key in self.PRIMARY_KEY:
            if key not in item:
                raise ValueError(f"Missing mandatory key '{key}' in item #{idx}")
            if not isinstance(item[key], str):
                raise TypeError(
                    f"Item #{idx}: '{key}' must be a string, got {type(item[key]).__name__}"
                )

        # Optional field: assistant_prompt
        if "assistant_prompt" not in item or item["assistant_prompt"] is None:
            item["assistant_prompt"] = None
        else:
            if not isinstance(item["assistant_prompt"], str):
                raise TypeError(
                    f"Item #{idx}: 'assistant_prompt' must be a string or absent, got {type(item['assistant_prompt']).__name__}"
                )

        return item  # now normalized

    def __data_process(self):
        raw_data = self.__load_orjson(self.data_path)
        if not isinstance(raw_data, list):
            raise ValueError("Data must be a list of dictionaries.")

        processed_data = []
        for idx, item in enumerate(raw_data):
            if not isinstance(item, dict):
                raise ValueError(f"Item #{idx} is not a dictionary.")
            validated_item = self.__validate_item(item, idx)
            processed_data.append(validated_item)

        return processed_data

    def __get_max_number_file(self, directory):
        # Pattern to match filenames like output_1.json, output_25.json, etc.
        pattern = re.compile(r"output_(\d+)\.json$")
        max_num = 0

        for filename in os.listdir(directory):
            match = pattern.match(filename)
            if match:
                num = int(match.group(1))
                if num > max_num:
                    max_num = num
        return max_num + 1

    def __load_orjson(self, path: str | Path):
        path = Path(path)
        with path.open("rb") as f:  # orjson needs the file opened in binary mode
            return orjson.loads(f.read())

    def __save_orjson(self, path, data):
        with open(path, "wb") as f:
            f.write(
                orjson.dumps(data, option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS)
            )

    def merge_json_dir(self, input_path, output_path):
        directory = Path(input_path)
        if not directory.is_dir():
            raise ValueError(f"Not valid PATH: {input_path}")

        seen_ids = set()   # tracks ids already seen (fast membership test)
        unique_data = []   # unique items only
        failed_files = []

        json_files = list(directory.glob("*.json"))
        if not json_files:
            print("⚠️ NO JSON File Found In This PATH")
            return

        for json_file in json_files:
            try:
                data = self.__load_orjson(json_file)
                if not data:  # empty or None
                    failed_files.append(json_file.name)
                    continue

                if isinstance(data, list) and isinstance(data[0], dict):
                    for item in data:
                        item_id = item.get("id")
                        if item_id is None:
                            # items without an id could be kept or dropped;
                            # here we assume only items with a valid id matter
                            continue
                        if item_id not in seen_ids:
                            seen_ids.add(item_id)
                            unique_data.append(item)
                else:
                    raise ValueError(f"no list available in this json -> {json_file}")
            except (
                json.JSONDecodeError,
                ValueError,
                OSError,
                KeyError,
                TypeError,
            ) as e:
                # print(f"❌ Failed in process '{json_file.name}': {e}")
                failed_files.append(json_file.name)

        # error report
        if failed_files:
            print("\n❌ We lost these files:")
            for name in failed_files:
                print(f" - {name}")
        else:
            print("\n✅ All JSON added")

        # save the output
        try:
            self.__save_orjson(data=unique_data, path=output_path)
            print(
                f"\n💾 Final file saved: {output_path} (Total unique items: {len(unique_data)})"
            )
        except Exception as e:
            print(f"❌ Error in saving final file: {e}")

    def make_new_proccessed_ids_from_file(self, json_in, out_path):
        data = self.__load_orjson(json_in)

        finall_data = []
        for d in data:
            if d["id"]:
                finall_data.append(d["id"])
        finall_data = set(finall_data)
        finall_data = list(finall_data)
        print(f"-- len ids {len(finall_data)}")

        self.__save_orjson(data=finall_data, path=out_path)

    # ------------------------------ Main ------------------------------
    async def __process_item(self, client, item):
        try:
            messages = [
                {"role": "system", "content": item["system_prompt"]},
                {"role": "user", "content": item["user_prompt"]},
            ]
            if item.get("assistant_prompt"):
                messages.append(
                    {"role": "assistant", "content": item["assistant_prompt"]}
                )

            response = await client.chat.completions.parse(
                model=self.model_name,
                messages=messages,
                temperature=self.temperature,
                top_p=self.top_p,
                reasoning_effort=self.reasoning_effort,
                max_tokens=self.max_token,
                stop=None,
                response_format=self.output_schema,
            )

            parsed = (
                response.choices[0].message.parsed
                if response and response.choices and response.choices[0].message.parsed
                else {"raw_text": str(response)}
            )

            parsed = self.output_schema.model_validate(parsed)
            parsed = dict(parsed)
            parsed["ai_code_version"] = self.ai_code_version
            parsed["id"] = item["id"]
            return parsed, 200

        except asyncio.TimeoutError:
            print(f"⏳ Timeout on item {item['id']}")
            return None, 408

        except Exception as e:
            # format_exc() returns the traceback string (print_exc() would
            # print it separately and put None into the message)
            print(f"⚠️ Error __process_item {item['id']}: {traceback.format_exc()}")
            return None, 400

    def async_eval(self, processed_id: List = []):
        try:
            asyncio.run(self.__async_eval(processed_id))
        except KeyboardInterrupt:
            print("\n🛑 Interrupted by user.")
            traceback.print_exc()

    async def __async_eval(self, processed_id: List):
        """
        Main single-process async run that produces the final output.
        """
        print("🔹 Starting async data processing...")

        # ------------------ Step 1: recover previously processed ids ------------------
        if not processed_id:
            try:
                processed_id = self.__load_orjson(self._temp_processed_id_path)
                print(
                    f"📂 Loaded existing processed_id from {self._temp_processed_id_path}"
                )
            except Exception:
                print("⚠️ No valid processed_id found. Starting fresh.")
                processed_id = []

        # ------------------ Step 2: prepare the data ------------------
        all_processed_id = set(processed_id)
        all_results = []
        total_time = []

        data = [item for item in self.data if item.get("id") not in all_processed_id]
        print(
            f"➕ Total items: {len(self.data)} - {len(all_processed_id)} = {len(data)}"
        )

        # nothing left to process
        if not data:
            print("✅ Nothing new to process. All items are already done.")
            return

        # ------------------ Step 3: start processing ------------------
        print(f"🤖 Model: {self.model_name} | Reasoning: {self.reasoning_effort}")
        async with AsyncOpenAI(base_url=self.api_url, api_key=self.api_key) as client:
            semaphore = asyncio.Semaphore(5)

            async def limited_process(item):
                async with semaphore:
                    return await self.__process_item(client, item)

            tasks = [asyncio.create_task(limited_process(item)) for item in data]

            total_i = 0
            # ✅ handle results in completion order (not list order)
            for i, task in enumerate(asyncio.as_completed(tasks), start=1):
                start = time.time()
                try:
                    parsed, status_code = await asyncio.wait_for(
                        task, timeout=self.request_timeout
                    )  # ⏱ capped at request_timeout seconds
                except asyncio.TimeoutError:
                    print(f"⏳ Task {i} timed out completely")
                    parsed, status_code = None, 408
                total_time.append(time.time() - start)

                if status_code == 200:
                    all_results.append(parsed)
                    all_processed_id.add(parsed.get("id"))
                else:
                    # parsed is None on failure, so its id cannot be reported here
                    print(f"⚠️ Skipped item (status={status_code})")

                total_i += 1
                # ✅ checkpoint every save_number items
                if total_i >= self.save_number:
                    print(f"total_i {total_i}")
                    print(f"self.save_number {self.save_number}")
                    total_i = 0
                    self.__save_orjson(
                        data=list(all_processed_id),
                        path=self._temp_processed_id_path,
                    )
                    print(f"💾 Auto-saved processed ids: {len(all_processed_id)}")
                    number = self.__get_max_number_file(self._temp_path)
                    print(f"number {number}")
                    temp_output_path = self._temp_path / f"output_{number}.json"
                    self.__save_orjson(data=list(all_results), path=temp_output_path)
                    print(f"💾 Auto-saved partial data: {len(all_results)}")
                    all_results.clear()

            # ✅ after all tasks finish, a final save for any remaining data
            if total_i > 0 or len(all_results) > 0:
                print("💾 Final save of remaining data...")
                self.__save_orjson(
                    data=list(all_processed_id),
                    path=self._temp_processed_id_path,
                )
                print(f"💾 Auto-saved processed ids: {len(all_processed_id)}")
                number = self.__get_max_number_file(self._temp_path)
                print(f"number {number}")

                temp_output_path = self._temp_path / f"output_{number}.json"
                self.__save_orjson(data=list(all_results), path=temp_output_path)
                print(f"💾 Auto-saved partial data: {len(all_results)}")
                all_results.clear()

        # ------------------ Step 4: save the output ------------------
        final_data_path = self.output_path / f"final_data_{self.task_name}.json"
        processed_id_path = self.output_path / "processed_id.json"

        self.merge_json_dir(input_path=self._temp_path, output_path=final_data_path)
        all_results = self.__load_orjson(final_data_path)
        # make_new_proccessed_ids_from_file()
        self.__save_orjson(data=list(all_processed_id), path=processed_id_path)
        self.__save_orjson(data=all_results, path=final_data_path)

        avg_time = (sum(total_time) / len(total_time)) if total_time else 0
        print(
            f"\n✅ Processing completed!\n"
            f"📊 Total-Data: {len(data)} | "
            f"⭕ Ignored-Data: {len(processed_id)} | "
            f"📦 Processed-Data: {len(all_results)} | "
            f"❌ Loss-Data: {len(data) - len(all_results)} | "
            f"🕒 Avg Time: {avg_time:.2f}s per item | "
            f"🕒 Total Time: {sum(total_time):.4f}s | "
            f"💾 Results saved to: {final_data_path}"
        )
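A minimal sketch of driving AsyncCore end to end. The schema, model name, and file names here are assumptions; the api_url mirrors LLM_URL from monir/.env:

from pydantic import BaseModel
from llm_helper import AsyncCore


class SummarySchema(BaseModel):  # hypothetical structured-output schema
    summary: str


core = AsyncCore(
    model_name="my-model",       # assumption: whatever model the server hosts
    task_name="summarize_meets",
    data_path="./prompts.json",  # a list of {"id", "system_prompt", "user_prompt"}
    output_schema=SummarySchema,
    api_url="http://2.188.15.102:8001/v1/",
)
core.async_eval()  # writes ./summarize_meets/final_data_summarize_meets.json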
88 monir/main.py Normal file
@@ -0,0 +1,88 @@
from dotenv import load_dotenv
import os
from llm_helper import AsyncCore
from es_helper import ElasticHelper
from base_model import MnMeet
import time, traceback, uuid, orjson, re
from datetime import datetime, timezone
from elasticsearch.helpers import scan
from typing import Union
from pathlib import Path
from collections import defaultdict
from typing import List

load_dotenv()
ES_URL = os.getenv("ES_URL")
ES_USER_NAME = os.getenv("ES_USER_NAME")
ES_PASSWORD = os.getenv("ES_PASSWORD")
LLM_URL = os.getenv("LLM_URL")


def save_orjson(path, data):
    with open(path, "wb") as f:
        f.write(
            orjson.dumps(data, option=orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS)
        )


def load_orjson(path: str | Path):
    path = Path(path)
    with path.open("rb") as f:  # orjson needs the file opened in binary mode
        return orjson.loads(f.read())


# --------------------------- flow
term_index_name = "mn_term"
meet_index_name = "mn_meet"
ment_index_name = "mn_meet_entity"
sections_index_name = ""
dash = "-" * 25

es_helper = ElasticHelper(
    es_url=ES_URL,
    es_user=ES_USER_NAME,
    es_pass=ES_PASSWORD,
)

############ DELETE INDEXES
# es_helper.deleteIndex(index_name=term_index_name)
# es_helper.deleteIndex(index_name=meet_index_name)
# es_helper.deleteIndex(index_name=ment_index_name)

############ CREATE INDEXES
# es_helper.createIndexIfNotExist(index_name_o=term_index_name)
# es_helper.createIndexIfNotExist(index_name_o=meet_index_name)
# es_helper.createIndexIfNotExist(index_name_o=ment_index_name)


es = es_helper.es
# fields = list(MnMeet.model_fields.keys())
fields = [
    "id",
    "sanad_id",
    "main_type",
    "title",
    "author",
    "content",
]
# old_data = es_helper.search(
#     index=old_index_name, _source=fields, query={"match_all": {}}, size=3
# )
# old_data = old_data["hits"]["hits"]  # don't use with scan
################### for all data
old_data = list(
    scan(
        es,
        index=meet_index_name,
        query={
            "_source": fields,
            "query": {"term": {"main_type": "جلسه علمی"}},
            # "query": {"match_all": {}},
        },
    )
)
print(f"--- old_data {len(old_data)}")
save_orjson(
    data=old_data,
    path="./data_content_1.json",
)
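The dump written by main.py is a list of raw scan hits, while AsyncCore expects items carrying the PRIMARY_KEY fields id, system_prompt, and user_prompt. A bridging sketch, with hypothetical prompt wording:

import orjson

with open("./data_content_1.json", "rb") as f:
    hits = orjson.loads(f.read())

items = [
    {
        "id": h["_id"],  # scan hits expose _id and _source
        "system_prompt": "You summarize Persian meeting transcripts.",
        "user_prompt": h["_source"].get("content", ""),
    }
    for h in hits
]
with open("./prompts.json", "wb") as f:
    f.write(orjson.dumps(items))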
4 monir/requirements.txt Normal file
@@ -0,0 +1,4 @@
python-dotenv
openai
elasticsearch==8.13.0
orjson
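A quick sanity check that the installed packages match these pins (a sketch; assumes the packages are already installed):

from importlib.metadata import version

for pkg in ("python-dotenv", "openai", "elasticsearch", "orjson"):
    print(pkg, version(pkg))  # elasticsearch should report 8.13.0 per the pin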