change project strategy

parent 713776f6b0
commit 2218df5d09
@@ -1,2 +1,3 @@
 ./qavanin-faiss
 ./llm-answer
+./data
3	.gitignore	vendored	Normal file → Executable file
@@ -1,5 +1,4 @@
 __pycache__/
-qavanin-faiss/faiss_index_qavanin_285k_metadata.json
-qavanin-faiss/faiss_index_qavanin_285k.index
+data/
 .vscode
 .gitignore
2	_old/README.md	Executable file

@@ -0,0 +1,2 @@
+# Qavanin Chatbot
+
0	bale_qabot.py → _old/bale_qabot.py	Normal file → Executable file
@@ -1,5 +1,5 @@
 import json
-import chatbot_handler as chat
+import chatbot_handler as chatbot_handler
 # import bale_qabot
 import os
 import numpy as np
@@ -56,18 +56,20 @@ async def ping():
 @chatbot.post("/emergency_call")
 async def emergency_call(query: Query):
     print('emergency generate answer ...')
-    chat_id = await chat.create_chat_id()
-    answer = await chat.ask_chatbot_avalai(query.query, chat_id)
-    await chat.credit_refresh()
+    chat_id = await chatbot_handler.create_chat_id()
+    print('emergency chat_id ...', chat_id)
+    answer = await chatbot_handler.ask_chatbot_avalai(query.query, chat_id)
+    print('emergency answer ...', answer)
+    await chatbot_handler.credit_refresh()
+    print('credit updated')
     return {"answer": answer}

 @chatbot.post("/run_chat")
 async def run_chat(query: Query):
     print('regular generate answer ...')
-    chat_id = await chat.create_chat_id()
-    answer = await chat.ask_chatbot(query.query, chat_id)
-    await chat.credit_refresh()
+    chat_id = await chatbot_handler.create_chat_id()
+    answer = await chatbot_handler.ask_chatbot(query.query, chat_id)
+    await chatbot_handler.credit_refresh()

     return {"answer": answer}
@@ -83,20 +85,20 @@ if __name__ == "__main__":
             continue
         start = (datetime.datetime.now())
         # result = test_dataset()
-        result = chat.single_query(query)
+        result = chatbot_handler.single_query(query)
         end_retrive = datetime.datetime.now()
         print('-'*40)
         print(f'retrive duration: {(end_retrive - start).total_seconds()}')

         prompt = f'برای پرسش "{query}" از میان مواد قانونی "{result}" .پاسخ مناسب و دقیق را استخراج کن. درصورتی که مطلبی مرتبط با پرسش در متن پیدا نشد، فقط پاسخ بده: "متاسفانه در منابع، پاسخی پیدا نشد!"'
-        llm_answer = chat.llm_request(prompt)
+        llm_answer = chatbot_handler.llm_request(prompt)

         print('-'*40)
         print(f'llm duration: {(datetime.datetime.now() - end_retrive).total_seconds()}')

         refrences = ''
-        recognized_refrences = chat.find_refrences(llm_answer)
-        llm_answer = chat.replace_refrences(llm_answer, recognized_refrences)
+        recognized_refrences = chatbot_handler.find_refrences(llm_answer)
+        llm_answer = chatbot_handler.replace_refrences(llm_answer, recognized_refrences)

         with open('./llm-answer/result.txt', mode='a+', encoding='utf-8') as file:
             result_message = f'متن پرامپت: {query.strip()}\n\nپاسخ: {llm_answer} \n----------------------------------------------------------\n'
@@ -202,7 +202,7 @@ async def oss_request(query):
     response_dict['output'] = str(response)
     async with aiofiles. open('./llm-answer/messages.json', mode='w', encoding='utf-8') as output:
         await output.write(json.dumps(response_dict, ensure_ascii=False, indent=2))
-    print('response created')
+    print('oss response created')
     async with aiofiles.open('./llm-answer/chat-objs.txt', mode='a+', encoding='utf-8') as file:
         response_value = '0'
         await file.write(response_value) # estimated_cost
@@ -239,13 +239,13 @@ async def llm_request(query, model):
     # gemini-2.5-flash-lite : 35 خیلی خوب

     answer = response.choices[0].message.content
-    print('$'*50)
-    print(f'answer: {answer}')
-    print('$'*50)
+    # print('$'*50)
+    # print(f'answer: {answer}')
+    # print('$'*50)
     cost_prompt = response.estimated_cost['irt']
-    print('$'*50)
-    print(f'answer: {cost_prompt}')
-    print('$'*50)
+    # print('$'*50)
+    # print(f'answer: {cost_prompt}')
+    # print('$'*50)
     # پاسخ را هم به سابقه اضافه میکنیم
     # messages.append({"role": "assistant", "content": answer})
     # print(f'type(response): {type(response)}')
@@ -254,7 +254,7 @@ async def llm_request(query, model):
     response_dict['output'] = str(response)
     async with aiofiles. open('./llm-answer/messages.json', mode='w', encoding='utf-8') as output:
         await output.write(json.dumps(response_dict, ensure_ascii=False, indent=2))
-    print('response created')
+    print('llm response created')
     async with aiofiles.open('./llm-answer/chat-objs.txt', mode='a+', encoding='utf-8') as file:
         response_value = f"{response.estimated_cost['irt']}\n-------------------------------\n\n"
         await file.write(response_value) # estimated_cost
@@ -496,7 +496,7 @@ async def single_query(query: str):
     # query = cleaning(query)
     retrived_sections_ids = []

-    retrived_sections = pipe.search(query, content_list, topk_dense=100, topk_sparse=100, pre_rerank_k=100, final_k=15)
+    retrived_sections = pipe.search(query, content_list, topk_dense=100, topk_sparse=100, pre_rerank_k=100, final_k=10)
     final_similars = ''
     for i, row in enumerate(retrived_sections, 1):
         id_value = '{' + str(ids[row['idx']]) + '}'
@@ -545,7 +545,7 @@ async def replace_refrences(llm_answer: str, refrences_list:List[str]) -> List[str]:
     # refrences = ''
     for index, ref in enumerate(refrences_list,1):
         new_ref = '{' + str(ref) + '}'
-        llm_answer = llm_answer.replace(new_ref, f'[«{str(index)}»](https://majles.tavasi.ir/entity/detail/view/qsection/{ref}) ')
+        llm_answer = llm_answer.replace(new_ref, f' [«{str(index)}»](https://majles.tavasi.ir/entity/detail/view/qsection/{ref}) ')
         # id = ref.lstrip('{')
         # id = id.rstrip('}')
         # refrences += ''.join(f'[{index}] https://majles.tavasi.ir/entity/detail/view/qsection/{id}\n')
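For orientation, the hunk above only adds a leading space before each generated link; the surrounding function rewrites the {qsXXXX} placeholders emitted by the LLM into numbered markdown links. A minimal synchronous sketch of that behavior (the real function is async, and the sample call below is made up for illustration):

def replace_refrences(llm_answer, refrences_list):
    # Turn each '{<section id>}' placeholder into a numbered markdown link to that law section.
    for index, ref in enumerate(refrences_list, 1):
        token = '{' + str(ref) + '}'
        link = f' [«{index}»](https://majles.tavasi.ir/entity/detail/view/qsection/{ref}) '
        llm_answer = llm_answer.replace(token, link)
    return llm_answer

# Hypothetical usage: replace_refrences('... {qs911698} ...', ['qs911698'])
# returns '...  [«1»](https://majles.tavasi.ir/entity/detail/view/qsection/qs911698)  ...'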
@@ -584,6 +584,7 @@ async def get_title_system_prompt():


 async def ask_chatbot_avalai(query:str, chat_id:str):
+    print('ask avalai func')
     prompt_status = True
     llm_model = ''
     llm_answer = ''
@@ -623,19 +624,17 @@ async def ask_chatbot_avalai(query:str, chat_id:str):
     '''


-    try:
     for model in models:
         before_prompt_credit = await credit_refresh()
         llm_model = model
         print(f'using model: {model}')
         try:
             llm_answer, cost_prompt = await llm_request(prompt, model)
             # llm_answer, cost_prompt = await oss_request(prompt)
-        except:
-            print(f'error in ask-chatbot-avalai model:{model}')
-            continue
             break
         except Exception as error:
+            print(f'error in ask-chatbot-avalai model:{model}')
             after_prompt_credit = await credit_refresh()
             prompt_cost = int(before_prompt_credit) - int(after_prompt_credit)
             error = f'model: {model} \n{error}\n\n'
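The restructuring above removes the outer try around the model loop so that each model gets its own try/except: the loop asks a model, breaks on success, and on failure logs the error, measures the credit spent, and moves on to the next model. A minimal sketch of that fallback pattern, assuming a models list and this module's async llm_request and credit_refresh helpers:

async def ask_with_fallback(prompt, models):
    # Try each configured model in order and return the first successful answer.
    for model in models:
        before_prompt_credit = await credit_refresh()
        try:
            llm_answer, cost_prompt = await llm_request(prompt, model)
            return model, llm_answer, cost_prompt
        except Exception as error:
            after_prompt_credit = await credit_refresh()
            prompt_cost = int(before_prompt_credit) - int(after_prompt_credit)
            print(f'error in ask-chatbot-avalai model:{model}: {error} (cost {prompt_cost})')
            continue
    return None, '', 0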
@@ -646,6 +645,8 @@ async def ask_chatbot_avalai(query:str, chat_id:str):
                 await file.write(error)
             prompt_status = False
             status_text = 'با عرض پوزش، سرویس موقتا در دسترس نیست. لطفا دقایقی دیگر دوباره تلاش نمائید!'
+            continue


     # حالتی که وضعیت پرامپت، نامعتبر باشد، یک شی با مقادیر زیر برگردانده می شود
     else:
@@ -710,17 +711,22 @@ async def ask_chatbot_avalai(query:str, chat_id:str):
             'status' : True, # or False # bool
         }
         prev_chat_data = []
+        number = 1
         try:
-            async with aiofiles.open('./llm-answer/chat-messages1.json', mode='r', encoding='utf-8') as file:
+            async with aiofiles.open(f'./llm-answer/chat-messages{number}.json', mode='r', encoding='utf-8') as file:
                 content = await file.read()
             prev_chat_data = json.loads(content)
             prev_chat_data.append(chat_obj)
         except:
-            pass
+            number += 1

         prev_chat_data.append(chat_obj)
-        async with aiofiles. open('./llm-answer/chat-messages1.json', mode='w', encoding='utf-8') as output:
+        async with aiofiles.open(f'./llm-answer/chat-messages{number}.json', mode='w', encoding='utf-8') as output:
             await output.write(json.dumps(prev_chat_data, ensure_ascii=False, indent=2))

+        async with aiofiles.open(f'./llm-answer/chat-messages-answer{number}.txt', mode='a+', encoding='utf-8') as output:
+            await output.write(f'{chat_obj}\n+++++++++++++++++++++++++++\n')

         # save_result(chat_obj)

         # ایجاد آبجکت بازگشتی به فرانت
@@ -732,6 +738,7 @@ async def ask_chatbot_avalai(query:str, chat_id:str):
     return chat_obj

 async def ask_chatbot(query:str, chat_id:str):
+    print('ask oss func')
     prompt_status = True
     llm_model = 'gpt.oss.120b'
     llm_answer = ''
@@ -741,20 +748,21 @@ async def ask_chatbot(query:str, chat_id:str):
     if query == '':
         prompt_status = False



     # در صورتی که وضعیت پرامپت معتبر باشد، وارد فرایند شو
     if prompt_status:

         before_title_time = datetime.datetime.now()
         title_system_prompt = await get_title_system_prompt()
         title_user_prompt = await get_title_user_prompt(query)
+        title = ''
         # title, cost_title = await llm_base_request(title_system_prompt, title_user_prompt)
-        title, cost_title = await oss_base_request(title_system_prompt, title_user_prompt)
+        # title, cost_title = await oss_base_request(title_system_prompt, title_user_prompt)
         if not title:
             title = query

         title_prompt_duration = (datetime.datetime.now() - before_title_time).total_seconds()
+        print('-'*40)
+        print(f'title_prompt_duration: {title_prompt_duration}')

         if title == '':
             title = query.split()[0:10]
@@ -762,8 +770,6 @@
         start_time = (datetime.datetime.now())
         result_passages_text, result_passages_ids = await single_query(query)
         end_retrive = datetime.datetime.now()
-        print('-'*40)
-        print(f'title_prompt_duration: {title_prompt_duration}')
         retrive_duration = (end_retrive - start_time).total_seconds()
         print(f'retrive duration: {str(retrive_duration)}')

@@ -856,15 +862,29 @@ async def ask_chatbot(query:str, chat_id:str):
             'status_text' : status_text, # str
             'status' : True, # or False # bool
         }
-        prev_chat_data = []
-        async with aiofiles.open('./llm-answer/chat-messages1.json', mode='r', encoding='utf-8') as file:
-            content = await file.read()
-            prev_chat_data = json.loads(content)
-            prev_chat_data.append(chat_obj)

-        async with aiofiles. open('./llm-answer/chat-messages1.json', mode='w', encoding='utf-8') as output:
+        prev_chat_data = []
+        number = 1
+        try:
+            async with aiofiles.open(f'./llm-answer/chat-messages{number}.json', mode='r', encoding='utf-8') as file:
+                content = await file.read()
+            prev_chat_data = json.loads(content)
+            prev_chat_data.append(chat_obj)
+        except:
+            number += 1
+
+        prev_chat_data.append(chat_obj)
+        async with aiofiles. open(f'./llm-answer/chat-messages{number}.json', mode='w', encoding='utf-8') as output:
             await output.write(json.dumps(prev_chat_data, ensure_ascii=False, indent=2))

+        # async with aiofiles. open(f'./llm-answer/chat-messages-answer{number}.txt', mode='a+', encoding='utf-8') as output:
+        #     await output.write(f'{chat_obj}\n+++++++++++++++++++++++++++\n')
+
+
+        full_prompt_duration = (datetime.datetime.now() - start_time).total_seconds()
+        print(f'aiofiles duration: {full_prompt_duration}')
+        print('~'*40)

         # save_result(chat_obj)

         # ایجاد آبجکت بازگشتی به فرانت
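The persistence block introduced above loads chat-messages{number}.json, appends the new chat object, and writes the list back; if the read fails (for example, no history file yet or unreadable JSON), number is bumped and a fresh list is written to the next file. A minimal synchronous sketch of that load-append-save pattern, using plain json instead of aiofiles (the function name is illustrative only):

import json

def save_chat_obj(chat_obj, number=1):
    # Load the existing history if possible, otherwise fall back to the next file number.
    try:
        with open(f'./llm-answer/chat-messages{number}.json', mode='r', encoding='utf-8') as file:
            prev_chat_data = json.loads(file.read())
    except (OSError, json.JSONDecodeError):
        number += 1
        prev_chat_data = []

    prev_chat_data.append(chat_obj)
    with open(f'./llm-answer/chat-messages{number}.json', mode='w', encoding='utf-8') as output:
        output.write(json.dumps(prev_chat_data, ensure_ascii=False, indent=2))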
@@ -886,7 +906,7 @@ async def credit_refresh():
     }
     remained_credit = requests.get(url, headers=headers)
     remained_credit_value = str(remained_credit.json()['remaining_irt'])
+    print('writing credit')
     async with aiofiles.open('./llm-answer/credit.txt', mode='a+', encoding='utf-8') as file:
         await file.write(f'{remained_credit_value}\n')
@@ -933,6 +953,9 @@ if __name__ == "__main__":
         recognized_refrences = find_refrences(llm_answer)
         llm_answer = replace_refrences(llm_answer, recognized_refrences)

+        print('-'*40)
+        print(f'replace_refrences duration: {(datetime.datetime.now() - end_retrive).total_seconds()}')
+
         with open('./llm-answer/result.txt', mode='a+', encoding='utf-8') as file:
             result_message = f'متن پرامپت: {query.strip()}\n\nپاسخ: {llm_answer} \n----------------------------------------------------------\n'
             file.write(result_message)
@@ -941,6 +964,8 @@ if __name__ == "__main__":
             result_message = f'متن پرامپت: {query.strip()}\n\مواد مشابه: {result} \n----------------------------------------------------------\n'
             file.write(result_message)

+        print('-'*40)
+        print(f'file write duration: {(datetime.datetime.now() - end_retrive).total_seconds()}')

         print('----------------------------------------------------------')
10	_old/dockerfile	Executable file

@@ -0,0 +1,10 @@
+FROM docker.tavasi.ir/tavasi/qachat_base:1.0.0
+
+WORKDIR /src/app
+
+COPY . /src/app
+
+EXPOSE 80
+
+CMD [ "uvicorn","chatbot:chatbot","--reload","--port","80","--host=0.0.0.0"]
+
5	_old/dockerfile_base	Executable file

@@ -0,0 +1,5 @@
+FROM docker.tavasi.ir/tavasi/qachat_base:1.0.0
+RUN pip install uvicorn[standard]
+RUN pip install FlagEmbedding
+RUN pip install aiofiles
+RUN pip install openai
1	oss.py → _old/oss.py	Normal file → Executable file

@@ -10,6 +10,7 @@ LLM_URL = "http://172.16.29.102:8001/v1/"
 # 'assistant_prompt' : '',
 # }

+
 async def process_item(messages, reasoning_effort= 'medium', temperature= 0.4, top_p= 0.9, max_tokens= 2048):
     """
     generates answer with gpt-oss-120b model
15	_old/requirements.txt	Executable file

@@ -0,0 +1,15 @@
+cleantext==1.1.4
+elasticsearch7==7.17.12
+faiss_cpu==1.9.0
+fastapi==0.117.1
+hazm==0.10.0
+langchain_openai==0.3.33
+numpy==1.21.5
+openai==1.108.1
+pandas==2.3.2
+pydantic==2.11.9
+scikit_learn==1.7.2
+sentence_transformers==2.5.1
+torch==2.4.0
+torch==2.1.2
+transformers==4.55.1
3	_old/run_docker.bash	Executable file

@@ -0,0 +1,3 @@
+docker stop qachat
+docker rm qachat
+docker run --name qachat -p 2425:80 --net qachat_net --gpus=all -v ./:/src/app/ -v ./qavanin-faiss/:/src/app/qavanin-faiss/ -v ./llm-answer/:/src/app/llm-answer/ -v ./../MODELS:/src/MODELS -v ./../cache:/root/.cache/huggingface/hub -it --restart unless-stopped docker.tavasi.ir/tavasi/qachat:1.0.0
0	baleqabot/bot.log	Normal file → Executable file
0	baleqabot/requests.json	Normal file → Executable file
1	config.env	Normal file

@@ -0,0 +1 @@
+LLM_URL="http://172.16.29.102:8001/v1/"
@@ -6,5 +6,5 @@ COPY . /src/app

 EXPOSE 80

-CMD [ "uvicorn","chatbot:chatbot","--reload","--port","80","--host=0.0.0.0"]
+CMD [ "uvicorn","main:app","--reload","--port","80","--host=0.0.0.0"]

File diff suppressed because one or more lines are too long
@@ -1,743 +0,0 @@
(743 removed lines not reproduced here: the deleted file was a log of raw ChatCompletion response objects from the gpt-4o-mini and gemini-2.5-flash-lite runs, together with their estimated per-request costs in IRT, separated by dashed dividers.)
@ -1,725 +0,0 @@
[deleted file contents: 725 lines of numeric values, starting around 108315.8 and generally decreasing to about 42219.82, then from about 233219.82 down to about 198077.46; the full listing is omitted here]
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1,10 +0,0 @@
|
||||||
[0.43728234058085713, 'الف - حقوق بنیادین کار (آزادی انجمنها و حمایت از حق تشکلهای مدنی روابط کار، حق سازماندهی و مذاکره دسته\u200cجمعی، تساوی مزدها برای زن و مرد در مقابل کار هم ارزش، منع تبعیض در اشتغال و حرفه، رعایت حداقل سن کار، ممنوعیت کار کودک، رعایت حداقل مزد متناسب با حداقل معیشت).', 186639]
|
|
||||||
[0.17097510097612545, 'تبصره ۱۱ - بمنظور ایجاد مرجع صلاحیتدار و بیطرفی برای حل اختلافات بین کارگر و کارفرما و ایجاد حسن تفاهم در بین آنان و تمرکز امور مربوط بکار و مراقبت در تهیه و اجرای مقررات قانون کار و قانون بیمه کارگران و همچنین حمایت و تأمین بهداشت و رفاه و بالا بردن سطح زندگی کارگران و وضع و اجرای مقررات بیمه\u200cهای اجتماعی و برقرار نمودن روابط با تشکیلات بین\u200cالمللی کار وزارتخانه\u200cای بنام وزارت کار تأسیس می\u200cشود.', 128416]
|
|
||||||
[0.15169625817516322, 'ث) حمایت از کارگران و نمایندگان آنها در برابر اقدامات انضباطی ناشی از اعمالی که آنها مطابق سیاست موضوع ماده (۴) فوق به طور معقول انجام داده\u200cاند.', 75037]
|
|
||||||
[0.11213845051838162, 'ماده ۷ - دولتهای طرف این میثاق حق هر کس را به تمتع از شرایط عادلانه و مساعد کار که بویژه متضمن مراتب زیر باشد برسمیت بشناسند: الف - اجرتی که لااقل امور ذیل را برای کلیه کارگران تأمین نماید: ۱ - مزد منصفانه و اجرت مساوی برای کار با ارزش مساوی بدون هیچ نوع تمایز بویژه اینکه زنان تضمین داشته باشند که شرایط کار آنان پائین\u200cتر از\u200cشرایط مورد استفاده مردان نباشد و برای کار مساوی مزد مساوی با مردان دریافت دارند. ۲ - مزایای کافی برای آنان و خانواده\u200cشان طبق مقررات این میثاق: ب - ایمنی و بهداشت کار. ج - تساوی فرصت برای هر کس که بتواند در خدمت خود بمدارج مناسب عالیتری ارتقاء یابد بدون در نظر گرفتن هیچگونه ملاحظات دیگری جز\u200cطول مدت خدمت و لیاقت. د - استراحت - فراغت و محدودیت معقول ساعات کار و مرخصی اداری با استفاده از حقوق همچنین مزد ایام تعطیل رسمی.', 194273]
|
|
||||||
[0.1079329839273747, '۲۰ - بمنظور تأمین شرائط مناسب\u200cتر کار و زندگی کارگران مهاجر نسبت بشرائطی که قانون یا رویه عملی برای سایر کارگران که در خدمت متشابه\u200cاشتغال دارند مقرر داشته و همچنین برای اینکه کارگران مهاجر هم مثل کارگران دیگر مشمول مقیاسهای حمایت بنحوی که در بندهای آتی این سفارش نامه خواهد آمد گردند کلیۀ مساعی باید صورت گیرد.', 8843]
|
|
||||||
[0.0531841966906351, '۱۳ ایجاد نظام جامع تأمین اجتماعی برای حمایت از حقوق محرومان و مستضعفان و مبارزه با فقر و حمایت از نهادهای عمومی و موسسات و خیریه\u200cهای مردمی با رعایت ملاحظات دینی و انقلابی.', 213766]
|
|
||||||
[0.05166811304646011, '۲ - ایجاد نظام جامع تأمین اجتماعی برای حمایت از حقوق محرومان و مستضعفان و مبارزه با فقر و حمایت از نهاد\u200cهای عمومی و موسسات و خیریه\u200cهای مردمی با رعایت ملاحظات دینی و انقلابی.', 53933]
|
|
||||||
[0.051528153447387044, 'ج تقویت همسویی منافع کارگران و کارفرمایان و تکالیف دولت با رویکرد حمایت از تولید و سه\u200cجانبه گرایی', 185751]
|
|
||||||
[0.024949120491999023, 'ماده ۲ - ۱ - هر یک از کشورهای عضو باید بوسائلی که منطبق با روشهای معمول جهت تعیین میزان اجرت باشد اجرای اصل تساوی اجرت کارگر\u200cزن و مرد را در قبال کار هم ارزش تشویق و تا حدودی که با روشهای فوق\u200cالذکر تطبیق نماید اجرای آنرا درباره عموم کارگران تأمین کند. ۲ - اجرای این اصل ممکن است بطرق زیر صورت گیرد: الف - وضع قوانین داخلی. ب - هر روشی که جهت تعیین میزان اجرت\u200cها ضمن قوانین پیش\u200cبینی\u200cشده باشد. ج - انعقاد پیمان\u200cهای دسته\u200cجمعی بین کارفرمایان و کارگران. د - ترکیبی از این روشهای مختلف.', 204904]
|
|
||||||
[0.024270693471581787, 'ز تلاش در جهت گسترش امکانات رفاهی و حفظ حقوق قانونی کارکنان واحد.', 35580]
|
|
||||||
File diff suppressed because one or more lines are too long
59
main.py
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi import FastAPI ,Header
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
from routes.rag_base import router as rag_base
|
||||||
|
|
||||||
|
|
||||||
|
async def get_oss_client():
|
||||||
|
LLM_URL = "http://172.16.29.102:8001/v1/"
|
||||||
|
client = AsyncOpenAI(base_url= LLM_URL, api_key="EMPTY")  # the AsyncOpenAI constructor is synchronous and must not be awaited
|
||||||
|
return client
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> FastAPI:
|
||||||
|
app = FastAPI(title="qachat2 Backend", version="0.1.0")
|
||||||
|
origins = ["*"]
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=origins,
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# app.state.settings = get_settings()
|
||||||
|
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def on_startup() -> None:
|
||||||
|
print("startup app")
|
||||||
|
client = getattr(app.state, "oss_client", None)
|
||||||
|
if not client :
|
||||||
|
client = await get_oss_client()  # get_oss_client is async, so it must be awaited here
|
||||||
|
app.state.oss_client = client
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("shutdown")
|
||||||
|
async def on_shutdown() -> None:
|
||||||
|
client = getattr(app.state, "elastic_client", None)
|
||||||
|
if client is not None:
|
||||||
|
await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
async def simple():
|
||||||
|
return "ai rag caht qanon OK"
|
||||||
|
|
||||||
|
@app.get("/ping")
|
||||||
|
async def ping():
|
||||||
|
return "ai rag caht qanon OK"
|
||||||
|
|
||||||
|
app.include_router(rag_base, prefix="")
|
||||||
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
app = create_app()
|
||||||
|
|
||||||
|
|
||||||
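For reference, a minimal client-side sketch (not part of this commit) of calling the new endpoints; the host and port are assumed from the docker run command further below (host port 2425 mapped to the container's port 80), and the httpx dependency is likewise an assumption:

import asyncio
import httpx

async def ask(query: str) -> str:
    # POST the question to the /run_chat endpoint defined in routes/rag_base.py
    async with httpx.AsyncClient(base_url="http://localhost:2425", timeout=300) as client:
        resp = await client.post("/run_chat", json={"query": query})
        resp.raise_for_status()
        return resp.json()["answer"]

if __name__ == "__main__":
    print(asyncio.run(ask("نمونه پرسش")))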
1008
routes/chatbot_handler.py
Executable file
File diff suppressed because it is too large
47
routes/rag_base.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
import routes.chatbot_handler as chatbot_handler
|
||||||
|
import datetime
|
||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter(tags=["ragchat"])
|
||||||
|
# settings= get_settings()
|
||||||
|
|
||||||
|
|
||||||
|
# data model for API request payloads
|
||||||
|
class RagQueryModal(BaseModel):
|
||||||
|
query: str
|
||||||
|
|
||||||
|
|
||||||
|
async def create_chat_id():
|
||||||
|
date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-')
|
||||||
|
|
||||||
|
chat_id = f'{date}-{random.randint(100000, 999999)}'
|
||||||
|
|
||||||
|
return chat_id
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/emergency_call")
|
||||||
|
async def emergency_call(payload: RagQueryModal):
|
||||||
|
print('emergency generate answer ...')
|
||||||
|
chat_id = await create_chat_id()
|
||||||
|
answer = await chatbot_handler.ask_chatbot_avalai(payload.query, chat_id)
|
||||||
|
# print('emergency answer ...', answer)
|
||||||
|
await chatbot_handler.credit_refresh()
|
||||||
|
print('*** ... ready for next ... ***')
|
||||||
|
return {"answer": answer}
|
||||||
|
|
||||||
|
@router.post("/run_chat")
|
||||||
|
async def run_chat(payload: RagQueryModal, request: Request):
|
||||||
|
# request.state.app
|
||||||
|
print('run_chat start ...')
|
||||||
|
chat_id = await create_chat_id()
|
||||||
|
answer = await chatbot_handler.ask_chatbot(payload.query, chat_id)
|
||||||
|
print('*** ... ready for next ... ***')
|
||||||
|
|
||||||
|
return {"answer": answer}
|
||||||
|
|
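As a quick illustration (a sketch, not part of the commit) of the chat-id scheme above: create_chat_id combines the current timestamp with a six-digit random suffix, so a generated value looks like the following.

import datetime
import random

date = str(datetime.datetime.now()).replace(' ', '-').replace(':', '').replace('.', '-')
chat_id = f'{date}-{random.randint(100000, 999999)}'
print(chat_id)  # e.g. 2025-01-01-101530-123456-654321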
@ -1,3 +1,3 @@
|
||||||
docker stop qachat
|
docker stop qachat
|
||||||
docker rm qachat
|
docker rm qachat
|
||||||
docker run --name qachat -p 2425:80 --net qachat_net --gpus=all -v ./:/src/app/ -v ./qavanin-faiss/:/src/app/qavanin-faiss/ -v ./llm-answer/:/src/app/llm-answer/ -v ./../MODELS:/src/MODELS -v ./../cache:/root/.cache/huggingface/hub -it --restart unless-stopped docker.tavasi.ir/tavasi/qachat:1.0.0
|
docker run --name qachat -p 2425:80 --net qachat_net --gpus=all -v ./:/src/app/ -v ./qavanin-faiss/:/src/app/qavanin-faiss/ -v ./llm-answer/:/src/app/llm-answer/ -v ./../MODELS:/src/MODELS -v ./../cache:/root/.cache/huggingface/hub -it --restart unless-stopped docker.tavasi.ir/tavasi/qachat2:1.0.0
|
||||||
|
|
|
||||||
72
util/convert_qavanin_json_to_faiss.py
Executable file
|
|
@ -0,0 +1,72 @@
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
import faiss
|
||||||
|
import os
|
||||||
|
|
||||||
|
def create_faiss_index_from_json(json_file_path, faiss_index_path, metadata_file_path):
|
||||||
|
print(f'try to read {json_file_path} ...')
|
||||||
|
# --- 1. Load the data from JSON ---
|
||||||
|
with open(json_file_path, 'r', encoding='utf-8') as f:
|
||||||
|
data = json.load(f)
|
||||||
|
print(f'file reading finished')
|
||||||
|
|
||||||
|
# Each element is assumed to contain the following fields:
|
||||||
|
# {
|
||||||
|
# "speech_title": "title",
|
||||||
|
# "sentence": "متن جمله",
|
||||||
|
# "embeddings": [0.12, 0.34, ...]
|
||||||
|
# }
|
||||||
|
|
||||||
|
sentences = []
|
||||||
|
titles = []
|
||||||
|
embeddings_list = []
|
||||||
|
prefix_list = []
|
||||||
|
for k, item in data.items():
|
||||||
|
sentences.append(item['content'])
|
||||||
|
titles.append(item['id'])
|
||||||
|
embeddings_list.append(item['embeddings'])
|
||||||
|
prefix_list.append(item['section-prefix'])
|
||||||
|
|
||||||
|
embeddings = np.array(embeddings_list).astype('float32') # shape: (n, d)
|
||||||
|
dimension = embeddings.shape[1]
|
||||||
|
|
||||||
|
print(f"Loaded {len(embeddings)} embeddings with dimension {dimension}")
|
||||||
|
|
||||||
|
# --- 2. Build the FAISS index (GPU) ---
|
||||||
|
# If only a CPU is available, use faiss.IndexFlatL2.
|
||||||
|
# If a GPU is available, build the index on the CPU first and then move it to the GPU.
|
||||||
|
cpu_index = faiss.IndexFlatL2(dimension) # L2 (Euclidean) distance metric
|
||||||
|
|
||||||
|
# Move the index to the GPU
|
||||||
|
if faiss.get_num_gpus() > 0:
|
||||||
|
print("Using GPU for FAISS index...")
|
||||||
|
res = faiss.StandardGpuResources()
|
||||||
|
gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
|
||||||
|
else:
|
||||||
|
print("GPU not available, using CPU.")
|
||||||
|
gpu_index = cpu_index
|
||||||
|
|
||||||
|
# --- 3. Add the vectors to the index ---
|
||||||
|
gpu_index.add(embeddings)
|
||||||
|
print(f"Total vectors indexed: {gpu_index.ntotal}")
|
||||||
|
|
||||||
|
# --- 4. Save the index to a file ---
|
||||||
|
# The index must be moved back to the CPU before it can be saved
|
||||||
|
final_index = faiss.index_gpu_to_cpu(gpu_index) if isinstance(gpu_index, faiss.Index) and faiss.get_num_gpus() > 0 else gpu_index
|
||||||
|
os.makedirs(os.path.dirname(faiss_index_path), exist_ok=True)
|
||||||
|
faiss.write_index(final_index, faiss_index_path)
|
||||||
|
print(f"FAISS index saved to {faiss_index_path}")
|
||||||
|
|
||||||
|
# --- 5. Save the metadata (to map search results back to the source text) ---
|
||||||
|
metadata = [{"id": id, "content": c, 'prefix': p} for id, c, p in zip(titles, sentences,prefix_list)]
|
||||||
|
with open(metadata_file_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(metadata, f, ensure_ascii=False, indent=2)
|
||||||
|
print(f"Metadata saved to {metadata_file_path}")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# example usage of the method
|
||||||
|
json_file_path = '../majles-output/sections-vec-285k.json'
|
||||||
|
faiss_index_path = '../data/qavanin-faiss/faiss_index_qavanin_285k.index'
|
||||||
|
metadata_file_path = '../data/qavanin-faiss/faiss_index_qavanin_285k_metadata.json'
|
||||||
|
|
||||||
|
create_faiss_index_from_json(json_file_path, faiss_index_path, metadata_file_path)
|
||||||
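A minimal sketch of querying the saved index afterwards; the paths mirror the ones above, the query vector is assumed to come from the same embedder used at indexing time, and the search helper itself is illustrative rather than part of the commit:

import json
import numpy as np
import faiss

index = faiss.read_index('../data/qavanin-faiss/faiss_index_qavanin_285k.index')
with open('../data/qavanin-faiss/faiss_index_qavanin_285k_metadata.json', encoding='utf-8') as f:
    metadata = json.load(f)

def search(query_embedding, top_k=5):
    # query_embedding: a float32 vector with the same dimension as the indexed embeddings
    vec = np.asarray(query_embedding, dtype='float32').reshape(1, -1)
    distances, ids = index.search(vec, top_k)  # L2 distances: smaller means closer
    return [(float(d), metadata[i]['id'], metadata[i]['content']) for d, i in zip(distances[0], ids[0])]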
2
util/docker_build.bash
Executable file
|
|
@ -0,0 +1,2 @@
|
||||||
|
sudo docker build -t docker.tavasi.ir/tavasi/qachat_base:1.0.0 -f dockerfile_base .
|
||||||
|
sudo docker build -t docker.tavasi.ir/tavasi/qachat2:1.0.0 .
|
||||||
677
util/elastic_helper.py
Executable file
|
|
@ -0,0 +1,677 @@
|
||||||
|
import zipfile
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from time import sleep
|
||||||
|
from elasticsearch7 import Elasticsearch,helpers
|
||||||
|
|
||||||
|
class ElasticHelper():
|
||||||
|
|
||||||
|
counter = 0
|
||||||
|
total = 0
|
||||||
|
id = ""
|
||||||
|
path_mappings = os.getcwd() + '/repo/_other/'
|
||||||
|
|
||||||
|
def __init__(self, es_url="http://127.0.0.1:6900", es_pass="", es_user="elastic", path_mappings = ""):
|
||||||
|
|
||||||
|
if path_mappings :
|
||||||
|
self.path_mappings = path_mappings
|
||||||
|
|
||||||
|
if es_pass == '' :
|
||||||
|
self.es = Elasticsearch(es_url)
|
||||||
|
else:
|
||||||
|
self.es = Elasticsearch(
|
||||||
|
es_url,
|
||||||
|
http_auth=(es_user, es_pass),
|
||||||
|
)
|
||||||
|
|
||||||
|
# print(es_url)
|
||||||
|
# print(self.es)
|
||||||
|
|
||||||
|
self.success_connect = False
|
||||||
|
for a in range(0,10):
|
||||||
|
try :
|
||||||
|
if not self.es.ping():
|
||||||
|
print('elastic not ping, sleep 5 s : ', a)
|
||||||
|
sleep(5)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
self.success_connect = True
|
||||||
|
break
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
break
|
||||||
|
if not self.success_connect :
|
||||||
|
print('******','not access to elastic service')
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
self.counter = 0
|
||||||
|
self.total = 0
|
||||||
|
self.id = ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_doctument(self, index_name, id):
|
||||||
|
res = self.es.get(index=index_name, id=id)
|
||||||
|
return res
|
||||||
|
|
||||||
|
def exist_doctument(self, index_name, id):
|
||||||
|
res = self.es.exists(index=index_name, id=id)
|
||||||
|
return res
|
||||||
|
|
||||||
|
def update_index_doc(self, is_update_state, index_name_o, eid, data):
|
||||||
|
if is_update_state:
|
||||||
|
resp = self.es.update(index=index_name_o, id=eid, doc=data)
|
||||||
|
# resp = self.es.update(index=index_name_o, id=eid, body={'doc':data})
|
||||||
|
else:
|
||||||
|
resp = self.es.index(index=index_name_o, id=eid, document=data)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
def exportToJsonForAI(self, path_back, index_name, out_name= '', body={}, fields=[]) :
|
||||||
|
print('*' * 50, ' start backup -->', index_name)
|
||||||
|
self.counter = 0
|
||||||
|
sid = None
|
||||||
|
|
||||||
|
out = out_name
|
||||||
|
if out_name == '' :
|
||||||
|
out = index_name
|
||||||
|
|
||||||
|
fout = open( path_back + "/"+ out + '.json', 'a+' , encoding='utf-8')
|
||||||
|
|
||||||
|
s_res = self.es.search(
|
||||||
|
index=index_name,
|
||||||
|
scroll='5m',
|
||||||
|
size=1000,
|
||||||
|
body=body
|
||||||
|
)
|
||||||
|
self.total = s_res["hits"]["total"]['value']
|
||||||
|
|
||||||
|
print('start index = %s' % index_name)
|
||||||
|
print('total = %d' % self.total)
|
||||||
|
|
||||||
|
sid = s_res['_scroll_id']
|
||||||
|
scroll_size = len(s_res['hits']['hits'])
|
||||||
|
file_count = 1
|
||||||
|
out_json = []
|
||||||
|
while scroll_size > 0:
|
||||||
|
"Scrolling..."
|
||||||
|
self.counter += scroll_size
|
||||||
|
print("progress -> %.2f %%" % ((self.counter / self.total)*100))
|
||||||
|
#############################
|
||||||
|
for item in s_res['hits']['hits']:
|
||||||
|
|
||||||
|
if fields :
|
||||||
|
item2={}
|
||||||
|
item2['id']=item['_id']
|
||||||
|
for kf in fields :
|
||||||
|
#print(kf)
|
||||||
|
if kf in item['_source'] :
|
||||||
|
# print(item['_source'][kf])
|
||||||
|
item2[kf] = item['_source'][kf]
|
||||||
|
#exit()
|
||||||
|
else :
|
||||||
|
item2=item
|
||||||
|
|
||||||
|
out_json.append(item2)
|
||||||
|
|
||||||
|
|
||||||
|
s_res = self.es.scroll(scroll_id=sid, scroll='2m', request_timeout=100000)
|
||||||
|
sid = s_res['_scroll_id']
|
||||||
|
scroll_size = len(s_res['hits']['hits'])
|
||||||
|
|
||||||
|
sid = None
|
||||||
|
text = json.dumps(out_json, ensure_ascii=False)
|
||||||
|
fout.write(text)
|
||||||
|
|
||||||
|
##############################
|
||||||
|
|
||||||
|
def backupIndexToZipfile(self, path_back, index_name, out_name= '', body={}, byzip = True, fields=[], noFields=[]) :
|
||||||
|
print('*' * 50, ' start backup -->', index_name)
|
||||||
|
self.counter = 0
|
||||||
|
sid = None
|
||||||
|
|
||||||
|
out = out_name
|
||||||
|
if out_name == '' :
|
||||||
|
out = index_name
|
||||||
|
|
||||||
|
|
||||||
|
if body == {} :
|
||||||
|
s_res = self.es.search(
|
||||||
|
index=index_name,
|
||||||
|
scroll='5m',
|
||||||
|
size=1000
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
s_res = self.es.search(
|
||||||
|
index=index_name,
|
||||||
|
scroll='5m',
|
||||||
|
size=1000,
|
||||||
|
body=body
|
||||||
|
)
|
||||||
|
|
||||||
|
self.total = s_res["hits"]["total"]['value']
|
||||||
|
if self.total == 0 :
|
||||||
|
print('total index_name by query = %d' % self.total)
|
||||||
|
return False
|
||||||
|
|
||||||
|
if byzip:
|
||||||
|
fout = zipfile.ZipFile(path_back + "/"+ out + '.zip', 'w')
|
||||||
|
else:
|
||||||
|
fout = open( path_back + "/"+ out + '.json', 'a+' , encoding='utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
print('start index = %s' % index_name)
|
||||||
|
print('total = %d' % self.total)
|
||||||
|
|
||||||
|
sid = s_res['_scroll_id']
|
||||||
|
scroll_size = len(s_res['hits']['hits'])
|
||||||
|
file_count = 1
|
||||||
|
while scroll_size > 0:
|
||||||
|
"Scrolling..."
|
||||||
|
self.counter += scroll_size
|
||||||
|
print("progress -> %.2f %%" % ((self.counter / self.total)*100))
|
||||||
|
#############################
|
||||||
|
out_json = []
|
||||||
|
for item in s_res['hits']['hits']:
|
||||||
|
if fields :
|
||||||
|
item2={}
|
||||||
|
item2['id']=item['_id']
|
||||||
|
item2['_source']={}
|
||||||
|
for kf in fields :
|
||||||
|
if kf in item['_source'] :
|
||||||
|
item2['_source'][kf] = item['_source'][kf]
|
||||||
|
else :
|
||||||
|
item2=item
|
||||||
|
|
||||||
|
if noFields :
|
||||||
|
for kf in noFields :
|
||||||
|
if kf in item2['_source']:
|
||||||
|
del item2['_source'][kf]
|
||||||
|
|
||||||
|
|
||||||
|
out_json.append(item2)
|
||||||
|
|
||||||
|
|
||||||
|
text = json.dumps(out_json, ensure_ascii=False)
|
||||||
|
out_json = []
|
||||||
|
if byzip:
|
||||||
|
filename = out + str(file_count) + '.json'
|
||||||
|
file_count +=1
|
||||||
|
fout.writestr(filename, text.encode('utf-8'), zipfile.ZIP_DEFLATED )
|
||||||
|
else:
|
||||||
|
fout.write(text)
|
||||||
|
|
||||||
|
##############################
|
||||||
|
s_res = self.es.scroll(scroll_id=sid, scroll='2m', request_timeout=100000)
|
||||||
|
sid = s_res['_scroll_id']
|
||||||
|
scroll_size = len(s_res['hits']['hits'])
|
||||||
|
sid = None
|
||||||
|
fout.close()
|
||||||
|
|
||||||
|
|
||||||
|
def restorFileToElastic(self, path_back, index_name, app_key = '', queryDelete = True, map_name='') :
|
||||||
|
if not os.path.exists(path_back) :
|
||||||
|
print(' **** error *** path not exist: ', path_back)
|
||||||
|
return False
|
||||||
|
|
||||||
|
file_path = path_back + '/' + index_name + '.zip'
|
||||||
|
if not os.path.exists(file_path ) :
|
||||||
|
return False
|
||||||
|
|
||||||
|
if queryDelete :
|
||||||
|
# If the index already exists, ask the user whether to delete it
|
||||||
|
if self.deleteIndex(index_name) :
|
||||||
|
self.createIndex(index_name, app_key, map_name)
|
||||||
|
self.zipFileToElastic(file_path, index_name)
|
||||||
|
else : # if the index already exists, skip it and do nothing
|
||||||
|
self.createIndex(index_name, app_key, map_name)
|
||||||
|
self.zipFileToElastic(file_path, index_name)
|
||||||
|
|
||||||
|
def restorFileToElastic2(self, path_file, index_name, app_key = '', queryDelete = True, map_name='') :
|
||||||
|
if not os.path.exists(path_file) :
|
||||||
|
print(' **** error *** path not exist: ', path_file)
|
||||||
|
return False
|
||||||
|
|
||||||
|
file_path = path_file
|
||||||
|
if not os.path.exists(file_path ) :
|
||||||
|
return False
|
||||||
|
|
||||||
|
if queryDelete :
|
||||||
|
# If the index already exists, ask the user whether to delete it
|
||||||
|
if self.deleteIndex(index_name) :
|
||||||
|
self.createIndex(index_name, app_key, map_name)
|
||||||
|
self.zipFileToElastic(file_path, index_name)
|
||||||
|
else : # if the index already exists, skip it and do nothing
|
||||||
|
self.createIndex(index_name, app_key, map_name)
|
||||||
|
self.zipFileToElastic(file_path, index_name)
|
||||||
|
|
||||||
|
|
||||||
|
def renameElasticIndex(self, index_name_i, index_name_o, app_key = '', map_name='') :
|
||||||
|
|
||||||
|
if self.createIndex(index_name_o, app_key, map_name) :
|
||||||
|
res = self.es.reindex(
|
||||||
|
body={
|
||||||
|
"source": {"index": index_name_i},
|
||||||
|
"dest": {"index": index_name_o}
|
||||||
|
},
|
||||||
|
wait_for_completion=False)
|
||||||
|
|
||||||
|
print(type(res))
|
||||||
|
print(res)
|
||||||
|
|
||||||
|
taskid = res["task"] if res["task"] else ""
|
||||||
|
#tasks = client.TasksClient(self.es)
|
||||||
|
tasks = self.es.tasks
|
||||||
|
while True :
|
||||||
|
res = tasks.get(task_id = taskid)
|
||||||
|
if res["completed"] :
|
||||||
|
break
|
||||||
|
|
||||||
|
# print( res["task"])
|
||||||
|
print( '----', index_name_o, ' imported : ', res["task"]["status"]["total"] , ' / ', res["task"]["status"]["created"])
|
||||||
|
sleep(1)
|
||||||
|
print( '----', index_name_o, ' completed')
|
||||||
|
|
||||||
|
|
||||||
|
def deleteIndex(self, index_name) :
|
||||||
|
if not self.es.indices.exists(index=index_name) :
|
||||||
|
print(' ' * 10, " for delete NOT exist index :", index_name )
|
||||||
|
return True
|
||||||
|
|
||||||
|
question = 'Is DELETE elastic index (' + index_name +') ? '
|
||||||
|
if self.query_yes_no(question) :
|
||||||
|
self.es.indices.delete(index = index_name)
|
||||||
|
print('%' * 10 , " Finish DELETE index :", index_name )
|
||||||
|
return True
|
||||||
|
else :
|
||||||
|
return False
|
||||||
|
|
||||||
|
def query_yes_no(self, question, default="no"):
|
||||||
|
valid = { "yes": True, "y": True, "ye": True, "no": False, "n": False }
|
||||||
|
if default is None:
|
||||||
|
prompt = " [y/n] "
|
||||||
|
elif default == "yes":
|
||||||
|
prompt = " [Y/n] "
|
||||||
|
elif default == "no":
|
||||||
|
prompt = " [y/N] "
|
||||||
|
else:
|
||||||
|
raise ValueError("invalid default answer: '%s'" % default)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
print('%'*10, ' quistion ', '%'*10 , '\n')
|
||||||
|
sys.stdout.write(question + prompt)
|
||||||
|
choice = input().lower()
|
||||||
|
if default is not None and choice == "":
|
||||||
|
return valid[default]
|
||||||
|
elif choice in valid:
|
||||||
|
return valid[choice]
|
||||||
|
else:
|
||||||
|
sys.stdout.write("لطفا یکی از موارد روبرو را وارد کنید : 'yes' or 'no' " "(or 'y' or 'n').\n")
|
||||||
|
|
||||||
|
def createIndexIfNotExist(self, index_name_o, mapping_o=""):
|
||||||
|
try:
|
||||||
|
if not self.es.indices.exists(index=index_name_o):
|
||||||
|
response = self.es.indices.create(index=index_name_o, body=mapping_o)
|
||||||
|
# print out the response:
|
||||||
|
print("create index response:", response)
|
||||||
|
except:
|
||||||
|
print("....... index exist ! ... not created")
|
||||||
|
|
||||||
|
|
||||||
|
def createIndex(self, index_name, app_key='', map_name=''):
|
||||||
|
|
||||||
|
path_base = self.path_mappings
|
||||||
|
path_mapping1 = path_base + 'general/'
|
||||||
|
if app_key == '' :
|
||||||
|
app_key = 'tavasi'
|
||||||
|
path_mapping2 = path_base + app_key + '/'
|
||||||
|
|
||||||
|
|
||||||
|
if map_name == '':
|
||||||
|
map_name = index_name
|
||||||
|
|
||||||
|
if self.es.indices.exists(index=index_name) :
|
||||||
|
print("============== exist index :", index_name )
|
||||||
|
return True
|
||||||
|
|
||||||
|
if map_name == 'mj_rg_section' or map_name == 'semantic_search' :
|
||||||
|
map_name = 'mj_qa_section'
|
||||||
|
elif map_name[-3:] == '_ai':
|
||||||
|
map_name = map_name[:-3]  # strip the '_ai' suffix
|
||||||
|
print(map_name)
|
||||||
|
|
||||||
|
mapping_file_path = path_mapping1 + map_name + '.json'
|
||||||
|
print("mapping_file_path : " , mapping_file_path)
|
||||||
|
if not os.path.isfile(mapping_file_path):
|
||||||
|
if not os.path.isfile(mapping_file_path):
|
||||||
|
mapping_file_path = path_mapping2 + map_name + '.json'
|
||||||
|
|
||||||
|
print("mapping_file_path : " , mapping_file_path)
|
||||||
|
|
||||||
|
# Create Index With Mapping
|
||||||
|
if os.path.isfile(mapping_file_path):
|
||||||
|
mapping_file = open( mapping_file_path,'r', encoding='utf-8' )
|
||||||
|
mapping_file_read = mapping_file.read()
|
||||||
|
mapping_data = json.loads(mapping_file_read)
|
||||||
|
mapping_file.close()
|
||||||
|
if self.es.indices.exists(index=index_name) :
|
||||||
|
print("============== exist index :", index_name )
|
||||||
|
else :
|
||||||
|
self.es.indices.create(index = index_name , body = mapping_data)
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
print('*** error not find maping file elastic : *******', mapping_file_path)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def updateBulkList(self, listData, index_name):
|
||||||
|
chunk_size=1000
|
||||||
|
raise_on_error=False
|
||||||
|
raise_on_exception=False
|
||||||
|
stats_only=True
|
||||||
|
yield_ok = False
|
||||||
|
|
||||||
|
actions=[]
|
||||||
|
for item in listData:
|
||||||
|
actions.append({
|
||||||
|
"_op_type": "update",
|
||||||
|
"_index": index_name,
|
||||||
|
"_id" : item['_id'],
|
||||||
|
"doc": item['_source']
|
||||||
|
}
|
||||||
|
)
|
||||||
|
helpers.bulk(self.es, actions, chunk_size, raise_on_error, raise_on_exception, stats_only, yield_ok )
|
||||||
|
|
||||||
|
def importBulkList(self, listData, index_name):
|
||||||
|
chunk_size=100000
|
||||||
|
raise_on_error=False
|
||||||
|
raise_on_exception=False
|
||||||
|
stats_only=True
|
||||||
|
yield_ok = False
|
||||||
|
|
||||||
|
for item in listData:
|
||||||
|
actions = [{
|
||||||
|
"_op_type": "index",
|
||||||
|
"_index": index_name,
|
||||||
|
"_id" : item['_id'],
|
||||||
|
"_source": item['_source']
|
||||||
|
}
|
||||||
|
]
|
||||||
|
helpers.bulk(self.es, actions, chunk_size, raise_on_error, raise_on_exception, stats_only, yield_ok )
|
||||||
|
|
||||||
|
|
||||||
|
def importJsonDataToElastic(self, jsonData, index_name, fields=[]):
|
||||||
|
chunk_size=1000
|
||||||
|
raise_on_error=False
|
||||||
|
raise_on_exception=False
|
||||||
|
stats_only=True
|
||||||
|
yield_ok = False
|
||||||
|
|
||||||
|
actions=[]
|
||||||
|
|
||||||
|
for item in jsonData:
|
||||||
|
id = item.get('_id') or item.get('id')
|
||||||
|
source = item['_source']
|
||||||
|
if fields :
|
||||||
|
source = {}
|
||||||
|
for col in fields :
|
||||||
|
if col in item['_source'] :
|
||||||
|
source[col] = item['_source'][col]
|
||||||
|
|
||||||
|
|
||||||
|
actions.append({
|
||||||
|
"_op_type": "index",
|
||||||
|
"_index": index_name,
|
||||||
|
"_id" : id,
|
||||||
|
"_source": source
|
||||||
|
})
|
||||||
|
helpers.bulk(self.es, actions, chunk_size, raise_on_error, raise_on_exception, stats_only, yield_ok )
|
||||||
|
|
||||||
|
|
||||||
|
def fileToElastic(self, file_path, index_name, limit_pack = -1, fields=[]):
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
print("file zip:" , file_path , " not exist")
|
||||||
|
return
|
||||||
|
print("index:" , index_name , '=>' , file_path )
|
||||||
|
self.counter = 0
|
||||||
|
with open(file_path) as file:
|
||||||
|
data = json.loads(file.read())
|
||||||
|
self.importJsonDataToElastic(data, index_name, fields)
|
||||||
|
|
||||||
|
self.es.indices.refresh(index=index_name)
|
||||||
|
print(self.es.cat.count(index=index_name, format="json"))
|
||||||
|
|
||||||
|
def zipFileToElastic(self, file_path, index_name, limit_pack = -1, fields=[]):
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
print("file zip:" , file_path , " not exist for imort to elastic : ", index_name )
|
||||||
|
return
|
||||||
|
|
||||||
|
fileNo = 0
|
||||||
|
with zipfile.ZipFile(file_path, 'r') as zObject:
|
||||||
|
fileNo +=1
|
||||||
|
print("="*10, " zip fileNo: " , fileNo ," - ( ", index_name," ) | File Numbers:" ,len(zObject.namelist()) , "=" * 10)
|
||||||
|
|
||||||
|
packNo = 0
|
||||||
|
self.counter = 0
|
||||||
|
for filename in zObject.namelist():
|
||||||
|
packNo += 1
|
||||||
|
if limit_pack != -1 :
|
||||||
|
if packNo > limit_pack :
|
||||||
|
print('limit_data ', index_name, ' ', limit_pack)
|
||||||
|
break
|
||||||
|
|
||||||
|
print("index:" , index_name , '=>' , filename )
|
||||||
|
with zObject.open(filename) as file:
|
||||||
|
data = json.loads(file.read())
|
||||||
|
self.importJsonDataToElastic(data, index_name, fields)
|
||||||
|
|
||||||
|
self.es.indices.refresh(index=index_name)
|
||||||
|
print(self.es.cat.count(index=index_name, format="json"))
|
||||||
|
print(" END Of Import to elastic ", index_name ,"\n")
|
||||||
|
|
||||||
|
|
||||||
|
def iterateJsonFile(self, file_path, isZip=True, limit_pack = -1):
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
print("file zip:" , file_path , " not exist iterateJsonFile " )
|
||||||
|
return
|
||||||
|
|
||||||
|
if isZip :
|
||||||
|
fileNo = 0
|
||||||
|
with zipfile.ZipFile(file_path, 'r') as zObject:
|
||||||
|
fileNo +=1
|
||||||
|
print("="*10, " zip fileNo: " , fileNo ," iterateJsonFile - | File Numbers:" ,len(zObject.namelist()) , "=" * 10)
|
||||||
|
|
||||||
|
packNo = 0
|
||||||
|
self.counter = 0
|
||||||
|
for filename in zObject.namelist():
|
||||||
|
packNo += 1
|
||||||
|
if limit_pack != -1 :
|
||||||
|
if packNo > limit_pack :
|
||||||
|
print('limit_data iterateJsonFile ', limit_pack)
|
||||||
|
break
|
||||||
|
|
||||||
|
print("index iterateJsonFile :", '=>' , filename )
|
||||||
|
with zObject.open(filename) as file:
|
||||||
|
data = json.loads(file.read())
|
||||||
|
# Yield each entry
|
||||||
|
# yield data
|
||||||
|
yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in data)
|
||||||
|
else :
|
||||||
|
with open(file_path, 'r', encoding='utf-8') as file:
|
||||||
|
data = json.loads(file.read())
|
||||||
|
# Yield each entry
|
||||||
|
# yield from (hit for hit in data)
|
||||||
|
#return data
|
||||||
|
yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in data)
|
||||||
|
|
||||||
|
|
||||||
|
def es_iterate_all_documents(self, index, body="", pagesize=250, scroll_timeout="25m", **kwargs):
|
||||||
|
"""
|
||||||
|
Helper to iterate ALL values from a single index
|
||||||
|
Yields all the documents.
|
||||||
|
"""
|
||||||
|
is_first = True
|
||||||
|
while True:
|
||||||
|
# Scroll next
|
||||||
|
if is_first: # Initialize scroll
|
||||||
|
# result = self.es.search(index=index, scroll="2m", **kwargs, body={
|
||||||
|
# "size": pagesize
|
||||||
|
# })
|
||||||
|
if body :
|
||||||
|
result = self.es.search(
|
||||||
|
index=index,
|
||||||
|
scroll=scroll_timeout,
|
||||||
|
**kwargs,
|
||||||
|
size=pagesize,
|
||||||
|
body=body
|
||||||
|
)
|
||||||
|
else :
|
||||||
|
result = self.es.search(
|
||||||
|
index=index,
|
||||||
|
scroll=scroll_timeout,
|
||||||
|
**kwargs,
|
||||||
|
size=pagesize
|
||||||
|
)
|
||||||
|
|
||||||
|
self.total = result["hits"]["total"]["value"]
|
||||||
|
if self.total > 0:
|
||||||
|
print("total = %d" % self.total)
|
||||||
|
is_first = False
|
||||||
|
else:
|
||||||
|
# result = es.scroll(body={
|
||||||
|
# "scroll_id": scroll_id,
|
||||||
|
# "scroll": scroll_timeout
|
||||||
|
# })
|
||||||
|
result = self.es.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
|
||||||
|
|
||||||
|
scroll_id = result["_scroll_id"]
|
||||||
|
hits = result["hits"]["hits"]
|
||||||
|
self.counter += len(hits)
|
||||||
|
if self.total > 0 :
|
||||||
|
print("progress -> %.2f %%" % ((self.counter / self.total) * 100))
|
||||||
|
# Stop after no more docs
|
||||||
|
if not hits:
|
||||||
|
break
|
||||||
|
# Yield each entry
|
||||||
|
yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in hits)
|
||||||
|
|
||||||
|
|
||||||
|
def moveCustomFileds(self, index_name_i, index_name_o, fields=[], renameFileds={}):
|
||||||
|
try:
|
||||||
|
body = {}
|
||||||
|
list = []
|
||||||
|
try:
|
||||||
|
list = self.es_iterate_all_documents(index_name_i)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
count = 0
|
||||||
|
for mentry in list:
|
||||||
|
count += 1
|
||||||
|
|
||||||
|
entry = mentry["source"]
|
||||||
|
id = mentry["id"]
|
||||||
|
# print(id)
|
||||||
|
eid = id
|
||||||
|
|
||||||
|
if (count % 100) == 0 :
|
||||||
|
print("%s -> %.2f " % (id , (count / self.total) if self.total > 0 else 0))
|
||||||
|
|
||||||
|
data_filled = False
|
||||||
|
data = {}
|
||||||
|
for col in fields:
|
||||||
|
|
||||||
|
if '.' in col :
|
||||||
|
cols = col.split('.')
|
||||||
|
subsource = entry
|
||||||
|
for sub in cols :
|
||||||
|
dCol = subsource.get(sub, None)
|
||||||
|
if dCol :
|
||||||
|
subsource = dCol
|
||||||
|
else :
|
||||||
|
break
|
||||||
|
else :
|
||||||
|
dCol = entry.get(col, None)
|
||||||
|
|
||||||
|
if dCol is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if col in renameFileds :
|
||||||
|
data[renameFileds[col]] = dCol
|
||||||
|
else:
|
||||||
|
data[col] = dCol
|
||||||
|
|
||||||
|
data_filled = True
|
||||||
|
|
||||||
|
if not data_filled :
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = self.update_index_doc(True, index_name_o, eid, data)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
# save_error(id, e)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# print("1111")
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
# save_error(id, e)
|
||||||
|
|
||||||
|
def mappingIndex(self, index_name_i):
|
||||||
|
# The mapping can only be changed through Kibana
|
||||||
|
|
||||||
|
# it cannot be done from Python
|
||||||
|
# a new index with the desired mapping must be created and the data reindexed into it
|
||||||
|
pass
|
||||||
|
|
||||||
|
def updateByQueryIndex(self, index_name_i, body):
|
||||||
|
## sample
|
||||||
|
# body = {
|
||||||
|
# "script": {
|
||||||
|
# "inline": "ctx._source.Device='Test'",
|
||||||
|
# "lang": "painless"
|
||||||
|
# },
|
||||||
|
# "query": {
|
||||||
|
# "match": {
|
||||||
|
# "Device": "Boiler"
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
try:
|
||||||
|
self.es.update_by_query(body=body, index=index_name_i)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
# save_error(id, e)
|
||||||
|
|
||||||
|
|
||||||
|
def deleteByQueryIndex(self, index_name_i, body):
|
||||||
|
## sample
|
||||||
|
# body = {
|
||||||
|
# "query": {
|
||||||
|
# "match": {
|
||||||
|
# "Device": "Boiler"
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
try:
|
||||||
|
self.es.delete_by_query(index=index_name_i, body=body )
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
# save_error(id, e)
|
||||||
|
|
||||||
|
def delete_by_ids(self, index_name_i, ids):
|
||||||
|
try:
|
||||||
|
# ids = ['test1', 'test2', 'test3']
|
||||||
|
|
||||||
|
query = {"query": {"terms": {"_id": ids}}}
|
||||||
|
res = self.es.delete_by_query(index=index_name_i, body=query)
|
||||||
|
print(res)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
# save_error(id, e)
|
||||||
|
|
||||||
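A short usage sketch for the helper above; the URL and credentials are the constructor defaults shown in the code, the index name and backup path are placeholders, and this snippet is not part of the commit:

from util.elastic_helper import ElasticHelper

es_helper = ElasticHelper(es_url="http://127.0.0.1:6900", es_user="elastic", es_pass="")
if es_helper.success_connect:
    # dump an index to ./backup/<index_name>.zip, then restore it into Elasticsearch again
    es_helper.backupIndexToZipfile("./backup", "mj_qa_section")
    es_helper.restorFileToElastic("./backup", "mj_qa_section", app_key="tavasi")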
681
util/embedder_sbert_qavanin_285k.py
Executable file
|
|
@ -0,0 +1,681 @@
|
||||||
|
# !pip install hazm
|
||||||
|
# !pip install transformers==4.26.0
|
||||||
|
# !pip install --upgrade numpy
|
||||||
|
# !pip install --upgrade sentence-transformers
|
||||||
|
"""
|
||||||
|
Persian Sentence Processing and Vector Analysis
|
||||||
|
==============================================
|
||||||
|
|
||||||
|
This script processes Persian sentences from a JSON file and performs:
|
||||||
|
1. Word extraction and preprocessing
|
||||||
|
2. Vector representation using multilingual transformer
|
||||||
|
3. Similarity analysis for key words
|
||||||
|
4. Dimensionality reduction to 3D
|
||||||
|
5. 3D visualization with Persian labels
|
||||||
|
|
||||||
|
Author: NLP Expert Assistant
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from typing import List, Dict, Tuple, Set
|
||||||
|
from collections import Counter
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# NLP and ML libraries
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
from transformers import AutoTokenizer
|
||||||
|
from sklearn.decomposition import PCA
|
||||||
|
from sklearn.manifold import TSNE
|
||||||
|
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
#from normalizer import cleaning
|
||||||
|
try:
|
||||||
|
from util.elastic_helper import ElasticHelper
|
||||||
|
except Exception as error:
|
||||||
|
eee = error
|
||||||
|
pass
|
||||||
|
# Visualization libraries
|
||||||
|
# import matplotlib.pyplot as plt
|
||||||
|
# import plotly.graph_objects as go
|
||||||
|
# import plotly.express as px
|
||||||
|
# from plotly.subplots import make_subplots
|
||||||
|
|
||||||
|
# Persian text processing
|
||||||
|
# import hazm
|
||||||
|
# from hazm import Normalizer, word_tokenize, POSTagger
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PersianVectorAnalyzer:
|
||||||
|
"""
|
||||||
|
A comprehensive class for Persian text processing and vector analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
|
||||||
|
"""
|
||||||
|
Initialize the analyzer with the specified model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model_name: The sentence transformer model to use
|
||||||
|
"""
|
||||||
|
self.model_name = model_name
|
||||||
|
self.model = None
|
||||||
|
#self.normalizer = Normalizer()
|
||||||
|
self.stop_words = self._load_persian_stop_words()
|
||||||
|
self.key_words = [
|
||||||
|
"خدا", "بنده", "جهاد", "ولی", "زکات",
|
||||||
|
"نماز", "صبر", "عبادت", "ولایت", "خلافت","پیامبر"
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"Initializing Persian Vector Analyzer with model: {model_name}")
|
||||||
|
|
||||||
|
def _load_persian_stop_words(self) -> Set[str]:
|
||||||
|
"""
|
||||||
|
Load Persian stop words.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Set of Persian stop words
|
||||||
|
"""
|
||||||
|
# Common Persian stop words
|
||||||
|
stop_words = {
|
||||||
|
'و', 'در', 'به', 'از', 'که', 'این', 'آن', 'با', 'برای', 'تا',
|
||||||
|
'را', 'هم', 'یا', 'اما', 'اگر', 'چون', 'چرا', 'چگونه', 'کجا',
|
||||||
|
'چه', 'کی', 'چند', 'چقدر', 'همه', 'هیچ', 'بعضی', 'هر', 'همه',
|
||||||
|
'خود', 'خویش', 'ما', 'شما', 'آنها', 'ایشان', 'اینها', 'آنها',
|
||||||
|
'من', 'تو', 'او', 'ما', 'شما', 'آنها', 'ایشان', 'اینها',
|
||||||
|
'است', 'هست', 'بود', 'شد', 'می', 'باید', 'خواهد', 'دارد',
|
||||||
|
'کرد', 'شد', 'بود', 'هست', 'است', 'میشود', 'میکند',
|
||||||
|
'یک', 'دو', 'سه', 'چهار', 'پنج', 'شش', 'هفت', 'هشت', 'نه', 'ده',
|
||||||
|
'اول', 'دوم', 'سوم', 'چهارم', 'پنجم', 'ششم', 'هفتم', 'هشتم', 'نهم', 'دهم',
|
||||||
|
'سال', 'ماه', 'روز', 'هفته', 'ساعت', 'دقیقه', 'ثانیه', 'پس',
|
||||||
|
'بله', 'نه', 'آری', 'خیر', 'بلی', 'نخیر',
|
||||||
|
'حالا', 'الان', 'امروز', 'دیروز', 'فردا', 'هفته', 'ماه', 'سال',
|
||||||
|
'بالا', 'پایین', 'چپ', 'راست', 'جلو', 'عقب', 'داخل', 'خارج',
|
||||||
|
'بزرگ', 'کوچک', 'بلند', 'کوتاه', 'پهن', 'باریک', 'ضخیم', 'نازک',
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
return stop_words
|
||||||
|
|
||||||
|
def load_model(self):
|
||||||
|
"""
|
||||||
|
Load the sentence transformer model.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
logger.info("Loading sentence transformer model...")
|
||||||
|
self.model = SentenceTransformer(self.model_name)
|
||||||
|
logger.info("Model loaded successfully!")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error loading model: {e}")
|
||||||
|
raise
|
||||||
|
def split_sentence(self, sentence:str):
|
||||||
|
sentences = []
|
||||||
|
sentence_len = len(self.tokenize_sentence(sentence))
|
||||||
|
if sentence_len < 512:
|
||||||
|
sentences.append(sentence)
|
||||||
|
else:
|
||||||
|
temp_sentences = str(sentence).split('.')
|
||||||
|
for sent in temp_sentences:
|
||||||
|
sent_len = len(self.tokenize_sentence(sent))
|
||||||
|
if sent_len > 512:
|
||||||
|
temp_sentences_2 = str(sent).split('،')
|
||||||
|
for snt in temp_sentences_2:
|
||||||
|
sentences.append(snt)
|
||||||
|
else:
|
||||||
|
sentences.append(sent)
|
||||||
|
|
||||||
|
return sentences
|
||||||
|
|
||||||
|
    def load_json_data(self, file_path: str) -> List[str]:
        """
        Load Persian sentences from JSON file.

        Args:
            file_path: Path to the JSON file

        Returns:
            List of Persian sentences
        """
        try:
            logger.info(f"Loading data from {file_path}")
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # convert dict{dict} to list[dict]
            if isinstance(data, dict):
                temp_data = []
                for item in data.items():
                    temp_data.append(item[1])
                data = temp_data

            sentences = []
            if isinstance(data, list):
                for index, item in enumerate(data):
                    print(f'split sentence {index}')
                    if isinstance(item, dict):
                        if item['content'] == '':
                            continue
                        sentences.append([item['id'], item['content'].strip()])
                        # for key in ['content']:
                        #     if key in item and item[key]:
                        #         # splited_sentences = self.split_sentence(item[key])
                        #         # splited_sentences = item[key]
                        #         sentences.append(item[key])
                        #         # for sent in splited_sentences:
                        #         #     sentences.append(sent)
                        #     else:
                        #         print('fault ' + item['sentence-number'])
                    elif isinstance(item, str):
                        # splited_sentences = self.split_sentence(item[key])
                        sentences.append(item)
                        # for sent in splited_sentences:
                        #     sentences.append(sent)
            elif isinstance(data, dict):
                # If it's a single object, extract all string values
                for value in data.values():
                    if isinstance(value, str):
                        sentences.append(value)
                        # splited_sentences = str(value).split('.')
                        # for sent in splited_sentences:
                        #     sentences.append(sent)

            sentences = [senten for senten in sentences if senten]
            logger.info(f"Loaded {len(sentences)} sentences")
            return sentences

        except Exception as e:
            logger.error(f"Error loading JSON data: {e}")
            raise

    def preprocess_text(self, text: str) -> str:
        """
        Preprocess Persian text.

        Args:
            text: Raw Persian text

        Returns:
            Preprocessed text
        """
        # Normalize text
        # text = self.normalizer.normalize(text)

        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text)

        # Remove special characters but keep Persian characters
        text = re.sub(r'[^\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\s]', '', text)

        return text.strip()

    def tokenize_sentence(self, sentence: str):
        try:
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            # print(self.model_name)
            tokens = tokenizer.tokenize(sentence)
            return tokens
        except Exception:
            error = "An exception occurred in tokenizer : " + self.model_name
            logger.error(error)
            # file.write(error + '\n')
            return []

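Note that tokenize_sentence builds a fresh AutoTokenizer on every call, and split_sentence calls it once per piece. A cached variant along the following lines would avoid the repeated loads; this is a sketch under the assumption that the class gains a `_tokenizer` attribute, not part of this commit:

    def tokenize_sentence_cached(self, sentence: str):
        # Hypothetical variant: keep one tokenizer per analyzer instance.
        if getattr(self, '_tokenizer', None) is None:
            self._tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        try:
            return self._tokenizer.tokenize(sentence)
        except Exception as e:
            logger.error(f"Tokenizer failed for {self.model_name}: {e}")
            return []
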
    def extract_words(self, sentences: List[str]) -> List[str]:
        """
        Extract all words from sentences.

        Args:
            sentences: List of Persian sentences

        Returns:
            List of all words
        """
        all_words = []

        for sentence in sentences:
            # Preprocess sentence
            processed_sentence = self.preprocess_text(sentence)

            # Tokenize
            words = word_tokenize(processed_sentence)
            # words = processed_sentence.split()

            # Filter out empty strings and very short words
            words = [word for word in words if len(word) > 1]

            all_words.extend(words)

        logger.info(f"Extracted {len(all_words)} words from {len(sentences)} sentences")
        return all_words

    def remove_stop_words(self, words: List[str]) -> List[str]:
        """
        Remove stop words from the word list.

        Args:
            words: List of words

        Returns:
            List of words without stop words
        """
        filtered_words = [word for word in words if word not in self.stop_words]
        logger.info(f"Removed {len(words) - len(filtered_words)} stop words")
        return filtered_words

    def get_unique_words(self, words: List[str]) -> List[str]:
        """
        Get unique words from the list.

        Args:
            words: List of words

        Returns:
            List of unique words
        """
        unique_words = list(set(words))
        logger.info(f"Found {len(unique_words)} unique words from {len(words)} total words")
        return unique_words

    def compute_word_vectors(self, sentences: List[str]) -> Dict[str, dict]:
        """
        Compute vector representations for the law sections.

        Args:
            sentences: List of section dicts (id, fullpath, qanon-title, section-prefix, content)

        Returns:
            Dictionary mapping each section key to its metadata and embedding
        """
        if self.model is None:
            self.load_model()

        logger.info(f"Computing vectors for {len(sentences)} sections ...")
        # print(sentences[0])
        # create a list of just the section contents
        just_sentences = [sent['content'] for sent in sentences]

        # Compute embeddings
        embeddings = self.model.encode(just_sentences, show_progress_bar=True)

        # Create dictionary
        sentences_vectors = {}
        for i, sent in enumerate(sentences):
            sentences_vectors[f'sentence-{sentences[i]["id"]}'] = {
                'id': sentences[i]['id'],
                'fullpath': sentences[i]['fullpath'],
                'qanon-title': sentences[i]['qanon-title'],
                'section-prefix': sentences[i]['section-prefix'],
                'content': sentences[i]['content'],
                'embeddings': embeddings[i].tolist()
            }
            print(f'section {i} embedded!')

        logger.info("section vectors computed successfully!")
        return sentences_vectors

    def find_closest_words(self, word_vectors: Dict[str, List[float]],
                           key_words: List[str], top_k: int = 20) -> Dict[str, List[str]]:
        """
        Find the closest words to each key word.

        Args:
            word_vectors: Dictionary of word vectors
            key_words: List of key words to find neighbors for
            top_k: Number of closest words to find

        Returns:
            Dictionary mapping key words to their closest neighbors
        """
        logger.info(f"Finding {top_k} closest words for {len(key_words)} key words...")

        # Convert to numpy arrays for faster computation
        words = list(word_vectors.keys())
        vectors = np.array(list(word_vectors.values()))

        closest_words = {}

        for key_word in key_words:
            if key_word in word_vectors:
                # Get the key word vector
                key_vector = np.array(word_vectors[key_word]).reshape(1, -1)

                # Compute cosine similarities
                similarities = cosine_similarity(key_vector, vectors)[0]

                # Get indices sorted by similarity (descending)
                word_indices = np.argsort(similarities)[::-1]

                # Filter out the key word itself and keep the top k
                closest_indices = []
                for idx in word_indices:
                    if words[idx] != key_word and len(closest_indices) < top_k:
                        closest_indices.append(idx)

                # Get the closest words
                closest_words[key_word] = [words[idx] for idx in closest_indices]
                logger.info(f"Found {len(closest_words[key_word])} closest words for '{key_word}'")
            else:
                logger.warning(f"Key word '{key_word}' not found in word vectors")
                closest_words[key_word] = []

        return closest_words

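A minimal usage sketch, not part of the commit: word_vectors maps each word to a plain embedding list, and the sample words and values below are illustrative only.

# Illustrative only: tiny hand-made "embeddings" for three words.
analyzer = PersianVectorAnalyzer()
word_vectors = {
    'قانون': [0.12, 0.80, 0.33],
    'ماده': [0.10, 0.78, 0.35],
    'تبصره': [0.09, 0.75, 0.30],
}
neighbors = analyzer.find_closest_words(word_vectors, key_words=['قانون'], top_k=2)
print(neighbors['قانون'])  # the two remaining words, ordered by cosine similarity
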
    def reduce_to_3d(self, word_vectors: Dict[str, List[float]],
                     method: str = 'tsne') -> Dict[str, List[float]]:
        """
        Reduce word vectors to 3D coordinates.

        Args:
            word_vectors: Dictionary of word vectors
            method: Dimensionality reduction method ('pca' or 'tsne')

        Returns:
            Dictionary mapping words to their 3D coordinates
        """
        logger.info(f"Reducing dimensions to 3D using {method.upper()}...")

        words = list(word_vectors.keys())
        vectors = np.array(list(word_vectors.values()))

        if method.lower() == 'pca':
            reducer = PCA(n_components=3, random_state=42)
        elif method.lower() == 'tsne':
            reducer = TSNE(n_components=3, random_state=42, perplexity=min(30, len(vectors) - 1))
        else:
            raise ValueError("Method must be 'pca' or 'tsne'")

        # Reduce dimensions
        reduced_vectors = reducer.fit_transform(vectors)

        # Create dictionary
        word_vectors_3d = {}
        for i, word in enumerate(words):
            word_vectors_3d[word] = reduced_vectors[i].tolist()

        logger.info("Dimensionality reduction completed!")
        return word_vectors_3d

    def save_json(self, data: dict, file_path: str):
        """
        Save data to JSON file.

        Args:
            data: Data to save
            file_path: Output file path
        """
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            logger.info(f"Data saved to {file_path}")
        except Exception as e:
            logger.error(f"Error saving to {file_path}: {e}")
            raise

    # def create_3d_visualization(self, word_vectors_3d: Dict[str, List[float]],
    #                             selected_words: Dict[str, List[str]],
    #                             output_path: str = "persian_words_3d.html"):
    #     """
    #     Create 3D visualization of words.
    #
    #     Args:
    #         word_vectors_3d: Dictionary of 3D word coordinates
    #         selected_words: Dictionary of selected words for each key word
    #         output_path: Output file path for the visualization
    #     """
    #     logger.info("Creating 3D visualization...")
    #
    #     # Prepare data for plotting
    #     words = list(word_vectors_3d.keys())
    #     coords = np.array(list(word_vectors_3d.values()))
    #
    #     # Create color mapping for key words and their neighbors
    #     colors = []
    #     sizes = []
    #     hover_texts = []
    #
    #     for word in words:
    #         # Check if word is a key word
    #         is_key_word = word in self.key_words
    #
    #         # Check if word is in selected words
    #         in_selected = False
    #         key_word_group = None
    #         for key_word, selected_list in selected_words.items():
    #             if word in selected_list:
    #                 in_selected = True
    #                 key_word_group = key_word
    #                 break
    #
    #         if is_key_word:
    #             colors.append('red')
    #             sizes.append(15)
    #             hover_texts.append(f"کلیدواژه: {word}")
    #         elif in_selected:
    #             colors.append('blue')
    #             sizes.append(10)
    #             hover_texts.append(f"کلمه مرتبط با '{key_word_group}': {word}")
    #         else:
    #             colors.append('lightgray')
    #             sizes.append(5)
    #             hover_texts.append(f"کلمه: {word}")
    #
    #     # Create 3D scatter plot
    #     fig = go.Figure()
    #
    #     # Add scatter plot
    #     fig.add_trace(go.Scatter3d(
    #         x=coords[:, 0],
    #         y=coords[:, 1],
    #         z=coords[:, 2],
    #         mode='markers+text',
    #         marker=dict(
    #             size=sizes,
    #             color=colors,
    #             opacity=0.8
    #         ),
    #         text=words,
    #         textposition="middle center",
    #         hovertext=hover_texts,
    #         hoverinfo='text'
    #     ))
    #
    #     # Update layout
    #     fig.update_layout(
    #         title={
    #             'text': 'نمایش سهبعدی کلمات فارسی',
    #             'x': 0.5,
    #             'xanchor': 'center',
    #             'font': {'size': 20}
    #         },
    #         scene=dict(
    #             xaxis_title='محور X',
    #             yaxis_title='محور Y',
    #             zaxis_title='محور Z',
    #             camera=dict(
    #                 eye=dict(x=1.5, y=1.5, z=1.5)
    #             )
    #         ),
    #         width=1000,
    #         height=800,
    #         showlegend=False
    #     )
    #
    #     # Save the plot
    #     fig.write_html(output_path)
    #     logger.info(f"3D visualization saved to {output_path}")
    #
    #     return fig

def process_pipeline(self, input_file: str, output_dir: str = "output"):
|
||||||
|
"""
|
||||||
|
Run the complete processing pipeline.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input_file(str): Path to input JSON file
|
||||||
|
output_dir(str): Output directory for results
|
||||||
|
"""
|
||||||
|
# Create output directory
|
||||||
|
Path(output_dir).mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
logger.info("Starting Persian Vector Analysis Pipeline...")
|
||||||
|
|
||||||
|
# Step 1: Load data
|
||||||
|
# sentences = self.load_json_data(input_file)
|
||||||
|
sentences = ALL_SECTIONS
|
||||||
|
|
||||||
|
# for s in sentences:
|
||||||
|
# s_len = len(self.tokenize_sentence(s))
|
||||||
|
# if s_len > 512:
|
||||||
|
# print(f'long: {s}')
|
||||||
|
# Step 2: Extract words
|
||||||
|
# all_words = self.extract_words(sentences)
|
||||||
|
|
||||||
|
# Step 3: Remove stop words
|
||||||
|
# filtered_words = self.remove_stop_words(all_words)
|
||||||
|
# filtered_words = all_words
|
||||||
|
|
||||||
|
# Step 4: Get unique words
|
||||||
|
# unique_words = self.get_unique_words(filtered_words)
|
||||||
|
|
||||||
|
# Step 5: Compute word vectors
|
||||||
|
sentences_vectors = self.compute_word_vectors(sentences)
|
||||||
|
|
||||||
|
# Step 6: Save word vectors
|
||||||
|
self.save_json(sentences_vectors, f"{output_dir}/sections-vec-285k.json")
|
||||||
|
|
||||||
|
# Step 7: Find closest words to key words
|
||||||
|
# selected_words = self.find_closest_words(word_vectors, self.key_words)
|
||||||
|
|
||||||
|
# Step 8: Save selected words
|
||||||
|
# self.save_json(selected_words, f"{output_dir}/selected_words.json")
|
||||||
|
|
||||||
|
# Step 9: Reduce to 3D
|
||||||
|
# word_vectors_3d = self.reduce_to_3d(word_vectors, method='tsne')
|
||||||
|
|
||||||
|
# Step 10: Save 3D vectors
|
||||||
|
# self.save_json(word_vectors_3d, f"{output_dir}/words_vector_3d.json")
|
||||||
|
|
||||||
|
# Step 11: Create visualization
|
||||||
|
# self.create_3d_visualization(word_vectors_3d, selected_words,
|
||||||
|
# f"{output_dir}/persian_words_3d.html")
|
||||||
|
|
||||||
|
logger.info("Pipeline completed successfully!")
|
||||||
|
|
||||||
|
# Print summary
|
||||||
|
print("\n" + "="*50)
|
||||||
|
print("PIPELINE SUMMARY")
|
||||||
|
print("="*50)
|
||||||
|
print(f"Input sentences: {len(sentences)}")
|
||||||
|
# print(f"Total words extracted: {len(all_words)}")
|
||||||
|
# print(f"Unique words after preprocessing: {len(unique_words)}")
|
||||||
|
# print(f"Word vectors computed: {len(word_vectors)}")
|
||||||
|
# print(f"Key words processed: {len(self.key_words)}")
|
||||||
|
print(f"Output files saved to: {output_dir}/")
|
||||||
|
print("="*50)
|
||||||
|
|
||||||
|
def full_path_text_maker(full_path):
    """
    Take the tree path of a section and rebuild a text that orders its parts
    from the most specific to the most general, then return it.

    Args:
        full_path (list): List of elements identifying the tree path of this section

    Returns:
        full_path_text (str): Text reconstructed from the section's path
    """
    full_path_text = ""
    for i, path_item in enumerate(reversed(full_path)):
        if i == len(full_path) - 1:
            full_path_text += ''.join(f'{path_item}')
            break
        full_path_text += ''.join(f'{path_item} از ')
    full_path_text = full_path_text.strip()
    return full_path_text

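For example (the path elements below are made-up sample values, not from the dataset):

# Illustrative only: hypothetical section path.
path = ['قانون مدنی', 'باب اول', 'ماده 1']
print(full_path_text_maker(path))
# -> 'ماده 1 از باب اول از قانون مدنی'
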
def main():
    """
    Main function to run the Persian Vector Analysis.
    """
    # Initialize analyzer
    analyzer = PersianVectorAnalyzer()

    # Define input and output paths
    # input_file = "./output-speechs/nahj_speechs_sentences.json"
    # output_dir = "output-speechs"
    # input_file = "./majles/data/sections.json"
    input_file = ""
    output_dir = "./data/majles-output"

    # Run the complete pipeline
    analyzer.process_pipeline(input_file, output_dir)

if __name__ == "__main__":
|
||||||
|
eh_obj = ElasticHelper()
|
||||||
|
path = "/home/gpu/data_11/14040611/mj_qa_section.zip"
|
||||||
|
sections_elastic = eh_obj.iterateJsonFile(path, True)
|
||||||
|
all_count = 0
|
||||||
|
dont_cares = []
|
||||||
|
ALL_SECTIONS = []
|
||||||
|
for index, item in enumerate(sections_elastic):
|
||||||
|
all_count +=1
|
||||||
|
source = item['source']
|
||||||
|
section_path = source['other_info']['full_path']
|
||||||
|
id = item['id']
|
||||||
|
|
||||||
|
filtered_keys = ['فصل','موخره','امضاء','عنوان']
|
||||||
|
section_path = source['other_info']['full_path']
|
||||||
|
flag = False
|
||||||
|
if '>' in section_path:
|
||||||
|
path_parts = section_path.split('>')
|
||||||
|
for key in filtered_keys:
|
||||||
|
if key in path_parts[-1]:
|
||||||
|
dont_cares.append(id)
|
||||||
|
flag = True
|
||||||
|
break
|
||||||
|
if flag:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
for key in filtered_keys:
|
||||||
|
if key in section_path:
|
||||||
|
dont_cares.append(id)
|
||||||
|
flag = True
|
||||||
|
break
|
||||||
|
if flag:
|
||||||
|
continue
|
||||||
|
|
||||||
|
qanon_title = source['qanon_title']
|
||||||
|
full_path_text = full_path_text_maker(section_path.split('>'))
|
||||||
|
section_prefix = f"محتوای {full_path_text} {cleaning(qanon_title)} عبارت است از: "
|
||||||
|
|
||||||
|
try:
|
||||||
|
content = cleaning(item['source']['content'])
|
||||||
|
# کنار گذاشتن سکشن های خیلی کوچک که عملا محتوا ندارند
|
||||||
|
if len(content.split()) <= 10:
|
||||||
|
continue
|
||||||
|
except Exception as error:
|
||||||
|
print(error)
|
||||||
|
continue
|
||||||
|
data = {
|
||||||
|
'id': id,
|
||||||
|
'fullpath': section_path,
|
||||||
|
'qanon-title': qanon_title,
|
||||||
|
'section-prefix': section_prefix,
|
||||||
|
'content': content
|
||||||
|
}
|
||||||
|
ALL_SECTIONS.append(data)
|
||||||
|
print(f'all_count: {all_count}')
|
||||||
|
print(f'dont_cares: {len(dont_cares)}')
|
||||||
|
print(f'ALL_SECTIONS without dont-cares: {len(ALL_SECTIONS)}')
|
||||||
|
|
||||||
|
main()
|
||||||
|
|
||||||
|
"""
|
||||||
|
:: *** نکته مهم *** ::
|
||||||
|
NOTE !!! after this process run convert_qavanin_json_to_faiss.py due to create faiss index which is used in RAG process
|
||||||
|
"""
|
||||||
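convert_qavanin_json_to_faiss.py itself is not part of this diff. A minimal sketch of what that conversion step could look like, assuming the JSON layout written by save_json above; the file names and the flat inner-product index are illustrative choices, not the project's actual ones:

# Illustrative sketch: build a FAISS index from the saved section embeddings.
import json
import numpy as np
import faiss

with open("./data/majles-output/sections-vec-285k.json", encoding="utf-8") as f:
    sections = json.load(f)

keys = list(sections.keys())
vectors = np.array([sections[k]["embeddings"] for k in keys], dtype="float32")
faiss.normalize_L2(vectors)                  # cosine similarity via inner product
index = faiss.IndexFlatIP(vectors.shape[1])
index.add(vectors)
faiss.write_index(index, "./data/majles-output/sections-285k.index")
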
76
util/normalizer.py
Executable file
76
util/normalizer.py
Executable file
|
|
@ -0,0 +1,76 @@
#import hazm
from cleantext import clean
import re

def cleanhtml(raw_html):
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', raw_html)
    return cleantext

#normalizer = hazm.Normalizer()
wierd_pattern = re.compile("["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
    u"\U00002702-\U000027B0"
    u"\U000024C2-\U0001F251"
    u"\U0001f926-\U0001f937"
    u'\U00010000-\U0010ffff'
    u"\u200d"
    u"\u2640-\u2642"
    u"\u2600-\u2B55"
    u"\u23cf"
    u"\u23e9"
    u"\u231a"
    u"\u3030"
    u"\ufe0f"
    u"\u2069"
    u"\u2066"
    # u"\u200c"
    u"\u2068"
    u"\u2067"
    "]+", flags=re.UNICODE)

def cleaning(text):
    text = text.strip()

    # regular cleaning
    # text = clean(text,
    #     fix_unicode=True,
    #     to_ascii=False,
    #     lower=True,
    #     no_line_breaks=True,
    #     no_urls=True,
    #     no_emails=True,
    #     no_phone_numbers=True,
    #     no_numbers=False,
    #     no_digits=False,
    #     no_currency_symbols=True,
    #     no_punct=False,
    #     replace_with_url="",
    #     replace_with_email="",
    #     replace_with_phone_number="",
    #     replace_with_number="",
    #     replace_with_digit="0",
    #     replace_with_currency_symbol="",
    # )
    text = clean(text,
                 extra_spaces=True,
                 lowercase=True
                 )

    # cleaning htmls
    text = cleanhtml(text)

    # normalizing
    #text = normalizer.normalize(text)

    # removing weird patterns (emoji, directional marks, etc.)
    text = wierd_pattern.sub(r'', text)

    # removing extra spaces, hashtags
    text = re.sub("#", "", text)
    text = re.sub(r"\s+", " ", text)

    return text
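A short usage sketch of cleaning(); the sample string is made up, and the exact output depends on the installed cleantext version:

# Illustrative only: HTML tags, emoji and '#' are removed, whitespace collapsed.
raw = "<p>ماده  ۱  #قانون 😀</p>"
print(cleaning(raw))
# roughly -> 'ماده ۱ قانون'
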