hsafae 2025-09-28 09:24:27 +00:00
parent a57d289e1c
commit d79de7461e
8 changed files with 1040 additions and 92 deletions

chatbot.py
View File

@@ -11,11 +11,11 @@ from sklearn.metrics.pairwise import cosine_similarity
import datetime
import re
import random
from fastapi.middleware.cors import CORSMiddleware
from embedder_sbert_qavanin_285k import PersianVectorAnalyzer
from normalizer import cleaning
from fastapi import FastAPI
#from normalizer import cleaning
from fastapi import FastAPI, Header
from pydantic import BaseModel
# LLM Libs
from openai import OpenAI
from langchain_openai import ChatOpenAI # pip install -U langchain_openai
@@ -24,14 +24,23 @@ import requests
today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime.datetime.now().day}'
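# e.g. 2025-09-28 -> '2025928' (note: month and day are not zero-padded)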
chatbot = FastAPI()
origins = ["*"]
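# Permissive CORS (any origin) so browser front-ends can call the API; tighten for production.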
chatbot.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# -------------------
# Models and data paths
# -------------------
EMBED_MODEL = "/home/sabr/MODLES/rag_chat/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
RERANKER_MODEL = "/home/sabr/MODLES/rag_chat/BAAI/bge-reranker-v2-m3"
FAISS_INDEX_PATH = "./qavanin-faiss/faiss_index_qavanin_285k.index"
FAISS_METADATA_PATH = "./qavanin-faiss/faiss_index_qavanin_285k_metadata.json"
# -------------------
# Models and data paths
# -------------------
EMBED_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
RERANKER_MODEL = "BAAI/bge-reranker-v2-m3"
FAISS_INDEX_PATH = "/src/app/qavanin-faiss/faiss_index_qavanin_285k.index"
FAISS_METADATA_PATH = "/src/app/qavanin-faiss/faiss_index_qavanin_285k_metadata.json"
RERANK_BATCH = int(os.environ.get("RERANK_BATCH", 256))
# print(f'RERANK_BATCH: {RERANK_BATCH}')
@@ -67,6 +76,37 @@ def get_client():
return client
def llm_base_request(query):
# model = 'cf.gemma-3-12b-it'
model = 'gpt-4o-mini'
prompt = f'برای متن {query} زیر، عنوانی کوتاه که بین 3 تا 6 کلمه داشته باشد، انتخاب کن. غیر از عنوان، به هیچ وجه توضیح اضافه ای در قبل یا بعد آن اضافه نکن.' # (fa) "Pick a short title of 3 to 6 words for the text below; output nothing but the title."
client = get_client()
try:
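# NOTE: this appends to the module-level 'messages' history that the main chat flow also uses.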
messages.append({"role": "user", "content": prompt})
response = client.chat.completions.create(
messages = messages,
model= model) # "gpt-4o", "gpt-4o-mini", "deepseek-chat" , "gemini-2.0-flash", gemini-2.5-flash-lite
# gpt-4o : 500
# gpt-4o-mini : 34
# deepseek-chat : 150
# gemini-2.0-flash : error
# cf.gemma-3-12b-it : 1
# gemini-2.5-flash-lite : 35 (very good)
answer = response.choices[0].message.content
# Append the answer to the conversation history as well
messages.append({"role": "assistant", "content": answer})
except Exception as error:
with open('./llm-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file:
error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
file.write(error_message)
return ''
return answer
def llm_request(query, model):
if query == '':
@@ -121,7 +161,8 @@ class HybridRetrieverReranker:
self.N = len(content_list)
# Dense
self.embedder = SentenceTransformer(EMBED_MODEL, device=self.device)
self.embedder = SentenceTransformer(EMBED_MODEL, cache_folder='/src/MODELS', device=self.device)
#self.embedder = SentenceTransformer(EMBED_MODEL, device=self.device)
# Sparse (same as before, to keep the output unchanged)
self.vectorizer = TfidfVectorizer(
@@ -132,7 +173,7 @@
self.tfidf_matrix = self.vectorizer.fit_transform(self.content_list)
# Reranker
self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL, use_fast=True)
self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL, cache_dir='/src/MODELS', use_fast=True)
self.reranker = AutoModelForSequenceClassification.from_pretrained(
RERANKER_MODEL
).to(self.device)
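# NOTE: unlike the tokenizer above, this load does not pass cache_dir='/src/MODELS', so the reranker weights fall back to the default Hugging Face cache.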
@@ -290,16 +331,17 @@
def single_query(query: str):
query = cleaning(query)
# query = cleaning(query)
retrived_sections_ids = []
retrived_sections = pipe.search(query, content_list, topk_dense=30, topk_sparse=30, pre_rerank_k=30, final_k=10)
final_similars = ''
for i, row in enumerate(retrived_sections, 1):
id_value = '{' + str(ids[row['idx']]) + '}'
result = f"id: {id_value} \n{row['prefix']} {row['content']}\n"
retrived_sections_ids.append(ids[row['idx']])
final_similars += result
return final_similars, retrived_sections
return final_similars, retrived_sections_ids
def find_refrences(llm_answer: str) -> List[str]:
"""
@@ -313,7 +355,12 @@
"""
pattern = r"\{[^\}]+\}"
refrence_ids = re.findall(pattern, llm_answer)
new_refrences_ids = []
for itm in refrence_ids:
refrence = itm.lstrip('{')
refrence = refrence.rstrip('}')
new_refrences_ids.append(refrence)
# refrence_ids = [item.lstrip('{').rstrip('}') for item in refrence_ids]
return refrence_ids
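# Example: an answer containing "{12345}" yields ['{12345}']; replace_refrences below matches these braced ids verbatim.
# NOTE: the stripped list new_refrences_ids is built but currently not returned.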
def replace_refrences(llm_answer: str, refrences_list: List[str]) -> str:
@@ -330,12 +377,12 @@ def replace_refrences(llm_answer: str, refrences_list: List[str]) -> str:
for index, ref in enumerate(refrences_list, 1):
# breakpoint()
llm_answer = llm_answer.replace(ref, f'[{index}]')
id = ref.lstrip('{')
id = id.rstrip('}')
refrences += ''.join(f'[{index}] https://majles.tavasi.ir/entity/detail/view/qsection/{id}\n')
# id = ref.lstrip('{')
# id = id.rstrip('}')
# refrences += ''.join(f'[{index}] https://majles.tavasi.ir/entity/detail/view/qsection/{id}\n')
llm_answer = f'{llm_answer}\n\nمنابع پاسخ‌:\n{refrences.strip()}'
return llm_answer
# llm_answer = f'{llm_answer}\n\nمنابع پاسخ‌:\n{refrences.strip()}'
return llm_answer.strip()
# load basic items
content_list, ids, prefix_list, faiss_index = load_faiss_index(FAISS_INDEX_PATH, FAISS_METADATA_PATH)
@@ -347,26 +394,62 @@ messages = [
{"role": "system", "content": "تو یک دستیار خبره در زمینه حقوق و قوانین مرتبط به آن هستی و می توانی متون حقوقی را به صورت دقیق توضیح بدهی . پاسخ ها باید الزاما به زبان فارسی باشد. پاسخ ها فقط از متون قانونی که در پرامپت وجود دارد استخراج شود."},
]
def run_chatbot(query:str, chat_id:str):
models = ["gemini-2.5-flash-lite", "gpt-4o-mini"]
def save_result(chat_obj: object) -> bool:
# index result in elastic
pass
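# A possible implementation sketch (assumes an Elasticsearch 7.x node and a hypothetical 'chat-results' index):
# from elasticsearch7 import Elasticsearch
# es = Elasticsearch('http://localhost:9200')
# es.index(index='chat-results', id=chat_obj['id'], body=chat_obj)
# return True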
def run_chatbot(query:str, chat_id:str):
prompt_status = True
status_text = 'لطفا متن سوال را وارد نمائید' # (fa) "Please enter the question text"
# Defaults so the failure path below can build a chat_obj even when retrieval never ran
result_passages_text, result_passages_ids = '', []
retrive_duration = 0
if query == '':
return 'لطفا متن سوال را وارد نمائید'
prompt_status = False
start_time = (datetime.datetime.now())
result_passages_text, result_passages_ids = single_query(query)
end_retrive = datetime.datetime.now()
print('-'*40)
retrive_duration = (end_retrive - start_time).total_seconds()
print(f'retrive duration: {str(retrive_duration)}')
# If the prompt status is valid, enter the main flow
if prompt_status:
result_passages_text, result_passages_ids = single_query(query)
end_retrive = datetime.datetime.now()
print('-'*40)
retrive_duration = (end_retrive - start_time).total_seconds()
print(f'retrive duration: {str(retrive_duration)}')
prompt = f'برای پرسش "{query}" از میان مواد قانونی "{result_passages_text}" .پاسخ مناسب و دقیق را استخراج کن. درصورتی که مطلبی مرتبط با پرسش در متن پیدا نشد، فقط پاسخ بده: "متاسفانه در منابع، پاسخی پیدا نشد!"'
try:
model = "gemini-2.5-flash-lite"
llm_answer = llm_request(prompt, model)
except Exception as error:
model = "gpt-4o-mini"
llm_answer = llm_request(prompt, model)
prompt = f'برای پرسش "{query}" از میان مواد قانونی "{result_passages_text}" .پاسخ مناسب و دقیق را استخراج کن. درصورتی که مطلبی مرتبط با پرسش در متن پیدا نشد، فقط پاسخ بده: "متاسفانه در منابع، پاسخی پیدا نشد!"' # (fa) "Extract a precise answer to the question from the given legal passages; if nothing relevant is found, reply only: 'Unfortunately, no answer was found in the sources!'"
llm_model = ''
llm_answer = ''
for model in models:
try:
llm_model = model
llm_answer = llm_request(prompt, model)
prompt_status = True
break # stop at the first model that answers successfully
except Exception as error:
error = f'model: {model} \n{error}\n\n'
prompt_status = False
status_text = 'با عرض پوزش، سرویس موقتا در دسترس نیست. لطفا دقایقی دیگر دوباره تلاش نمائید!' # (fa) "We apologize, the service is temporarily unavailable. Please try again in a few minutes!"
# Failure path: the query was empty or no model returned an answer
if not prompt_status:
chat_obj = {
'id' : chat_id, # str
'title' : '', # str
'user_id' : '',
'user_query' : query, # str
'model_key' : llm_model, # str
'retrived_passage' : result_passages_text, # str
'retrived_ref_ids' : result_passages_ids, # list[obj]
'prompt_type' : 'question-answer', # str
'retrived_duration' : retrive_duration, # str
'llm_duration' : '0', # str
'full_duration' : '0', # str
'time_create' : str(start_time), # str
'used_ref_ids' : [], # list[str]
'prompt_answer' : '', # str
'status_text' : status_text,
'status' : prompt_status, # or False # bool
}
# Return the created object
return chat_obj
llm_answer_duration = (datetime.datetime.now() - end_retrive).total_seconds()
print(f'llm answer duration: {str(llm_answer_duration)}')
@@ -378,33 +461,51 @@ def run_chatbot(query:str, chat_id:str):
print(f'full prompt duration: {full_prompt_duration}')
print('~'*40)
chat_obj = {
'chat-id' : chat_id, # str
'chat-title' : '', # str
'user-id' : '',
'user-query' : query, # str
'model' : model, # str
'result-passages' : result_passages_text, # str
'retrived-passages-ids' : result_passages_ids, # list[obj]
'retrive-duration' : retrive_duration, # str
'llm-answer-duration' : llm_answer_duration, # str
'full-prompt-duration' : full_prompt_duration, # str
'chat-date' : str(start_time), # str
'used-refrences-in-answer' : used_refrences_in_answer, # list[str]
'llm-answer' : llm_answer, # str
}
# prev_chat_data = []
# with open('./llm-answer/chat-messages.json', mode='r', encoding='utf-8') as file:
# prev_chat_data = json.load(file)
# prev_chat_data.append(chat_obj)
status_text = 'پاسخ با موفقیت ایجاد شد' # (fa) "The answer was generated successfully"
# with open('./llm-answer/chat-messages.json', mode='w', encoding='utf-8') as output:
# json.dump(prev_chat_data, output, ensure_ascii=False, indent=2)
title = llm_base_request(query)
if title == '':
title = query[0:15]
chat_obj = {
'id' : chat_id, # str
'title' : title, # str
'user_id' : '',
'user_query' : query, # str
'model_key' : llm_model, # str
'retrived_passage' : result_passages_text, # str
'retrived_ref_ids' : result_passages_ids, # list[obj]
'prompt_type' : 'question-answer', # str
'retrived_duration' : retrive_duration, # str
'llm_duration' : llm_answer_duration, # str
'full_duration' : full_prompt_duration, # str
'time_create' : str(start_time), # str
'used_ref_ids' : used_refrences_in_answer, # list[str]
'prompt_answer' : llm_answer, # str
'status_text' : status_text, # str
'status' : True, # or False # bool
}
prev_chat_data = []
with open('./llm-answer/chat-messages.json', mode='r', encoding='utf-8') as file:
prev_chat_data = json.load(file)
prev_chat_data.append(chat_obj)
with open('./llm-answer/chat-messages.json', mode='w', encoding='utf-8') as output:
json.dump(prev_chat_data, output, ensure_ascii=False, indent=2)
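# NOTE: assumes ./llm-answer/chat-messages.json already exists and holds a JSON list; the first run needs it seeded with [].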
# save_result(chat_obj)
# Build the response object for the front-end
# chat_obj.pop('retrived_passage')
# chat_obj.pop('prompt_type')
return chat_obj
@chatbot.post("/credit_refresh")
def credit_refresh():
"""
Returns remained credit
"""
url = "https://api.avalai.ir/user/credit"
headers = {
"Content-Type": "application/json",
@@ -415,21 +516,30 @@ def credit_refresh():
with open('./llm-answer/credit.txt','w') as file:
file.write(str(remained_credit.json()['remaining_irt']))
return str(remained_credit.json()['remaining_irt'])
def create_chat_id():
date = str(datetime.datetime.now()).replace(' ', '-').replace(':', '').replace('.', '-')
print('date', date)
chat_id = f'{date}-{random.randint(100000, 999999)}'
print('chat_id', chat_id)
return chat_id
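# e.g. '2025-09-28-092427-123456-483920': the timestamp with separators replaced, plus a 6-digit random suffix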
print('#'*19)
print('-Chatbot is Ready!!!!!-')
print('#'*19)
# Data models for API requests
class Query(BaseModel):
query: str
date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-')
chat_id = f'{date}-{random.randint(100000, 999999)}'
print('#'*19)
print('-Chatbot is Ready!-')
print('#'*19)
# API route that runs run_chatbot
@chatbot.post("/run_chatbot")
def chat(query: Query):
def run_chat(query: Query):
print('query', query)
chat_id = create_chat_id()
print('query.query', query.query)
answer = run_chatbot(query.query, chat_id)
credit_refresh()
return {"answer": answer}
# uvicorn src.app:app --reload
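# Example request (assuming the container maps port 80, as in run_docker.bash):
# curl -X POST http://localhost/run_chatbot -H 'Content-Type: application/json' -d '{"query": "متن سوال"}'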

Dockerfile
View File

@@ -1,21 +1,5 @@
FROM python:3.10.12
RUN pip install cleantext==1.1.4
RUN pip install elasticsearch7==7.17.12
RUN pip install faiss_cpu==1.9.0
RUN pip install fastapi==0.117.1
RUN pip install hazm==0.10.0
RUN pip install langchain_openai==0.3.33
RUN pip install numpy==1.21.5
RUN pip install openai==1.108.1
RUN pip install pandas==2.3.2
RUN pip install pydantic==2.11.9
RUN pip install scikit_learn==1.7.2
RUN pip install sentence_transformers==2.5.1
RUN pip install torch==2.4.0
RUN pip install transformers==4.55.1
#RUN pip install torch==2.1.2
FROM qchat_base:1.0.0
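# Assumes qchat_base:1.0.0 is a locally built image that bakes in the pinned dependencies removed above.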
RUN pip install uvicorn[standard]
WORKDIR /src/app
@@ -23,5 +7,5 @@ COPY . /src/app
EXPOSE 80
CMD [ "uvicorn","chatbot:chatbot","--reload","--port","80" ]
CMD [ "uvicorn","chatbot:chatbot","--reload","--port","80","--host=0.0.0.0"]

embedder_sbert_qavanin_285k.py
View File

@@ -30,7 +30,7 @@ from transformers import AutoTokenizer
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
from normalizer import cleaning
#from normalizer import cleaning
try:
from elastic_helper import ElasticHelper
except Exception as error:
@@ -43,8 +43,8 @@ except Exception as error:
# from plotly.subplots import make_subplots
# Persian text processing
import hazm
from hazm import Normalizer, word_tokenize, POSTagger
# import hazm
# from hazm import Normalizer, word_tokenize, POSTagger
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -67,7 +67,7 @@ class PersianVectorAnalyzer:
"""
self.model_name = model_name
self.model = None
self.normalizer = Normalizer()
#self.normalizer = Normalizer()
self.stop_words = self._load_persian_stop_words()
self.key_words = [
"خدا", "بنده", "جهاد", "ولی", "زکات",
@@ -206,7 +206,7 @@
"""
# Normalize text
text = self.normalizer.normalize(text)
#text = self.normalizer.normalize(text)
# Remove extra whitespace
text = re.sub(r'\s+', ' ', text)
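# NOTE: with hazm normalization commented out here and in normalizer.py, preprocessing is reduced to whitespace cleanup.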

File diff suppressed because one or more lines are too long

llm-answer/credit.txt
View File

@@ -1 +1 @@
6085.11
197951.1

llm-answer/error-in-llm.txt Normal file
View File

@@ -0,0 +1,5 @@
query: برای حمایت از ازدواج جوانان و تشکیل خانواده جهت افزایش جمعیت ، چه مواردی پیش بینی شده است؟
error:Error code: 400 - {'error': {'message': 'Developer instruction is not enabled for this model. Please use a different model that supports developer instructions. Please contact support at support@avalai.ir and include the request ID 01998c28-4ccb-7bc3-97a7-0403baa6ed35 in your email if you believe this is an error.', 'type': 'invalid_request', 'param': None, 'code': 'invalid_argument', 'request_id': '01998c28-4ccb-7bc3-97a7-0403baa6ed35'}}
-------------------------------

normalizer.py
View File

@@ -1,4 +1,4 @@
import hazm
#import hazm
from cleantext import clean
import re
@@ -7,7 +7,7 @@ def cleanhtml(raw_html):
cleantext = re.sub(cleanr, '', raw_html)
return cleantext
normalizer = hazm.Normalizer()
#normalizer = hazm.Normalizer()
wierd_pattern = re.compile("["
u"\U0001F600-\U0001F64F" # emoticons
u"\U0001F300-\U0001F5FF" # symbols & pictographs
@@ -64,7 +64,7 @@ def cleaning(text):
text = cleanhtml(text)
# normalizing
text = normalizer.normalize(text)
#text = normalizer.normalize(text)
# removing weird patterns
text = wierd_pattern.sub(r'', text)

run_docker.bash Normal file
View File

@@ -0,0 +1 @@
sudo docker run --name qachat -p 80:80 -v /home/sabr/rag_qavanin_api/:/src/app/ -v /home/sabr/rag_qavanin_api/qavanin-faiss/:/src/app/qavanin-faiss/ -v /home/sabr/rag_qavanin_api/llm-answer/:/src/app/llm-answer/ -v /home/sabr/MODELS:/src/MODELS -it --restart unless-stopped qachat:1.0.0
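# Maps host port 80 and mounts the app source, FAISS index, answer logs, and the local model cache (/src/MODELS, matching cache_folder/cache_dir in the code).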