fix bugs

parent a57d289e1c
commit d79de7461e

chatbot.py (238 lines changed)
@@ -11,11 +11,11 @@ from sklearn.metrics.pairwise import cosine_similarity
 import datetime
 import re
 import random
+from fastapi.middleware.cors import CORSMiddleware
 from embedder_sbert_qavanin_285k import PersianVectorAnalyzer
-from normalizer import cleaning
+#from normalizer import cleaning
-from fastapi import FastAPI
+from fastapi import FastAPI ,Header
 from pydantic import BaseModel

 # LLM Libs
 from openai import OpenAI
 from langchain_openai import ChatOpenAI  # pip install -U langchain_openai
@@ -24,14 +24,23 @@ import requests
 today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime.datetime.now().day}'

 chatbot = FastAPI()
+origins = ["*"]
+
+chatbot.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)

 # -------------------
-# مدلها و مسیر داده
+# مدلها و مسیر دادهsrc/app/qavanin-faiss/faiss_index_qavanin_285k_metadata.json
-# -------------------
+# -------------------/src/app/qavanin-faiss
-EMBED_MODEL = "/home/sabr/MODLES/rag_chat/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+EMBED_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
-RERANKER_MODEL = "/home/sabr/MODLES/rag_chat/BAAI/bge-reranker-v2-m3"
+RERANKER_MODEL = "BAAI/bge-reranker-v2-m3"
-FAISS_INDEX_PATH = "./qavanin-faiss/faiss_index_qavanin_285k.index"
+FAISS_INDEX_PATH = "/src/app/qavanin-faiss/faiss_index_qavanin_285k.index"
-FAISS_METADATA_PATH = "./qavanin-faiss/faiss_index_qavanin_285k_metadata.json"
+FAISS_METADATA_PATH = "/src/app/qavanin-faiss/faiss_index_qavanin_285k_metadata.json"

 RERANK_BATCH = int(os.environ.get("RERANK_BATCH", 256))
 # print(f'RERANK_BATCH: {RERANK_BATCH}')
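
Review note: per the CORS specification, a wildcard origin cannot be combined with credentials, so browsers will refuse credentialed requests against `allow_origins=["*"]` together with `allow_credentials=True`. If the front-end needs cookies or auth headers, a minimal sketch with an explicit origin list (the host below is a placeholder, not taken from this repo's config):

# Sketch only: explicit origins instead of "*" (replace with the real front-end host).
origins = ["https://majles.tavasi.ir"]

chatbot.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)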
@@ -67,6 +76,37 @@ def get_client():

     return client

+def llm_base_request(query):
+    # model = 'cf.gemma-3-12b-it'
+    model = 'gpt-4o-mini'
+    prompt = f'برای متن {query} زیر، عنوانی کوتاه که بین 3 تا 6 کلمه داشته باشد، انتخاب کن. غیر از عنوان، به هیچ وجه توضیح اضافه ای در قبل یا بعد آن اضافه نکن.'
+    client = get_client()
+    try:
+        messages.append({"role": "user", "content": prompt})
+        response = client.chat.completions.create(
+            messages = messages,
+            model= model)  # "gpt-4o", "gpt-4o-mini", "deepseek-chat", "gemini-2.0-flash", gemini-2.5-flash-lite
+        # gpt-4o : 500
+        # gpt-4o-mini : 34
+        # deepseek-chat: : 150
+        # gemini-2.0-flash : error
+        # cf.gemma-3-12b-it : 1
+        # gemini-2.5-flash-lite : 35 خیلی خوب
+
+        answer = response.choices[0].message.content
+        # پاسخ را هم به سابقه اضافه میکنیم
+        messages.append({"role": "assistant", "content": answer})
+
+    except Exception as error:
+        with open('./llm-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file:
+            error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
+            file.write(error_message)
+
+        return ''
+
+    return answer
+
 def llm_request(query, model):

     if query == '':
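
Review note: `llm_base_request` appends the title prompt and the model's answer to the module-level `messages` list, so the shared history grows on every request and leaks between users. A minimal sketch of a per-call history, assuming only the existing system turn should be reused:

# Sketch: per-call message list; the global `messages` is left untouched.
def llm_base_request(query):
    model = 'gpt-4o-mini'
    prompt = f'برای متن {query} زیر، عنوانی کوتاه که بین 3 تا 6 کلمه داشته باشد، انتخاب کن. غیر از عنوان، به هیچ وجه توضیح اضافه ای در قبل یا بعد آن اضافه نکن.'
    client = get_client()
    local_messages = messages[:1] + [{"role": "user", "content": prompt}]  # system turn + this prompt only
    try:
        response = client.chat.completions.create(messages=local_messages, model=model)
        return response.choices[0].message.content
    except Exception as error:
        with open('./llm-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file:
            file.write(f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n')
        return ''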
@@ -121,7 +161,8 @@ class HybridRetrieverReranker:
         self.N = len(content_list)

         # Dense
-        self.embedder = SentenceTransformer(EMBED_MODEL, device=self.device)
+        self.embedder = SentenceTransformer(EMBED_MODEL,cache_folder='/src/MODELS', device=self.device)
+        #self.embedder = SentenceTransformer(EMBED_MODEL, device=self.device)

         # Sparse (مثل قبل برای حفظ خروجی)
         self.vectorizer = TfidfVectorizer(
@@ -132,7 +173,7 @@ class HybridRetrieverReranker:
         self.tfidf_matrix = self.vectorizer.fit_transform(self.content_list)

         # Reranker
-        self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL, use_fast=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL,cache_dir='/src/MODELS', use_fast=True)
         self.reranker = AutoModelForSequenceClassification.from_pretrained(
             RERANKER_MODEL
         ).to(self.device)
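
Review note: the tokenizer call gains `cache_dir='/src/MODELS'`, but the matching `AutoModelForSequenceClassification.from_pretrained(RERANKER_MODEL)` call does not, so the reranker weights will still be downloaded into the default Hugging Face cache inside the container rather than the mounted volume. A sketch of the symmetric fix:

# Sketch: point the reranker weights at the same mounted cache as the tokenizer.
self.reranker = AutoModelForSequenceClassification.from_pretrained(
    RERANKER_MODEL, cache_dir='/src/MODELS'
).to(self.device)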
@@ -290,16 +331,17 @@ class HybridRetrieverReranker:

 def single_query(query: str):

-    query = cleaning(query)
+    # query = cleaning(query)
+    retrived_sections_ids = []
     retrived_sections = pipe.search(query, content_list, topk_dense=30, topk_sparse=30, pre_rerank_k=30, final_k=10)
     final_similars = ''
     for i, row in enumerate(retrived_sections, 1):
         id_value = '{' + str(ids[row['idx']]) + '}'
         result = f"id: {id_value} \n{row['prefix']} {row['content']}\n"
+        retrived_sections_ids.append(ids[row['idx']])
         final_similars += ''.join(result)

-    return final_similars, retrived_sections
+    return final_similars, retrived_sections_ids

 def find_refrences(llm_answer: str) -> List[str]:
     """
@@ -313,7 +355,12 @@ def find_refrences(llm_answer: str) -> List[str]:
     """
     pattern = r"\{[^\}]+\}"
     refrence_ids = re.findall(pattern, llm_answer)
+    new_refrences_ids = []
+    for itm in refrence_ids:
+        refrence = itm.lstrip('{')
+        refrence = refrence.lstrip('}')
+        new_refrences_ids.append(refrence)
+    # refrence_ids = [item.lstrip('{').rstrip('}') for item in refrence_ids]
     return refrence_ids

 def replace_refrences(llm_answer: str, refrences_list:List[str]) -> List[str]:
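
Review note: in the new `find_refrences` block, `new_refrences_ids` is built and then discarded (the function still returns the brace-wrapped matches, which is the form `replace_refrences` expects), and the second strip uses `lstrip('}')`, which cannot remove a trailing brace. If the cleaned ids are ever needed, a one-line sketch equivalent to the commented-out comprehension:

# Sketch: rstrip, not lstrip, removes the closing brace.
new_refrences_ids = [itm.lstrip('{').rstrip('}') for itm in refrence_ids]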
@@ -330,12 +377,12 @@ def replace_refrences(llm_answer: str, refrences_list:List[str]) -> List[str]:
     for index, ref in enumerate(refrences_list,1):
         # breakpoint()
         llm_answer = llm_answer.replace(ref, f'[{index}]')
-        id = ref.lstrip('{')
+        # id = ref.lstrip('{')
-        id = id.rstrip('}')
+        # id = id.rstrip('}')
-        refrences += ''.join(f'[{index}] https://majles.tavasi.ir/entity/detail/view/qsection/{id}\n')
+        # refrences += ''.join(f'[{index}] https://majles.tavasi.ir/entity/detail/view/qsection/{id}\n')

-    llm_answer = f'{llm_answer}\n\nمنابع پاسخ:\n{refrences.strip()}'
+    # llm_answer = f'{llm_answer}\n\nمنابع پاسخ:\n{refrences.strip()}'
-    return llm_answer
+    return llm_answer.strip()

 # load basic items
 content_list, ids, prefix_list, faiss_index = load_faiss_index(FAISS_INDEX_PATH, FAISS_METADATA_PATH)
@@ -347,26 +394,62 @@ messages = [
     {"role": "system", "content": "تو یک دستیار خبره در زمینه حقوق و قوانین مرتبط به آن هستی و می توانی متون حقوقی را به صورت دقیق توضیح بدهی . پاسخ ها باید الزاما به زبان فارسی باشد. پاسخ ها فقط از متون قانونی که در پرامپت وجود دارد استخراج شود."},
 ]

+models = ["gemini-2.5-flash-lite", "gpt-4o-mini"]
+
+def save_result(chat_obj: object) -> bool:
+    # index result in elastic
+    pass
+
 def run_chatbot(query:str, chat_id:str):
+    prompt_status = True
+    status_text = 'لطفا متن سوال را وارد نمائید'
     if query == '':
-        return 'لطفا متن سوال را وارد نمائید'
+        prompt_status = False

     start_time = (datetime.datetime.now())

-    result_passages_text, result_passages_ids = single_query(query)
-    end_retrive = datetime.datetime.now()
-    print('-'*40)
-    retrive_duration = (end_retrive - start_time).total_seconds()
-    print(f'retrive duration: {str(retrive_duration)}')
-
-    prompt = f'برای پرسش "{query}" از میان مواد قانونی "{result_passages_text}" .پاسخ مناسب و دقیق را استخراج کن. درصورتی که مطلبی مرتبط با پرسش در متن پیدا نشد، فقط پاسخ بده: "متاسفانه در منابع، پاسخی پیدا نشد!"'
-    try:
-        model = "gemini-2.5-flash-lite"
-        llm_answer = llm_request(prompt, model)
-    except Exception as error:
-        model = "gpt-4o-mini"
-        llm_answer = llm_request(prompt, model)
+    # در صورتی که وضعیت پرامپت معتبر باشد، وارد فرایند شو
+    if prompt_status:
+        result_passages_text, result_passages_ids = single_query(query)
+        end_retrive = datetime.datetime.now()
+        print('-'*40)
+        retrive_duration = (end_retrive - start_time).total_seconds()
+        print(f'retrive duration: {str(retrive_duration)}')
+
+        prompt = f'برای پرسش "{query}" از میان مواد قانونی "{result_passages_text}" .پاسخ مناسب و دقیق را استخراج کن. درصورتی که مطلبی مرتبط با پرسش در متن پیدا نشد، فقط پاسخ بده: "متاسفانه در منابع، پاسخی پیدا نشد!"'
+
+        llm_model = ''
+        for model in models:
+            try:
+                llm_model = model
+                llm_answer = llm_request(prompt, model)
+            except Exception as error:
+                error = f'model: {model} \n{error}\n\n'
+                prompt_status = False
+                status_text = 'با عرض پوزش، سرویس موقتا در دسترس نیست. لطفا دقایقی دیگر دوباره تلاش نمائید!'
+
+    else:
+        chat_obj = {
+            'id' : chat_id,  # str
+            'title' : '',  # str
+            'user_id' : '',
+            'user_query' : query,  # str
+            'model_key' : llm_model,  # str
+            'retrived_passage' : result_passages_text,  # str
+            'retrived_ref_ids' : result_passages_ids,  # list[obj]
+            'prompt_type' : 'question-answer',  # str
+            'retrived_duration' : retrive_duration,  # str
+            'llm_duration' : '0',  # str
+            'full_duration' : '0',  # str
+            'time_create' : str(start_time),  # str
+            'used_ref_ids' : [],  # list[str]
+            'prompt_answer' : '',  # str
+            'status_text' : status_text,
+            'status' : prompt_status,  # or False  # bool
+        }
+
+        # آبجکت ایجاد شده با بازگردان
+        return chat_obj, status_text
+
     llm_answer_duration = (datetime.datetime.now() - end_retrive).total_seconds()
     print(f'llm answer duration: {str(llm_answer_duration)}')
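
Review note: the fallback loop has no `break`, so even when the first model succeeds it continues and re-asks every entry in `models`, overwriting `llm_answer` and `llm_model`; and if every model raises, `llm_answer` is never bound before it is used further down. A sketch, assuming `llm_request` raises on failure as the `except` branch implies:

# Sketch: stop at the first model that answers; flag failure only if all fail.
llm_model, llm_answer = '', ''
for model in models:
    try:
        llm_answer = llm_request(prompt, model)
        llm_model = model
        break  # first successful model wins
    except Exception:
        continue
else:  # no break: every model failed
    prompt_status = False
    status_text = 'با عرض پوزش، سرویس موقتا در دسترس نیست. لطفا دقایقی دیگر دوباره تلاش نمائید!'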
@@ -377,34 +460,52 @@ def run_chatbot(query:str, chat_id:str):
     full_prompt_duration = (datetime.datetime.now() - start_time).total_seconds()
     print(f'full prompt duration: {full_prompt_duration}')
     print('~'*40)

+    status_text ='پاسخ با موفقیت ایجاد شد'
+
+    title = llm_base_request(query)
+    if title == '':
+        title = query[0:15]
+
     chat_obj = {
-        'chat-id' : chat_id,  # str
-        'chat-title' : '',  # str
-        'user-id' : '',
-        'user-query' : query,  # str
-        'model' : model,  # str
-        'result-passages' : result_passages_text,  # str
-        'retrived-passages-ids' : result_passages_ids,  # list[obj]
-        'retrive-duration' : retrive_duration,  # str
-        'llm-answer-duration' : llm_answer_duration,  # str
-        'full-prompt-duration' : full_prompt_duration,  # str
-        'chat-date' : str(start_time),  # str
-        'used-refrences-in-answer' : used_refrences_in_answer,  # list[str]
-        'llm-answer' : llm_answer,  # str
+        'id' : chat_id,  # str
+        'title' : title,  # str
+        'user_id' : '',
+        'user_query' : query,  # str
+        'model_key' : llm_model,  # str
+        'retrived_passage' : result_passages_text,  # str
+        'retrived_ref_ids' : result_passages_ids,  # list[obj]
+        'prompt_type' : 'question-answer',  # str
+        'retrived_duration' : retrive_duration,  # str
+        'llm_duration' : llm_answer_duration,  # str
+        'full_duration' : full_prompt_duration,  # str
+        'time_create' : str(start_time),  # str
+        'used_ref_ids' : used_refrences_in_answer,  # list[str]
+        'prompt_answer' : llm_answer,  # str
+        'status_text' : status_text,  # str
+        'status' : True,  # or False  # bool
     }
-    # prev_chat_data = []
+    prev_chat_data = []
-    # with open('./llm-answer/chat-messages.json', mode='r', encoding='utf-8') as file:
+    with open('./llm-answer/chat-messages.json', mode='r', encoding='utf-8') as file:
-    #     prev_chat_data = json.load(file)
+        prev_chat_data = json.load(file)
-    # prev_chat_data.append(chat_obj)
+    prev_chat_data.append(chat_obj)

-    # with open('./llm-answer/chat-messages.json', mode='w', encoding='utf-8') as output:
+    with open('./llm-answer/chat-messages.json', mode='w', encoding='utf-8') as output:
-    #     json.dump(prev_chat_data, output, ensure_ascii=False, indent=2)
+        json.dump(prev_chat_data, output, ensure_ascii=False, indent=2)

+    # save_result(chat_obj)
+
+    # ایجاد آبجکت بازگشتی به فرانت
+    # chat_obj.pop('retrived_passage')
+    # chat_obj.pop('prompt_type')
+
     return chat_obj

 @chatbot.post("/credit_refresh")
 def credit_refresh():
+    """
+    Returns remained credit
+    """
     url = "https://api.avalai.ir/user/credit"
     headers = {
         "Content-Type": "application/json",
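
Review note: un-commenting the history read means the first request crashes with FileNotFoundError when ./llm-answer/chat-messages.json does not yet exist, and `json.load` fails on an empty file. A guarded sketch (`json`/`os` imported for self-containment; the module already uses `json`):

# Sketch: tolerate a missing or empty history file.
import json, os

history_path = './llm-answer/chat-messages.json'
prev_chat_data = []
if os.path.exists(history_path):
    try:
        with open(history_path, mode='r', encoding='utf-8') as file:
            prev_chat_data = json.load(file)
    except json.JSONDecodeError:
        prev_chat_data = []  # empty or corrupt file: start a fresh history
prev_chat_data.append(chat_obj)
with open(history_path, mode='w', encoding='utf-8') as output:
    json.dump(prev_chat_data, output, ensure_ascii=False, indent=2)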
@@ -415,21 +516,30 @@ def credit_refresh():
     with open('./llm-answer/credit.txt','w') as file:
         file.write(str(remained_credit.json()['remaining_irt']))
     return str(remained_credit.json()['remaining_irt'])

+def create_chat_id():
+    date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-')
+    print('date ', date )
+    chat_id = f'{date}-{random.randint(100000, 999999)}'
+    print('chat_id ', chat_id )
+    return chat_id
+
+print('#'*19)
+print('-Chatbot is Ready!!!!!-')
+print('#'*19)
+
 # تعریف مدل دادهها برای درخواستهای API
 class Query(BaseModel):
     query: str

-date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-')
-chat_id = f'{date}-{random.randint(100000, 999999)}'
-print('#'*19)
-print('-Chatbot is Ready!-')
-print('#'*19)
 # مسیر API برای اجرا کردن run_chatbot
 @chatbot.post("/run_chatbot")
-def chat(query: Query):
+def run_chat(query: Query):
+    print('query ', query )
+    chat_id = create_chat_id()
+    print('query.query ', query.query )
     answer = run_chatbot(query.query, chat_id)
     credit_refresh()

     return {"answer": answer}

 # uvicorn src.app:app --reload
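
Review note: `create_chat_id` derives ids from a timestamp plus a 6-digit random suffix, so two requests arriving in the same instant can still collide. A collision-safe sketch using only the standard library:

# Sketch: uuid4 makes the suffix effectively collision-free.
import uuid

def create_chat_id():
    return f'{datetime.datetime.now():%Y%m%d-%H%M%S}-{uuid.uuid4().hex[:12]}'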
@@ -473,4 +583,4 @@ if __name__ == "__main__":
     print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}')
     print('----------------------------------------------------------')
     print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

dockerfile (22 lines changed)
@@ -1,21 +1,5 @@
-FROM python:3.10.12
+FROM qchat_base:1.0.0
+RUN pip install uvicorn[standard]
-RUN pip install cleantext==1.1.4
-RUN pip install elasticsearch7==7.17.12
-RUN pip install faiss_cpu==1.9.0
-RUN pip install fastapi==0.117.1
-RUN pip install hazm==0.10.0
-RUN pip install langchain_openai==0.3.33
-RUN pip install numpy==1.21.5
-RUN pip install openai==1.108.1
-RUN pip install pandas==2.3.2
-RUN pip install pydantic==2.11.9
-RUN pip install scikit_learn==1.7.2
-RUN pip install sentence_transformers==2.5.1
-RUN pip install torch==2.4.0
-RUN pip install transformers==4.55.1
-#RUN pip install torch==2.1.2

 WORKDIR /src/app
@@ -23,5 +7,5 @@ COPY . /src/app

 EXPOSE 80

-CMD [ "uvicorn","chatbot:chatbot","--reload","--port","80" ]
+CMD [ "uvicorn","chatbot:chatbot","--reload","--port","80","--host=0.0.0.0"]
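
Review note: `--host=0.0.0.0` is what actually makes the published port reachable from outside the container, so that addition is the real fix here; `--reload`, however, is a development flag (it starts a file watcher and restarts workers on changes) and is usually dropped from a production CMD, with restarts left to `--restart unless-stopped` on the docker side.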

embedder_sbert_qavanin_285k.py
@@ -30,7 +30,7 @@ from transformers import AutoTokenizer
 from sklearn.decomposition import PCA
 from sklearn.manifold import TSNE
 from sklearn.metrics.pairwise import cosine_similarity
-from normalizer import cleaning
+#from normalizer import cleaning
 try:
     from elastic_helper import ElasticHelper
 except Exception as error:
@@ -43,8 +43,8 @@ except Exception as error:
 # from plotly.subplots import make_subplots

 # Persian text processing
-import hazm
+# import hazm
-from hazm import Normalizer, word_tokenize, POSTagger
+# from hazm import Normalizer, word_tokenize, POSTagger

 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -67,7 +67,7 @@ class PersianVectorAnalyzer:
         """
         self.model_name = model_name
         self.model = None
-        self.normalizer = Normalizer()
+        #self.normalizer = Normalizer()
         self.stop_words = self._load_persian_stop_words()
         self.key_words = [
             "خدا", "بنده", "جهاد", "ولی", "زکات",
@@ -206,7 +206,7 @@ class PersianVectorAnalyzer:
         """

         # Normalize text
-        text = self.normalizer.normalize(text)
+        #text = self.normalizer.normalize(text)

         # Remove extra whitespace
         text = re.sub(r'\s+', ' ', text)
File diff suppressed because one or more lines are too long

llm-answer/credit.txt
@@ -1 +1 @@
-6085.11
+197951.1
llm-answer/error-in-llm.txt (new file)
@@ -0,0 +1,5 @@
+
+
+query: برای حمایت از ازدواج جوانان و تشکیل خانواده جهت افزایش جمعیت ، چه مواردی پیش بینی شده است؟
+error:Error code: 400 - {'error': {'message': 'Developer instruction is not enabled for this model. Please use a different model that supports developer instructions. Please contact support at support@avalai.ir and include the request ID 01998c28-4ccb-7bc3-97a7-0403baa6ed35 in your email if you believe this is an error.', 'type': 'invalid_request', 'param': None, 'code': 'invalid_argument', 'request_id': '01998c28-4ccb-7bc3-97a7-0403baa6ed35'}}
+-------------------------------
normalizer.py
@@ -1,4 +1,4 @@
-import hazm
+#import hazm
 from cleantext import clean
 import re

@@ -7,7 +7,7 @@ def cleanhtml(raw_html):
     cleantext = re.sub(cleanr, '', raw_html)
     return cleantext

-normalizer = hazm.Normalizer()
+#normalizer = hazm.Normalizer()
 wierd_pattern = re.compile("["
     u"\U0001F600-\U0001F64F"  # emoticons
     u"\U0001F300-\U0001F5FF"  # symbols & pictographs
@@ -64,7 +64,7 @@ def cleaning(text):
     text = cleanhtml(text)

     # normalizing
-    text = normalizer.normalize(text)
+    #text = normalizer.normalize(text)

     # removing wierd patterns
     text = wierd_pattern.sub(r'', text)
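
Review note: with hazm commented out here and in the embedder, queries are no longer normalized, while the 285k FAISS index was presumably built from normalized text; mismatched preprocessing between indexing and querying can quietly degrade retrieval. A guarded-import sketch that keeps the no-hazm behavior when the package is absent but restores normalization when it is installed:

# Sketch: optional hazm normalization with a no-op fallback.
try:
    import hazm
    normalizer = hazm.Normalizer()
except ImportError:
    normalizer = None

def maybe_normalize(text):
    return normalizer.normalize(text) if normalizer else text

`cleaning()` would then call `maybe_normalize(text)` where the normalize line is now commented out.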

run_docker.bash (new file, 1 line)
@@ -0,0 +1 @@
+sudo docker run --name qachat -p 80:80 -v /home/sabr/rag_qavanin_api/:/src/app/ -v /home/sabr/rag_qavanin_api/qavanin-faiss/:/src/app/qavanin-faiss/ -v /home/sabr/rag_qavanin_api/llm-answer/:/src/app/llm-answer/ -v /home/sabr/MODELS:/src/MODELS -it --restart unless-stopped qachat:1.0.0
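
Review note: this command combines `-it` (attached, interactive) with `--restart unless-stopped`; for an unattended service `-d` (detached) is the usual choice, since an attached container is tied to the launching terminal session. Bind-mounting the whole project over /src/app also shadows the image's COPY step, which fits the `--reload` development setup above but means the baked-in image contents are never actually served.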