data model latest updates

This commit is contained in:
ajokar 2026-02-23 11:49:43 +00:00
parent dc2bc292b0
commit 700571aa6a
3 changed files with 309 additions and 134 deletions

View File

@ -1,3 +1,4 @@
from datetime import datetime
import sqlite3 import sqlite3
import json import json
@ -80,11 +81,19 @@ def get_all_data():
conn = sqlite3.connect('./db/nahj.db') conn = sqlite3.connect('./db/nahj.db')
cursor = conn.cursor() cursor = conn.cursor()
# اجرای کوئری برای دریافت همه داده‌ها
cursor.execute('SELECT * FROM speeches') cursor.execute('SELECT * FROM speeches')
result = cursor.fetchall() result = cursor.fetchall()
# دریافت نام ستون‌ها
columns = [desc[0] for desc in cursor.description]
conn.close() conn.close()
return result
# تبدیل داده‌ها به دیکشنری (کلید-مقدار)
data_as_dict = [dict(zip(columns, row)) for row in result]
return data_as_dict
# متد ایجاد جدول چت # متد ایجاد جدول چت
def create_chat_table(): def create_chat_table():
@ -201,30 +210,207 @@ def get_chats_by_user_id(user_id):
return chats return chats
def insert_error(query, error_message):
conn = sqlite3.connect('./db/nahj.db') # اتصال به دیتابیس
cursor = conn.cursor()
try:
# درج داده در جدول `error`
cursor.execute('''
INSERT INTO error (query, error_message)
VALUES (?, ?)
''', (query, error_message))
conn.commit() # ذخیره تغییرات
print("Error inserted successfully.")
except Exception as e:
print(f"Failed to insert error: {e}")
finally:
conn.close() # بستن اتصال
# اضافه کردن یک رکورد جدید
def add_credit(remained_credit, date=None):
conn = sqlite3.connect('./db/nahj.db')
cursor = conn.cursor()
if date is None:
date = datetime.now().strftime('%Y-%m-%d')
try:
cursor.execute('''
INSERT INTO credit (remained_credit, date)
VALUES (?, ?)
''', (remained_credit, date))
conn.commit()
print("Record added successfully.")
except Exception as e:
print(f"Failed to add record: {e}")
finally:
conn.close()
# بازخوانی آخرین رکورد
def get_last_credit():
conn = sqlite3.connect('./db/nahj.db')
cursor = conn.cursor()
try:
cursor.execute('''
SELECT * FROM credit
ORDER BY id DESC
LIMIT 1
''')
last_record = cursor.fetchone()
conn.close()
if last_record:
return {"id": last_record[0], "remained_credit": last_record[1], "date": last_record[2]}
else:
return None
except Exception as e:
print(f"Failed to fetch last record: {e}")
conn.close()
return None
def insert_request(update_id, username=None, text=None, answer=None, message_id=None, user_id=None, is_bot=None, date_value=None, chat_id=None, req_type=None, first_name=None, last_name=None, is_active=None):
conn = sqlite3.connect('./db/nahj.db')
cursor = conn.cursor()
if date_value:
date_value = datetime.fromtimestamp(date_value).strftime('%Y-%m-%d %H:%M:%S')
try:
# درج رکورد جدید در جدول `requests`
cursor.execute('''
INSERT INTO requests (update_id, username, text, answer, message_id, user_id, is_bot, date, chat_id, type, first_name, last_name, is_active)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (update_id, username, text, answer, message_id, user_id, is_bot, date_value, chat_id, req_type, first_name, last_name, is_active))
conn.commit()
# print("Record inserted successfully.")
except Exception as e:
print(f"Failed to insert record: {e}")
finally:
conn.close()
def update_request(update_id, username=None, text=None, answer=None, message_id=None, user_id=None, is_bot=None,
date_value=None, chat_id=None, req_type=None, first_name=None, last_name=None, is_active=None):
conn = sqlite3.connect('./db/nahj.db')
cursor = conn.cursor()
try:
# لیست ستون‌هایی که به‌روزرسانی می‌شوند
update_fields = []
update_values = []
if username is not None:
update_fields.append("username = ?")
update_values.append(username)
if text is not None:
update_fields.append("text = ?")
update_values.append(text)
if answer is not None:
update_fields.append("answer = ?")
update_values.append(answer)
if message_id is not None:
update_fields.append("message_id = ?")
update_values.append(message_id)
if user_id is not None:
update_fields.append("user_id = ?")
update_values.append(user_id)
if is_bot is not None:
update_fields.append("is_bot = ?")
update_values.append(is_bot)
if date_value is not None:
update_fields.append("date = ?")
update_values.append(date_value)
if chat_id is not None:
update_fields.append("chat_id = ?")
update_values.append(chat_id)
if req_type is not None:
update_fields.append("type = ?")
update_values.append(req_type)
if first_name is not None:
update_fields.append("first_name = ?")
update_values.append(first_name)
if last_name is not None:
update_fields.append("last_name = ?")
update_values.append(last_name)
if is_active is not None:
update_fields.append("is_active = ?")
update_values.append(is_active)
# افزودن مقدار `update_id` برای شرط WHERE
update_values.append(update_id)
# اجرای کوئری آپدیت
cursor.execute(f'''
UPDATE requests
SET {', '.join(update_fields)}
WHERE update_id = ?
''', update_values)
conn.commit()
# print("Record updated successfully.")
except Exception as e:
print(f"Failed to update record: {e}")
finally:
conn.close()
def get_last_request():
conn = sqlite3.connect('./db/nahj.db')
cursor = conn.cursor()
try:
# بازخوانی آخرین رکورد بر اساس بیشترین مقدار update_id
cursor.execute('''
SELECT * FROM requests
ORDER BY update_id DESC
LIMIT 1
''')
last_record = cursor.fetchone()
conn.close()
if last_record:
return {
"update_id": last_record[0],
"username": last_record[1],
"text": last_record[2],
"answer": last_record[3],
"message_id": last_record[4],
"user_id": last_record[5],
"is_bot": last_record[6],
"date": last_record[7],
"chat_id": last_record[8],
"type": last_record[9],
"first_name": last_record[10],
"last_name": last_record[11],
"is_active": last_record[12]
}
else:
return None # اگر جدولی خالی باشد
except Exception as e:
print(f"Failed to fetch last record: {e}")
conn.close()
return None
def create_tables(): def create_tables():
create_speechs_table() create_speechs_table()
create_chat_table() create_chat_table()
# مثال استفاده # مثال استفاده
if __name__ == "__main__": if __name__ == "__main__":
pass
# ایجاد جدول (فقط بار اول اجرا می‌شود) # ایجاد جدول (فقط بار اول اجرا می‌شود)
#create_tables() #create_tables()
# درج داده‌ها # درج داده‌ها
with open('./data-faiss/faiss_index_nahj_metadata.json', 'r', encoding='utf-8') as file: # with open('./data-faiss/faiss_index_nahj_metadata.json', 'r', encoding='utf-8') as file:
data = json.load(file) # data = json.load(file)
insert_data(data) # insert_data(data)
# خواندن داده بر اساس id
record = get_data_by_id("wisdom774")
print("Record by ID:", record)
# خواندن داده بر اساس part_id
records_by_part = get_data_by_part_id("sp13")
print("Records by Part ID:", records_by_part)
# خواندن داده بر اساس context_id
records_by_context = get_data_by_context_id("sn1")
print("Records by Context ID:", records_by_context)

View File

@ -6,6 +6,7 @@ import logging
# import uvicorn # import uvicorn
import time import time
import nahj_engine as nahj_chat import nahj_engine as nahj_chat
import data_model as dm
# =========================== # ===========================
# پیکربندی اولیه # پیکربندی اولیه
# =========================== # ===========================
@ -62,33 +63,30 @@ class BaleBot:
return None return None
async def get_latest_req_id(self): async def get_latest_req_id(self):
latest_req_id = 0
# should write in DATABASE
with open('./bale_bot/requests.json', 'r', encoding='utf-8') as file:
prev_reqs = json.load(file)
if prev_reqs: latest_request = dm.get_last_request()
latest_req_id = prev_reqs[-1]['update_id'] latest_req_id = latest_request['update_id']
if not latest_req_id:
latest_req_id = 0
return latest_req_id + 1 return latest_req_id + 1
async def save_entery(self, update): async def save_entery(self, update_item):
all_reqs = [] is_active = True
prev_reqs = [] answer = ''
message = update_item['message']
fromm = message['from']
chat = message['chat']
try: try:
# should read from DATABASE dm.insert_request(update_item['update_id'],fromm['username'],message['text'], answer, message['message_id'],fromm['id'],fromm['is_bot'],message['date'],chat['id'],chat['type'],fromm['first_name'],fromm['last_name'], is_active)
with open('./bale_bot/requests.json', 'r', encoding='utf-8') as file:
prev_reqs = json.load(file)
all_reqs.extend(prev_reqs)
except: except:
prev_reqs = [] return update_item['update_id']
all_reqs.extend(update)
# should write in DATABASE return update_item['update_id']
with open('./bale_bot/requests.json', 'w', encoding='utf-8') as file:
data = json.dumps(all_reqs, ensure_ascii=False, indent=2) async def update_request(self, update_id, answer):
file.write(data) dm.update_request(update_id= update_id, answer= answer)
return True
async def split_text_into_chunks(self, text, max_length=4000): async def split_text_into_chunks(self, text, max_length=4000):
""" """
@ -270,10 +268,10 @@ class BaleBot:
reply_chuncs = await self.split_text_into_chunks(reply) reply_chuncs = await self.split_text_into_chunks(reply)
for i, paragraph in enumerate(reply_chuncs): for i, paragraph in enumerate(reply_chuncs):
resultt = await self.send_message(chat_id, paragraph, keyboard) await self.send_message(chat_id, paragraph, keyboard)
# print(f"sent to bale server result {i+1}: {resultt}")
await self.save_chat_data(text, reply, first_name, username) # await self.save_chat_data(text, reply, first_name, username)
return reply
async def main(): async def main():
@ -285,10 +283,12 @@ async def main():
# print(f"reading bale-bot server. len update: {len(update)}") # print(f"reading bale-bot server. len update: {len(update)}")
if update: if update:
print(f'{len(update)} requests recognized!') print(f'{len(update)} requests recognized!')
await bale_bot.save_entery(update)
for i, item in enumerate(update, 1): for i, item in enumerate(update, 1):
print(f'handle input {i}/{len(update)}') print(f'handle input {i}/{len(update)}')
await bale_bot.handle_update(item) update_id = await bale_bot.save_entery(item)
answer = await bale_bot.handle_update(item)
if answer:
await bale_bot.update_request(update_id, answer)
time.sleep(1) time.sleep(1)
# =========================== # ===========================

View File

@ -28,7 +28,8 @@ today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime
# ------------------- # -------------------
# مدل‌ها و مسیر داده # مدل‌ها و مسیر داده
# ------------------- # -------------------
MODEL_PATH = "/home/sabr/MODELS/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2" # مسیر مدل FastText فارسی # MODEL_PATH = "/home/sabr/MODELS/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2"
MODEL_PATH = "./models/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2"
RERANKER_MODEL = "BAAI/bge-reranker-v2-m3" RERANKER_MODEL = "BAAI/bge-reranker-v2-m3"
# RERANKER_MODEL = "/home/gpu/HFHOME/hub/models/BAAI/bge-reranker-v2-m3" # RERANKER_MODEL = "/home/gpu/HFHOME/hub/models/BAAI/bge-reranker-v2-m3"
FAISS_INDEX_PATH = "./data-faiss/faiss_index_nahj.index" FAISS_INDEX_PATH = "./data-faiss/faiss_index_nahj.index"
@ -40,27 +41,44 @@ def get_key():
key = 'aa-Fu5oeQv8jx8NCWV39WenJ7Yy1mbcFJ4P20CLQURkql2Eleta' # nahj_rag api key key = 'aa-Fu5oeQv8jx8NCWV39WenJ7Yy1mbcFJ4P20CLQURkql2Eleta' # nahj_rag api key
return key return key
def read_records():
metadata = dm.get_all_data()
ids, part_ids, context_ids, large_titles, normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types = [], [], [], [], [], [], [], [], [], []
for item in metadata:
ids.append(item['id'])
context_ids.append(item['context_id'])
part_ids.append(item['part_id'])
titles.append(item['title'])
large_titles.append(item['large_title'])
normalized_sentences.append(item['normalized_sentence'])
urls.append(item['url'])
types.append(item['types'])
arabic_texts.append(item['arabic_text'])
Interpretation_links.append(item['interpretation_links'])
# ids.append(item["id"])
# part_ids.append(item["part_id"])
# context_ids.append(item["context_id"])
# large_titles.append(item["large_title"])
# normalized_sentences.append(item["normalized_text"])
# titles.append(item["title"])
# urls.append(item["url"])
# arabic_texts.append(item["arabic_text"])
# Interpretation_links.append(item["Interpretation_link"])
# types.append(item["type"])
return ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types
def load_faiss_index(index_path: str, metadata_path: str): def load_faiss_index(index_path: str, metadata_path: str):
index = faiss.read_index(index_path) index = faiss.read_index(index_path)
# with open(metadata_path, "r", encoding="utf-8") as file:
# metadata = json.load(file)
metadata = dm.get_all_data()
ids, part_ids, context_ids, large_titles, normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types = [], [], [], [], [], [], [], [], [], [], [] ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types = read_records()
for item in metadata:
ids.append(item["id"])
part_ids.append(item["part_id"])
context_ids.append(item["context_id"])
large_titles.append(item["large_title"])
normalized_sentences.append(item["normalized_text"])
titles.append(item["title"])
urls.append(item["url"])
arabic_texts.append(item["arabic_text"])
Interpretation_links.append(item["Interpretation_link"])
types.append(item["type"])
return ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types, index return ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types, index
def get_client(): def get_client():
url = "https://api.avalai.ir/v1" url = "https://api.avalai.ir/v1"
client = OpenAI( client = OpenAI(
@ -72,7 +90,7 @@ def get_client():
def llm_free_request(llm_messages, model="gemini-2.5-flash-lite"): def llm_free_request(llm_messages, model="gemini-2.5-flash-lite"):
print(f'llm_free_request func ...') print(f'llm_free_request func ...')
print(f'using model: {model}') print(f'using model: {model}')
client = get_client()
try: try:
response = client.chat.completions.create( response = client.chat.completions.create(
messages=llm_messages, messages=llm_messages,
@ -81,10 +99,10 @@ def llm_free_request(llm_messages, model="gemini-2.5-flash-lite"):
answer = response.choices[0].message.content answer = response.choices[0].message.content
except Exception as error: except Exception as error:
with open('./nahj-answer/error-in-llm-free-req.txt', mode='a+', encoding='utf-8') as file: dm.insert_error(query= query, error_message= error)
error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
file.write(error_message) answer = 'متاسفانه خطایی رخ داده است.'
return 'متاسفانه خطایی رخ داده است.'
return answer return answer
def llm_request(query, model="gemini-2.5-flash-lite"): def llm_request(query, model="gemini-2.5-flash-lite"):
@ -92,7 +110,7 @@ def llm_request(query, model="gemini-2.5-flash-lite"):
if query == '': if query == '':
return 'لطفا متن سوال را وارد نمائید' return 'لطفا متن سوال را وارد نمائید'
determine_refrence = """شناسه هر سخنرانی در ابتدای آن و با فرمت "id: {idvalue}" آمده است...""" # determine_refrence = """شناسه هر سخنرانی در ابتدای آن و با فرمت "id: {idvalue}" آمده است..."""
try: try:
messages.append({"role": "user", "content": query}) messages.append({"role": "user", "content": query})
response = client.chat.completions.create( response = client.chat.completions.create(
@ -103,10 +121,10 @@ def llm_request(query, model="gemini-2.5-flash-lite"):
messages.append({"role": "assistant", "content": answer}) messages.append({"role": "assistant", "content": answer})
except Exception as error: except Exception as error:
# should write in DATABASE # should write in DATABASE
with open('./nahj-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file: dm.insert_error(query= query, error_message= error)
error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
file.write(error_message) answer = 'با عرض پوزش؛ متاسفانه خطایی رخ داده است.'
return 'با عرض پوزش؛ متاسفانه خطایی رخ داده است.'
return answer return answer
class QueryAnalysisPipeline: class QueryAnalysisPipeline:
@ -193,28 +211,35 @@ class QueryAnalysisPipeline:
# ----------------------------- # -----------------------------
class HybridRetrieverReranker: class HybridRetrieverReranker:
__slots__ = ( __slots__ = (
"device", "content_list", "ids","context_ids","large_titles","normalized_sentences","titles","urls","arabic_texts","Interpretation_links","types", "N", "embedder", "faiss_index", "device", "ids","part_ids","context_ids","large_titles","sentences","titles","urls","arabic_texts","Interpretation_links","types", "N", "embedder", "faiss_index", "vectorizer", "tfidf_matrix", "tokenizer", "reranker", "dense_alpha"
"vectorizer", "tfidf_matrix", "tokenizer", "reranker", "dense_alpha"
) )
# ids, dates, titles, content_list, urls,
def __init__(self, ids: List[str], dates: List[str], titles: List[str], content_list: List[str],urls: List[str] , faiss_index, # ids, part_ids,context_ids,large_titles,sentences , titles, urls, arabic_texts, Interpretation_links, types,
def __init__(self, ids: List[str], part_ids: List[str], context_ids: List[str], large_titles: List[str], sentences: List[str], titles: List[str], urls: List[str], arabic_texts: List[str], Interpretation_links: List[str], types: List[str], faiss_index,
dense_alpha: float = 0.6, device: str = None): dense_alpha: float = 0.6, device: str = None):
if device is None: if device is None:
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
self.device = device self.device = device
self.content_list = content_list
self.ids = ids self.ids = ids
self.dates = dates self.part_ids = part_ids
self.context_ids = context_ids
self.large_titles = large_titles
self.sentences = sentences
self.titles = titles self.titles = titles
self.urls = urls self.urls = urls
self.arabic_texts = arabic_texts
self.Interpretation_links = Interpretation_links
self.types = types
self.faiss_index = faiss_index self.faiss_index = faiss_index
self.N = len(content_list) self.N = len(sentences)
# --- Dense Embedder --- # --- Dense Embedder ---
print("Loading SentenceTransformer model ...") print("Loading SentenceTransformer model ...")
self.embedder = SentenceTransformer(MODEL_PATH, device=self.device) self.embedder = SentenceTransformer(MODEL_PATH, device=self.device)
# embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device= self.device)
# embedder.save(MODEL_PATH)
# --- Sparse (TF-IDF) --- # --- Sparse (TF-IDF) ---
self.vectorizer = TfidfVectorizer( self.vectorizer = TfidfVectorizer(
@ -222,7 +247,7 @@ class HybridRetrieverReranker:
ngram_range=(1, 2), ngram_range=(1, 2),
token_pattern=r"(?u)\b[\w\u0600-\u06FF]{2,}\b", token_pattern=r"(?u)\b[\w\u0600-\u06FF]{2,}\b",
) )
self.tfidf_matrix = self.vectorizer.fit_transform(self.content_list) self.tfidf_matrix = self.vectorizer.fit_transform(self.sentences)
# --- Reranker --- # --- Reranker ---
# self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL, use_fast=True, local_files_only= True) # self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL, use_fast=True, local_files_only= True)
@ -334,8 +359,8 @@ class HybridRetrieverReranker:
)[:final_k] )[:final_k]
return reranked return reranked
def get_passages(self, cand_idx, content_list): def get_passages(self, cand_idx, sentences_list):
passages = [content_list[idx] for idx in cand_idx] passages = [sentences_list[idx] for idx in cand_idx]
return passages return passages
# --- Search --- # --- Search ---
@ -351,29 +376,22 @@ class HybridRetrieverReranker:
# print(item) # print(item)
pre_rerank_k = final_k pre_rerank_k = final_k
cand_idx = self.fuse(d_idx, d_scores, s_idx, s_scores, pre_rerank_k) cand_idx = self.fuse(d_idx, d_scores, s_idx, s_scores, pre_rerank_k)
# for item in d_scores:
# print(item)
# print('&&&&&&&&&&&&&&&&&&&&&&&&&')
passages = self.get_passages(cand_idx, sentence_list)
# for itemm in s_scores:
# print(itemm)
# print('&&&&&&&&&&&&&&&&&&&&&&&&&') # passages = self.get_passages(cand_idx, sentence_list)
# print('&&&&&&&&&&&&&&&&&&&&&&&&&')
# reranked = self.rerank(query, cand_idx, passages, final_k) # reranked = self.rerank(query, cand_idx, passages, final_k)
# return [ # return [
# {"idx": i, "content": self.sentence_list[i], "rerank_score": score} # {"idx": i, "content": self.sentence_list[i], "rerank_score": score}
# for i, score in reranked # for i, score in reranked
# ] # ]
with open("./data/passages.json", 'w',encoding='utf-8') as file:
json.dump(passages, file, ensure_ascii=False, indent=4)
# return [ # return [
# {"idx": i, "content": self.sentence_list[i], "rerank_score": score} # {"idx": i, "content": self.sentence_list[i], "rerank_score": score}
# for i, score in reranked # for i, score in reranked
# ] # ]
return [ return [
{"idx": i, "content": self.sentence_list[i]} {"idx": i, "content": self.sentences[i]}
for i in cand_idx for i in cand_idx
] ]
@ -381,7 +399,7 @@ def single_query(query: str):
query = cleaning(query) query = cleaning(query)
retrived_sections = pipe.search(query, sentences, topk_dense=100, topk_sparse=100, pre_rerank_k=100, final_k=17) retrived_sections = pipe.search(query, sentences, topk_dense=100, topk_sparse=100, pre_rerank_k=100, final_k=10)
retrived_sections_list = [] retrived_sections_list = []
final_similars_text = '' final_similars_text = ''
@ -399,7 +417,7 @@ def single_query(query: str):
final_similars_text += ''.join(result) final_similars_text += ''.join(result)
id_list += f"{ids[row['idx']]}\n" id_list += f"{ids[row['idx']]}\n"
retrived_sections_list.append(row) retrived_sections_list.append(row)
return idlist, final_similars_text, retrived_sections_list return id_list, final_similars_text, retrived_sections_list
def find_refrences(llm_answer: str) -> List[str]: def find_refrences(llm_answer: str) -> List[str]:
""" """
@ -451,7 +469,7 @@ client = get_client()
models = [ "gemini-2.5-flash-lite", "gpt-4o-mini","deepseek-reasoner"] models = [ "gemini-2.5-flash-lite", "gpt-4o-mini","deepseek-reasoner"]
def save_result(chat_obj: object) -> bool: def save_result(chat_obj: object) -> bool:
# index result in elastic # index result in DATABASE
pass pass
def run_chatbot(query:str, chat_id:str): def run_chatbot(query:str, chat_id:str):
@ -532,7 +550,7 @@ def run_chatbot(query:str, chat_id:str):
'status' : True, # or False # bool 'status' : True, # or False # bool
} }
save_result(chat_obj) # save_result(chat_obj)
status_text ='پاسخ با موفقیت ایجاد شد' status_text ='پاسخ با موفقیت ایجاد شد'
return chat_obj, status_text return chat_obj, status_text
@ -547,11 +565,11 @@ def credit_refresh():
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": f"Bearer {get_key()}" "Authorization": f"Bearer {get_key()}"
} }
remained_credit = requests.get(url, headers=headers) remained_credit = str((requests.get(url, headers=headers)).json()['remaining_irt'])
with open('./llm-answer/credit.txt','w') as file: dm.add_credit(remained_credit= remained_credit)
file.write(str(remained_credit.json()['remaining_irt']))
return str(remained_credit.json()['remaining_irt']) return remained_credit
# تعریف مدل داده‌ها برای درخواست‌های API # تعریف مدل داده‌ها برای درخواست‌های API
# class Query(BaseModel): # class Query(BaseModel):
# query: str # query: str
@ -559,7 +577,7 @@ def credit_refresh():
date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-') date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-')
chat_id = f'{date}-{random.randint(100000, 999999)}' chat_id = f'{date}-{random.randint(100000, 999999)}'
print('#'*29) print('#'*29)
print(' - NAHJ ENGINE IS READY! - ') print(' - NAHJ ENGINE IS READY! - ')
print('#'*29) print('#'*29)
# مسیر API برای اجرا کردن run_chatbot # مسیر API برای اجرا کردن run_chatbot
# @chatbot.post("/run_chatbot") # @chatbot.post("/run_chatbot")
@ -600,16 +618,15 @@ def get_passages_by_paragraphs(retrived_sections_list):
""" """
بازسازی متن های مشابه بر اساس پاراگراف آنها بازسازی متن های مشابه بر اساس پاراگراف آنها
""" """
with open('./data-faiss/faiss_index_nahj_metadata.json', 'r', encoding='utf-8') as file: data = dm.get_all_data()
data = json.load(file)
final_passages = '' final_passages = ''
for item in retrived_sections_list: for item in retrived_sections_list:
title = item['large_title']
filtered_data = {} filtered_data = {}
for row in data: for row in data:
if row['part_id'] == item['part_id']: if row['part_id'] == item['part_id']:
filtered_data[row['id']] = row filtered_data[row['id']] = row
title = row['large_title']
# مرتب سازی بر اساس ترتیب جمله در پاراگراف # مرتب سازی بر اساس ترتیب جمله در پاراگراف
sorted_data = dict(sorted(filtered_data.items())) sorted_data = dict(sorted(filtered_data.items()))
@ -621,7 +638,7 @@ def get_passages_by_paragraphs(retrived_sections_list):
final_passages += ''.join(f'{paragraph.strip()}\n\n') final_passages += ''.join(f'{paragraph.strip()}\n\n')
return final_passages return final_passages
pass
def bale_search(query): def bale_search(query):
start = datetime.datetime.now() start = datetime.datetime.now()
@ -634,20 +651,6 @@ def bale_search(query):
# پاسخ حداکثر 300 کلمه باشد. # پاسخ حداکثر 300 کلمه باشد.
refrences = '' refrences = ''
# recognized_refrences = find_refrences(llm_answer)
# llm_answer = replace_refrences(llm_answer, recognized_refrences)
# with open('./nahj-answer/result-khamenei.txt', mode='a+', encoding='utf-8') as file:
# result_message = f'متن پرامپت: {query.strip()}\n\nپاسخ: {llm_answer} \n----------------------------------------------------------\n'
# file.write(result_message)
# with open('./nahj-answer/passages-khamenei.txt', mode='a+', encoding='utf-8') as file:
# result_message = f'متن پرامپت: {query.strip()}\nمواد مشابه: {result_passages} \n----------------------------------------------------------\n'
# file.write(result_message)
# with open('./nahj-answer/chat-leader.txt', mode='w', encoding='utf-8') as file:
# result_message = f'{query.strip()}\n\nپاسخ:\n {llm_answer} \n----------------------------------------------------------\n'
# file.write(result_message)
print('---------------------------------------------') print('---------------------------------------------')
print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}') print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}')
@ -672,20 +675,6 @@ def bale_chat(query):
print(f'llm duration: {(datetime.datetime.now() - end_retrive).total_seconds()}') print(f'llm duration: {(datetime.datetime.now() - end_retrive).total_seconds()}')
refrences = '' refrences = ''
# recognized_refrences = find_refrences(llm_answer)
# llm_answer = replace_refrences(llm_answer, recognized_refrences)
# with open('./nahj-answer/result-khamenei.txt', mode='a+', encoding='utf-8') as file:
# result_message = f'متن پرامپت: {query.strip()}\n\nپاسخ: {llm_answer} \n----------------------------------------------------------\n'
# file.write(result_message)
# with open('./nahj-answer/passages-khamenei.txt', mode='a+', encoding='utf-8') as file:
# result_message = f'متن پرامپت: {query.strip()}\nمواد مشابه: {result_passages} \n----------------------------------------------------------\n'
# file.write(result_message)
# with open('./nahj-answer/chat-leader.txt', mode='w', encoding='utf-8') as file:
# result_message = f'{query.strip()}\n\nپاسخ:\n {llm_answer} \n----------------------------------------------------------\n'
# file.write(result_message)
print('---------------------------------------------') print('---------------------------------------------')
print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}') print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}')