data model latest updates
This commit is contained in:
parent
dc2bc292b0
commit
700571aa6a
218
data_model.py
218
data_model.py
|
|
@ -1,3 +1,4 @@
|
|||
from datetime import datetime
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
|
|
@ -80,11 +81,19 @@ def get_all_data():
|
|||
conn = sqlite3.connect('./db/nahj.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
# اجرای کوئری برای دریافت همه دادهها
|
||||
cursor.execute('SELECT * FROM speeches')
|
||||
result = cursor.fetchall()
|
||||
|
||||
# دریافت نام ستونها
|
||||
columns = [desc[0] for desc in cursor.description]
|
||||
|
||||
conn.close()
|
||||
return result
|
||||
|
||||
# تبدیل دادهها به دیکشنری (کلید-مقدار)
|
||||
data_as_dict = [dict(zip(columns, row)) for row in result]
|
||||
|
||||
return data_as_dict
|
||||
|
||||
# متد ایجاد جدول چت
|
||||
def create_chat_table():
|
||||
|
|
@ -201,30 +210,207 @@ def get_chats_by_user_id(user_id):
|
|||
|
||||
return chats
|
||||
|
||||
def insert_error(query, error_message):
|
||||
conn = sqlite3.connect('./db/nahj.db') # اتصال به دیتابیس
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# درج داده در جدول `error`
|
||||
cursor.execute('''
|
||||
INSERT INTO error (query, error_message)
|
||||
VALUES (?, ?)
|
||||
''', (query, error_message))
|
||||
|
||||
conn.commit() # ذخیره تغییرات
|
||||
print("Error inserted successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to insert error: {e}")
|
||||
finally:
|
||||
conn.close() # بستن اتصال
|
||||
|
||||
|
||||
# اضافه کردن یک رکورد جدید
|
||||
def add_credit(remained_credit, date=None):
|
||||
conn = sqlite3.connect('./db/nahj.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
if date is None:
|
||||
date = datetime.now().strftime('%Y-%m-%d')
|
||||
|
||||
try:
|
||||
cursor.execute('''
|
||||
INSERT INTO credit (remained_credit, date)
|
||||
VALUES (?, ?)
|
||||
''', (remained_credit, date))
|
||||
|
||||
conn.commit()
|
||||
print("Record added successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to add record: {e}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
# بازخوانی آخرین رکورد
|
||||
def get_last_credit():
|
||||
conn = sqlite3.connect('./db/nahj.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
cursor.execute('''
|
||||
SELECT * FROM credit
|
||||
ORDER BY id DESC
|
||||
LIMIT 1
|
||||
''')
|
||||
last_record = cursor.fetchone()
|
||||
|
||||
conn.close()
|
||||
|
||||
if last_record:
|
||||
return {"id": last_record[0], "remained_credit": last_record[1], "date": last_record[2]}
|
||||
else:
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"Failed to fetch last record: {e}")
|
||||
conn.close()
|
||||
return None
|
||||
|
||||
def insert_request(update_id, username=None, text=None, answer=None, message_id=None, user_id=None, is_bot=None, date_value=None, chat_id=None, req_type=None, first_name=None, last_name=None, is_active=None):
|
||||
conn = sqlite3.connect('./db/nahj.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
if date_value:
|
||||
date_value = datetime.fromtimestamp(date_value).strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
try:
|
||||
# درج رکورد جدید در جدول `requests`
|
||||
cursor.execute('''
|
||||
INSERT INTO requests (update_id, username, text, answer, message_id, user_id, is_bot, date, chat_id, type, first_name, last_name, is_active)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
''', (update_id, username, text, answer, message_id, user_id, is_bot, date_value, chat_id, req_type, first_name, last_name, is_active))
|
||||
|
||||
conn.commit()
|
||||
# print("Record inserted successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to insert record: {e}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def update_request(update_id, username=None, text=None, answer=None, message_id=None, user_id=None, is_bot=None,
|
||||
date_value=None, chat_id=None, req_type=None, first_name=None, last_name=None, is_active=None):
|
||||
conn = sqlite3.connect('./db/nahj.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# لیست ستونهایی که بهروزرسانی میشوند
|
||||
update_fields = []
|
||||
update_values = []
|
||||
|
||||
if username is not None:
|
||||
update_fields.append("username = ?")
|
||||
update_values.append(username)
|
||||
if text is not None:
|
||||
update_fields.append("text = ?")
|
||||
update_values.append(text)
|
||||
if answer is not None:
|
||||
update_fields.append("answer = ?")
|
||||
update_values.append(answer)
|
||||
if message_id is not None:
|
||||
update_fields.append("message_id = ?")
|
||||
update_values.append(message_id)
|
||||
if user_id is not None:
|
||||
update_fields.append("user_id = ?")
|
||||
update_values.append(user_id)
|
||||
if is_bot is not None:
|
||||
update_fields.append("is_bot = ?")
|
||||
update_values.append(is_bot)
|
||||
if date_value is not None:
|
||||
update_fields.append("date = ?")
|
||||
update_values.append(date_value)
|
||||
if chat_id is not None:
|
||||
update_fields.append("chat_id = ?")
|
||||
update_values.append(chat_id)
|
||||
if req_type is not None:
|
||||
update_fields.append("type = ?")
|
||||
update_values.append(req_type)
|
||||
if first_name is not None:
|
||||
update_fields.append("first_name = ?")
|
||||
update_values.append(first_name)
|
||||
if last_name is not None:
|
||||
update_fields.append("last_name = ?")
|
||||
update_values.append(last_name)
|
||||
if is_active is not None:
|
||||
update_fields.append("is_active = ?")
|
||||
update_values.append(is_active)
|
||||
|
||||
# افزودن مقدار `update_id` برای شرط WHERE
|
||||
update_values.append(update_id)
|
||||
|
||||
# اجرای کوئری آپدیت
|
||||
cursor.execute(f'''
|
||||
UPDATE requests
|
||||
SET {', '.join(update_fields)}
|
||||
WHERE update_id = ?
|
||||
''', update_values)
|
||||
|
||||
conn.commit()
|
||||
# print("Record updated successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to update record: {e}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def get_last_request():
|
||||
conn = sqlite3.connect('./db/nahj.db')
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# بازخوانی آخرین رکورد بر اساس بیشترین مقدار update_id
|
||||
cursor.execute('''
|
||||
SELECT * FROM requests
|
||||
ORDER BY update_id DESC
|
||||
LIMIT 1
|
||||
''')
|
||||
last_record = cursor.fetchone()
|
||||
|
||||
conn.close()
|
||||
|
||||
if last_record:
|
||||
return {
|
||||
"update_id": last_record[0],
|
||||
"username": last_record[1],
|
||||
"text": last_record[2],
|
||||
"answer": last_record[3],
|
||||
"message_id": last_record[4],
|
||||
"user_id": last_record[5],
|
||||
"is_bot": last_record[6],
|
||||
"date": last_record[7],
|
||||
"chat_id": last_record[8],
|
||||
"type": last_record[9],
|
||||
"first_name": last_record[10],
|
||||
"last_name": last_record[11],
|
||||
"is_active": last_record[12]
|
||||
}
|
||||
else:
|
||||
return None # اگر جدولی خالی باشد
|
||||
except Exception as e:
|
||||
print(f"Failed to fetch last record: {e}")
|
||||
conn.close()
|
||||
return None
|
||||
|
||||
|
||||
def create_tables():
|
||||
create_speechs_table()
|
||||
create_chat_table()
|
||||
|
||||
# مثال استفاده
|
||||
if __name__ == "__main__":
|
||||
pass
|
||||
# ایجاد جدول (فقط بار اول اجرا میشود)
|
||||
#create_tables()
|
||||
|
||||
|
||||
# درج دادهها
|
||||
with open('./data-faiss/faiss_index_nahj_metadata.json', 'r', encoding='utf-8') as file:
|
||||
data = json.load(file)
|
||||
# with open('./data-faiss/faiss_index_nahj_metadata.json', 'r', encoding='utf-8') as file:
|
||||
# data = json.load(file)
|
||||
|
||||
insert_data(data)
|
||||
|
||||
# خواندن داده بر اساس id
|
||||
record = get_data_by_id("wisdom774")
|
||||
print("Record by ID:", record)
|
||||
|
||||
# خواندن داده بر اساس part_id
|
||||
records_by_part = get_data_by_part_id("sp13")
|
||||
print("Records by Part ID:", records_by_part)
|
||||
|
||||
# خواندن داده بر اساس context_id
|
||||
records_by_context = get_data_by_context_id("sn1")
|
||||
print("Records by Context ID:", records_by_context)
|
||||
# insert_data(data)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import logging
|
|||
# import uvicorn
|
||||
import time
|
||||
import nahj_engine as nahj_chat
|
||||
import data_model as dm
|
||||
# ===========================
|
||||
# پیکربندی اولیه
|
||||
# ===========================
|
||||
|
|
@ -62,33 +63,30 @@ class BaleBot:
|
|||
return None
|
||||
|
||||
async def get_latest_req_id(self):
|
||||
latest_req_id = 0
|
||||
# should write in DATABASE
|
||||
with open('./bale_bot/requests.json', 'r', encoding='utf-8') as file:
|
||||
prev_reqs = json.load(file)
|
||||
|
||||
if prev_reqs:
|
||||
latest_req_id = prev_reqs[-1]['update_id']
|
||||
latest_request = dm.get_last_request()
|
||||
latest_req_id = latest_request['update_id']
|
||||
if not latest_req_id:
|
||||
latest_req_id = 0
|
||||
|
||||
return latest_req_id + 1
|
||||
|
||||
async def save_entery(self, update):
|
||||
all_reqs = []
|
||||
prev_reqs = []
|
||||
async def save_entery(self, update_item):
|
||||
is_active = True
|
||||
answer = ''
|
||||
message = update_item['message']
|
||||
fromm = message['from']
|
||||
chat = message['chat']
|
||||
try:
|
||||
# should read from DATABASE
|
||||
with open('./bale_bot/requests.json', 'r', encoding='utf-8') as file:
|
||||
prev_reqs = json.load(file)
|
||||
all_reqs.extend(prev_reqs)
|
||||
dm.insert_request(update_item['update_id'],fromm['username'],message['text'], answer, message['message_id'],fromm['id'],fromm['is_bot'],message['date'],chat['id'],chat['type'],fromm['first_name'],fromm['last_name'], is_active)
|
||||
except:
|
||||
prev_reqs = []
|
||||
all_reqs.extend(update)
|
||||
# should write in DATABASE
|
||||
with open('./bale_bot/requests.json', 'w', encoding='utf-8') as file:
|
||||
data = json.dumps(all_reqs, ensure_ascii=False, indent=2)
|
||||
file.write(data)
|
||||
|
||||
return True
|
||||
return update_item['update_id']
|
||||
|
||||
return update_item['update_id']
|
||||
|
||||
async def update_request(self, update_id, answer):
|
||||
dm.update_request(update_id= update_id, answer= answer)
|
||||
|
||||
|
||||
async def split_text_into_chunks(self, text, max_length=4000):
|
||||
"""
|
||||
|
|
@ -270,10 +268,10 @@ class BaleBot:
|
|||
reply_chuncs = await self.split_text_into_chunks(reply)
|
||||
|
||||
for i, paragraph in enumerate(reply_chuncs):
|
||||
resultt = await self.send_message(chat_id, paragraph, keyboard)
|
||||
# print(f"sent to bale server result {i+1}: {resultt}")
|
||||
await self.send_message(chat_id, paragraph, keyboard)
|
||||
|
||||
await self.save_chat_data(text, reply, first_name, username)
|
||||
# await self.save_chat_data(text, reply, first_name, username)
|
||||
return reply
|
||||
|
||||
|
||||
async def main():
|
||||
|
|
@ -285,10 +283,12 @@ async def main():
|
|||
# print(f"reading bale-bot server. len update: {len(update)}")
|
||||
if update:
|
||||
print(f'{len(update)} requests recognized!')
|
||||
await bale_bot.save_entery(update)
|
||||
for i, item in enumerate(update, 1):
|
||||
print(f'handle input {i}/{len(update)}')
|
||||
await bale_bot.handle_update(item)
|
||||
update_id = await bale_bot.save_entery(item)
|
||||
answer = await bale_bot.handle_update(item)
|
||||
if answer:
|
||||
await bale_bot.update_request(update_id, answer)
|
||||
time.sleep(1)
|
||||
|
||||
# ===========================
|
||||
|
|
|
|||
173
nahj_engine.py
173
nahj_engine.py
|
|
@ -28,7 +28,8 @@ today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime
|
|||
# -------------------
|
||||
# مدلها و مسیر داده
|
||||
# -------------------
|
||||
MODEL_PATH = "/home/sabr/MODELS/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2" # مسیر مدل FastText فارسی
|
||||
# MODEL_PATH = "/home/sabr/MODELS/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2"
|
||||
MODEL_PATH = "./models/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2"
|
||||
RERANKER_MODEL = "BAAI/bge-reranker-v2-m3"
|
||||
# RERANKER_MODEL = "/home/gpu/HFHOME/hub/models/BAAI/bge-reranker-v2-m3"
|
||||
FAISS_INDEX_PATH = "./data-faiss/faiss_index_nahj.index"
|
||||
|
|
@ -40,26 +41,43 @@ def get_key():
|
|||
key = 'aa-Fu5oeQv8jx8NCWV39WenJ7Yy1mbcFJ4P20CLQURkql2Eleta' # nahj_rag api key
|
||||
return key
|
||||
|
||||
def read_records():
|
||||
metadata = dm.get_all_data()
|
||||
ids, part_ids, context_ids, large_titles, normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types = [], [], [], [], [], [], [], [], [], []
|
||||
for item in metadata:
|
||||
|
||||
ids.append(item['id'])
|
||||
context_ids.append(item['context_id'])
|
||||
part_ids.append(item['part_id'])
|
||||
titles.append(item['title'])
|
||||
large_titles.append(item['large_title'])
|
||||
normalized_sentences.append(item['normalized_sentence'])
|
||||
urls.append(item['url'])
|
||||
types.append(item['types'])
|
||||
arabic_texts.append(item['arabic_text'])
|
||||
Interpretation_links.append(item['interpretation_links'])
|
||||
|
||||
# ids.append(item["id"])
|
||||
# part_ids.append(item["part_id"])
|
||||
# context_ids.append(item["context_id"])
|
||||
# large_titles.append(item["large_title"])
|
||||
# normalized_sentences.append(item["normalized_text"])
|
||||
# titles.append(item["title"])
|
||||
# urls.append(item["url"])
|
||||
# arabic_texts.append(item["arabic_text"])
|
||||
# Interpretation_links.append(item["Interpretation_link"])
|
||||
# types.append(item["type"])
|
||||
|
||||
return ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types
|
||||
|
||||
def load_faiss_index(index_path: str, metadata_path: str):
|
||||
index = faiss.read_index(index_path)
|
||||
# with open(metadata_path, "r", encoding="utf-8") as file:
|
||||
# metadata = json.load(file)
|
||||
metadata = dm.get_all_data()
|
||||
|
||||
ids, part_ids, context_ids, large_titles, normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types = [], [], [], [], [], [], [], [], [], [], []
|
||||
for item in metadata:
|
||||
ids.append(item["id"])
|
||||
part_ids.append(item["part_id"])
|
||||
context_ids.append(item["context_id"])
|
||||
large_titles.append(item["large_title"])
|
||||
normalized_sentences.append(item["normalized_text"])
|
||||
titles.append(item["title"])
|
||||
urls.append(item["url"])
|
||||
arabic_texts.append(item["arabic_text"])
|
||||
Interpretation_links.append(item["Interpretation_link"])
|
||||
types.append(item["type"])
|
||||
|
||||
|
||||
ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types = read_records()
|
||||
|
||||
return ids, part_ids,context_ids,large_titles,normalized_sentences, titles, urls, arabic_texts, Interpretation_links, types, index
|
||||
|
||||
|
||||
|
||||
def get_client():
|
||||
url = "https://api.avalai.ir/v1"
|
||||
|
|
@ -72,7 +90,7 @@ def get_client():
|
|||
def llm_free_request(llm_messages, model="gemini-2.5-flash-lite"):
|
||||
print(f'llm_free_request func ...')
|
||||
print(f'using model: {model}')
|
||||
client = get_client()
|
||||
|
||||
try:
|
||||
response = client.chat.completions.create(
|
||||
messages=llm_messages,
|
||||
|
|
@ -81,10 +99,10 @@ def llm_free_request(llm_messages, model="gemini-2.5-flash-lite"):
|
|||
answer = response.choices[0].message.content
|
||||
|
||||
except Exception as error:
|
||||
with open('./nahj-answer/error-in-llm-free-req.txt', mode='a+', encoding='utf-8') as file:
|
||||
error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
|
||||
file.write(error_message)
|
||||
return 'متاسفانه خطایی رخ داده است.'
|
||||
dm.insert_error(query= query, error_message= error)
|
||||
|
||||
answer = 'متاسفانه خطایی رخ داده است.'
|
||||
|
||||
return answer
|
||||
|
||||
def llm_request(query, model="gemini-2.5-flash-lite"):
|
||||
|
|
@ -92,7 +110,7 @@ def llm_request(query, model="gemini-2.5-flash-lite"):
|
|||
if query == '':
|
||||
return 'لطفا متن سوال را وارد نمائید'
|
||||
|
||||
determine_refrence = """شناسه هر سخنرانی در ابتدای آن و با فرمت "id: {idvalue}" آمده است..."""
|
||||
# determine_refrence = """شناسه هر سخنرانی در ابتدای آن و با فرمت "id: {idvalue}" آمده است..."""
|
||||
try:
|
||||
messages.append({"role": "user", "content": query})
|
||||
response = client.chat.completions.create(
|
||||
|
|
@ -103,10 +121,10 @@ def llm_request(query, model="gemini-2.5-flash-lite"):
|
|||
messages.append({"role": "assistant", "content": answer})
|
||||
except Exception as error:
|
||||
# should write in DATABASE
|
||||
with open('./nahj-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file:
|
||||
error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
|
||||
file.write(error_message)
|
||||
return 'با عرض پوزش؛ متاسفانه خطایی رخ داده است.'
|
||||
dm.insert_error(query= query, error_message= error)
|
||||
|
||||
answer = 'با عرض پوزش؛ متاسفانه خطایی رخ داده است.'
|
||||
|
||||
return answer
|
||||
|
||||
class QueryAnalysisPipeline:
|
||||
|
|
@ -193,36 +211,43 @@ class QueryAnalysisPipeline:
|
|||
# -----------------------------
|
||||
class HybridRetrieverReranker:
|
||||
__slots__ = (
|
||||
"device", "content_list", "ids","context_ids","large_titles","normalized_sentences","titles","urls","arabic_texts","Interpretation_links","types", "N", "embedder", "faiss_index",
|
||||
"vectorizer", "tfidf_matrix", "tokenizer", "reranker", "dense_alpha"
|
||||
"device", "ids","part_ids","context_ids","large_titles","sentences","titles","urls","arabic_texts","Interpretation_links","types", "N", "embedder", "faiss_index", "vectorizer", "tfidf_matrix", "tokenizer", "reranker", "dense_alpha"
|
||||
)
|
||||
# ids, dates, titles, content_list, urls,
|
||||
def __init__(self, ids: List[str], dates: List[str], titles: List[str], content_list: List[str],urls: List[str] , faiss_index,
|
||||
|
||||
# ids, part_ids,context_ids,large_titles,sentences , titles, urls, arabic_texts, Interpretation_links, types,
|
||||
def __init__(self, ids: List[str], part_ids: List[str], context_ids: List[str], large_titles: List[str], sentences: List[str], titles: List[str], urls: List[str], arabic_texts: List[str], Interpretation_links: List[str], types: List[str], faiss_index,
|
||||
dense_alpha: float = 0.6, device: str = None):
|
||||
|
||||
if device is None:
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
self.device = device
|
||||
|
||||
self.content_list = content_list
|
||||
self.ids = ids
|
||||
self.dates = dates
|
||||
self.part_ids = part_ids
|
||||
self.context_ids = context_ids
|
||||
self.large_titles = large_titles
|
||||
self.sentences = sentences
|
||||
self.titles = titles
|
||||
self.urls = urls
|
||||
self.arabic_texts = arabic_texts
|
||||
self.Interpretation_links = Interpretation_links
|
||||
self.types = types
|
||||
self.faiss_index = faiss_index
|
||||
self.N = len(content_list)
|
||||
self.N = len(sentences)
|
||||
|
||||
# --- Dense Embedder ---
|
||||
print("Loading SentenceTransformer model ...")
|
||||
self.embedder = SentenceTransformer(MODEL_PATH, device=self.device)
|
||||
|
||||
# embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device= self.device)
|
||||
# embedder.save(MODEL_PATH)
|
||||
|
||||
# --- Sparse (TF-IDF) ---
|
||||
self.vectorizer = TfidfVectorizer(
|
||||
analyzer="word",
|
||||
ngram_range=(1, 2),
|
||||
token_pattern=r"(?u)\b[\w\u0600-\u06FF]{2,}\b",
|
||||
)
|
||||
self.tfidf_matrix = self.vectorizer.fit_transform(self.content_list)
|
||||
self.tfidf_matrix = self.vectorizer.fit_transform(self.sentences)
|
||||
|
||||
# --- Reranker ---
|
||||
# self.tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL, use_fast=True, local_files_only= True)
|
||||
|
|
@ -334,8 +359,8 @@ class HybridRetrieverReranker:
|
|||
)[:final_k]
|
||||
return reranked
|
||||
|
||||
def get_passages(self, cand_idx, content_list):
|
||||
passages = [content_list[idx] for idx in cand_idx]
|
||||
def get_passages(self, cand_idx, sentences_list):
|
||||
passages = [sentences_list[idx] for idx in cand_idx]
|
||||
return passages
|
||||
|
||||
# --- Search ---
|
||||
|
|
@ -351,29 +376,22 @@ class HybridRetrieverReranker:
|
|||
# print(item)
|
||||
pre_rerank_k = final_k
|
||||
cand_idx = self.fuse(d_idx, d_scores, s_idx, s_scores, pre_rerank_k)
|
||||
# for item in d_scores:
|
||||
# print(item)
|
||||
# print('&&&&&&&&&&&&&&&&&&&&&&&&&')
|
||||
passages = self.get_passages(cand_idx, sentence_list)
|
||||
# for itemm in s_scores:
|
||||
# print(itemm)
|
||||
|
||||
# print('&&&&&&&&&&&&&&&&&&&&&&&&&')
|
||||
# print('&&&&&&&&&&&&&&&&&&&&&&&&&')
|
||||
|
||||
# passages = self.get_passages(cand_idx, sentence_list)
|
||||
|
||||
# reranked = self.rerank(query, cand_idx, passages, final_k)
|
||||
# return [
|
||||
# {"idx": i, "content": self.sentence_list[i], "rerank_score": score}
|
||||
# for i, score in reranked
|
||||
# ]
|
||||
with open("./data/passages.json", 'w',encoding='utf-8') as file:
|
||||
json.dump(passages, file, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
# return [
|
||||
# {"idx": i, "content": self.sentence_list[i], "rerank_score": score}
|
||||
# for i, score in reranked
|
||||
# ]
|
||||
return [
|
||||
{"idx": i, "content": self.sentence_list[i]}
|
||||
{"idx": i, "content": self.sentences[i]}
|
||||
for i in cand_idx
|
||||
]
|
||||
|
||||
|
|
@ -381,7 +399,7 @@ def single_query(query: str):
|
|||
|
||||
query = cleaning(query)
|
||||
|
||||
retrived_sections = pipe.search(query, sentences, topk_dense=100, topk_sparse=100, pre_rerank_k=100, final_k=17)
|
||||
retrived_sections = pipe.search(query, sentences, topk_dense=100, topk_sparse=100, pre_rerank_k=100, final_k=10)
|
||||
|
||||
retrived_sections_list = []
|
||||
final_similars_text = ''
|
||||
|
|
@ -399,7 +417,7 @@ def single_query(query: str):
|
|||
final_similars_text += ''.join(result)
|
||||
id_list += f"{ids[row['idx']]}\n"
|
||||
retrived_sections_list.append(row)
|
||||
return idlist, final_similars_text, retrived_sections_list
|
||||
return id_list, final_similars_text, retrived_sections_list
|
||||
|
||||
def find_refrences(llm_answer: str) -> List[str]:
|
||||
"""
|
||||
|
|
@ -451,7 +469,7 @@ client = get_client()
|
|||
models = [ "gemini-2.5-flash-lite", "gpt-4o-mini","deepseek-reasoner"]
|
||||
|
||||
def save_result(chat_obj: object) -> bool:
|
||||
# index result in elastic
|
||||
# index result in DATABASE
|
||||
pass
|
||||
|
||||
def run_chatbot(query:str, chat_id:str):
|
||||
|
|
@ -532,7 +550,7 @@ def run_chatbot(query:str, chat_id:str):
|
|||
'status' : True, # or False # bool
|
||||
}
|
||||
|
||||
save_result(chat_obj)
|
||||
# save_result(chat_obj)
|
||||
|
||||
status_text ='پاسخ با موفقیت ایجاد شد'
|
||||
return chat_obj, status_text
|
||||
|
|
@ -547,11 +565,11 @@ def credit_refresh():
|
|||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {get_key()}"
|
||||
}
|
||||
remained_credit = requests.get(url, headers=headers)
|
||||
remained_credit = str((requests.get(url, headers=headers)).json()['remaining_irt'])
|
||||
|
||||
with open('./llm-answer/credit.txt','w') as file:
|
||||
file.write(str(remained_credit.json()['remaining_irt']))
|
||||
return str(remained_credit.json()['remaining_irt'])
|
||||
dm.add_credit(remained_credit= remained_credit)
|
||||
|
||||
return remained_credit
|
||||
# تعریف مدل دادهها برای درخواستهای API
|
||||
# class Query(BaseModel):
|
||||
# query: str
|
||||
|
|
@ -559,7 +577,7 @@ def credit_refresh():
|
|||
date = str((datetime.datetime.now())).replace(' ','-').replace(':','').replace('.','-')
|
||||
chat_id = f'{date}-{random.randint(100000, 999999)}'
|
||||
print('#'*29)
|
||||
print(' - NAHJ ENGINE IS READY! - ')
|
||||
print(' - NAHJ ENGINE IS READY! - ')
|
||||
print('#'*29)
|
||||
# مسیر API برای اجرا کردن run_chatbot
|
||||
# @chatbot.post("/run_chatbot")
|
||||
|
|
@ -600,16 +618,15 @@ def get_passages_by_paragraphs(retrived_sections_list):
|
|||
"""
|
||||
بازسازی متن های مشابه بر اساس پاراگراف آنها
|
||||
"""
|
||||
with open('./data-faiss/faiss_index_nahj_metadata.json', 'r', encoding='utf-8') as file:
|
||||
data = json.load(file)
|
||||
data = dm.get_all_data()
|
||||
|
||||
final_passages = ''
|
||||
for item in retrived_sections_list:
|
||||
title = item['large_title']
|
||||
filtered_data = {}
|
||||
for row in data:
|
||||
if row['part_id'] == item['part_id']:
|
||||
filtered_data[row['id']] = row
|
||||
title = row['large_title']
|
||||
|
||||
# مرتب سازی بر اساس ترتیب جمله در پاراگراف
|
||||
sorted_data = dict(sorted(filtered_data.items()))
|
||||
|
|
@ -621,7 +638,7 @@ def get_passages_by_paragraphs(retrived_sections_list):
|
|||
final_passages += ''.join(f'{paragraph.strip()}\n\n')
|
||||
|
||||
return final_passages
|
||||
pass
|
||||
|
||||
|
||||
def bale_search(query):
|
||||
start = datetime.datetime.now()
|
||||
|
|
@ -634,20 +651,6 @@ def bale_search(query):
|
|||
# پاسخ حداکثر 300 کلمه باشد.
|
||||
|
||||
refrences = ''
|
||||
# recognized_refrences = find_refrences(llm_answer)
|
||||
# llm_answer = replace_refrences(llm_answer, recognized_refrences)
|
||||
|
||||
# with open('./nahj-answer/result-khamenei.txt', mode='a+', encoding='utf-8') as file:
|
||||
# result_message = f'متن پرامپت: {query.strip()}\n\nپاسخ: {llm_answer} \n----------------------------------------------------------\n'
|
||||
# file.write(result_message)
|
||||
|
||||
# with open('./nahj-answer/passages-khamenei.txt', mode='a+', encoding='utf-8') as file:
|
||||
# result_message = f'متن پرامپت: {query.strip()}\nمواد مشابه: {result_passages} \n----------------------------------------------------------\n'
|
||||
# file.write(result_message)
|
||||
|
||||
# with open('./nahj-answer/chat-leader.txt', mode='w', encoding='utf-8') as file:
|
||||
# result_message = f'{query.strip()}\n\nپاسخ:\n {llm_answer} \n----------------------------------------------------------\n'
|
||||
# file.write(result_message)
|
||||
|
||||
print('---------------------------------------------')
|
||||
print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}')
|
||||
|
|
@ -672,20 +675,6 @@ def bale_chat(query):
|
|||
print(f'llm duration: {(datetime.datetime.now() - end_retrive).total_seconds()}')
|
||||
|
||||
refrences = ''
|
||||
# recognized_refrences = find_refrences(llm_answer)
|
||||
# llm_answer = replace_refrences(llm_answer, recognized_refrences)
|
||||
|
||||
# with open('./nahj-answer/result-khamenei.txt', mode='a+', encoding='utf-8') as file:
|
||||
# result_message = f'متن پرامپت: {query.strip()}\n\nپاسخ: {llm_answer} \n----------------------------------------------------------\n'
|
||||
# file.write(result_message)
|
||||
|
||||
# with open('./nahj-answer/passages-khamenei.txt', mode='a+', encoding='utf-8') as file:
|
||||
# result_message = f'متن پرامپت: {query.strip()}\nمواد مشابه: {result_passages} \n----------------------------------------------------------\n'
|
||||
# file.write(result_message)
|
||||
|
||||
# with open('./nahj-answer/chat-leader.txt', mode='w', encoding='utf-8') as file:
|
||||
# result_message = f'{query.strip()}\n\nپاسخ:\n {llm_answer} \n----------------------------------------------------------\n'
|
||||
# file.write(result_message)
|
||||
|
||||
print('---------------------------------------------')
|
||||
print(f'full duration: {(datetime.datetime.now() - start).total_seconds()}')
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user