first-step
This commit is contained in:
commit
4a56846008
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
__pycache__/
|
||||
.env
|
||||
config.env
|
||||
app/lib
|
||||
.venv
|
||||
.idea
|
||||
_temp.py
|
||||
_data_json/
|
||||
__pycache__/*
|
||||
42
bale_buttons.py
Normal file
42
bale_buttons.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
# Inline keyboard attached to the welcome message; every inner list is one row.
START_BUTTONS_INLINE = [
    [{"text": "گفتگو", "callback_data": "law_chat"}],
    [{"text": "جستجو", "callback_data": "law_search"}],
    [
        # {"text": "راهنما", "callback_data": "help"},
        {"text": "درباره ما", "callback_data": "about"},
        {"text": "تماس با ما", "callback_data": "contact"},
    ],
]


# Label shown on a button -> callback identifier it stands for.
BUTTON_TEXT_TO_CALLBACK = {
    "جستجو": "law_search",
    "گفتگو": "law_chat",
    # "راهنما": "help",
    "درباره ما": "about",
    "تماس با ما": "contact",
}

# Single-row layout of the labels above. Derived from the mapping (dicts keep
# insertion order) so the two definitions cannot drift apart.
BUTTON_TEXT_TO_CALLBACK_LIST = [list(BUTTON_TEXT_TO_CALLBACK)]

# Inline button offered under search results to request more of them.
MORE_BUTTON = [[{"text": "🔽 نمایش نتایج بیشتر", "callback_data": "more"}]]


# Inline buttons offered under a chat answer to re-run it with more effort.
CHAT_EFFORT_BUTTONS = [
    [
        # {"text": "⚡ سریع", "callback_data": "chat_effort_low"},
        {"text": "🧠 بررسی عمیق تر", "callback_data": "chat_effort_medium"},
        # {"text": "⚖ نرمال", "callback_data": "chat_effort_medium"},
    ]
    # [
    #     {"text": "➕ ادامه پاسخ", "callback_data": "chat_more"}
    # ]
]
|
||||
35
bale_massages.py
Normal file
35
bale_massages.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# Greeting sent in reply to /start.
STARTMESSAGE = (
    "👋 سلام دوست عزیز! 🤗\n"
    "به دستیار هوشمند قانون یار خوش آمدید!\n"
    "فقط کافیه به من بگید چه کمکی از دستم برمیاد!"
)


# "About us" text; contains a markdown link to the source system.
ABOUT = """
من ربات گفتگوگر حقوقی هستم که روی قوانین رسمی جمهوری اسلامی ایران از سامانه قانونیار مجلس شورای اسلامی توسعه یافتم.
لذا به هر سوال و گفتگویی طبق همان منابع پاسخ میدهم
نشانی سامانه منبع در زیر آمده است
[qanonyar.parliran.ir](https://qanonyar.parliran.ir)

کارفرما : مرکز فناوری مجلس شورای اسلامی ایران

"""

# "Contact us" text.
CONTACT_US = "لطفا برای ارتباط با ما از طریق مرکز فناوری مجلس شورای اسلامی ایران اقدام فرمایید"


# Prompt shown when the user enters search mode.
JOST_OJO_TEXT = """📚✨ هر پرسش حقوقی که از قوانین کشور دارید یا هر متن حقوقی که براتون مهم هست، همینجا بفرستید تا با استناد به قوانین با شما گفتگو کنم..

📝 میتونید:
• موضوع کلی رو بگید
• سؤال بپرسید
• یا متن حقوقی که داری برام بنویسید

🎯 هرچقدر دقیقتر و واضحتر توضیح بدید، بهتر میتوانم راهنمایی کنم.
"""

HOURGLASS = "⏳"  # hourglass sticker

# Sent when a new message arrives while the previous request is still running.
WAIT_TEXT = HOURGLASS + " لطفاً کمی صبر کنید...\nدر حال پردازش درخواست قبلی شما هستم."


# Prompt shown when the user enters chat mode.
GOFT_OGO_TEXT = "💬 آماده گفتگو هستم"
|
||||
116
base_model.py
Normal file
116
base_model.py
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class BaleStartMessageForm(BaseModel):
    """Sender of an incoming message (the ``from`` object of the update)."""

    id: int
    is_bot: bool = False
    first_name: str
    last_name: str | None = None
    username: str | None = None
|
||||
|
||||
|
||||
class BaleStartMessageChat(BaseModel):
    """Chat an update belongs to; ``id`` is what replies are addressed to."""

    id: int
    type: str
    username: str | None = None
    first_name: str | None = None
|
||||
|
||||
|
||||
class BaleStartMessage(BaseModel):
    """Incoming message payload of an update.

    ``from`` is a Python keyword, so the sender is exposed as ``from_user`` and
    filled through the ``from`` alias (or by field name, see ``Config``).
    """

    message_id: int
    from_user: BaleStartMessageForm = Field(..., alias="from")
    date: int
    chat: BaleStartMessageChat
    # Optional: an update that carries no text must still validate. The message
    # handler already falls back to "" when text is missing.
    text: Optional[str] = None
    entities: List[dict] = []

    class Config:
        populate_by_name = True
|
||||
|
||||
|
||||
class BaleCallbackFrom(BaseModel):
    """User who pressed an inline button."""

    id: int
    is_bot: bool
    first_name: str
    username: str | None = None
|
||||
|
||||
|
||||
class BaleCallbackMessage(BaseModel):
    """Message that carried the inline button which was pressed."""

    message_id: int
    chat: BaleStartMessageChat
    # In pydantic v2 an Optional annotation without a default is still a
    # required field; the explicit default makes a missing ``text`` acceptable.
    text: Optional[str] = None
|
||||
|
||||
|
||||
class BaleCallbackQuery(BaseModel):
    """Inline-button press; ``data`` holds the button's callback identifier."""

    id: str
    # "from" is a Python keyword, hence the alias.
    from_user: BaleCallbackFrom = Field(..., alias="from")
    message: BaleCallbackMessage
    data: str

    class Config:
        populate_by_name = True
|
||||
|
||||
|
||||
class BaleUpdate(BaseModel):
    """Top-level webhook payload: a message, a callback query, or neither."""

    update_id: int
    message: BaleStartMessage | None = None
    callback_query: BaleCallbackQuery | None = None

    class Config:
        # NOTE(review): exclude_none does not look like a recognised pydantic
        # config option - confirm whether it has any effect.
        exclude_none = True
|
||||
|
||||
|
||||
class QaChatSingle(BaseModel):
    """Record of one question/answer exchange.

    Field names (including the ``retrived_*`` spelling) are part of the stored
    schema and are kept unchanged.
    """

    id: str
    chat_id: int
    user_query: str
    model_key: str
    model_effort: str

    # Retrieval stage.
    retrived_passage: str
    retrived_ref_ids: str
    retrived_duration: int | None = 0
    prompt_type: str = "question"

    # Generation stage and bookkeeping.
    llm_duration: int
    full_duration: int | None = 0
    time_create: int | None = 0
    used_ref_ids: str | None = ""
    status_text: str | None = ""
    status: int | None = 0
    prompt_answer: str
    other_info: dict | None
|
||||
|
||||
|
||||
class QaChatBlock(BaseModel):
    """Conversation envelope wrapping a single QaChatSingle exchange."""

    id: str
    title: str
    user_id: str
    is_premium: bool
    chat: QaChatSingle
    total_token: int
    is_end: bool
|
||||
|
||||
|
||||
class QaChat(BaseModel):
    """Flat chat record; ``save_to_es`` dumps it with ``model_dump()``.

    ``ss_*`` fields describe the semantic-search stage and ``llm_*`` fields the
    language-model stage.
    """

    id: str
    chat_id: int
    title: Optional[str] = ""
    user_id: str
    user_query: str
    query_type: str = "question"  # llm -> greeting, other, legal_question | rag -> question
    full_duration: Optional[float] = 0
    # Was "": a string default on a dict-typed field. Defaults are not
    # validated, so the wrong type would have been dumped as-is.
    other_info: Optional[dict] = None

    # Was "": now an empty list, matching llm_ref_ids below.
    ss_ref_ids: Optional[List[str]] = []
    ss_model_key: Optional[str] = ""
    ss_duration: Optional[float] = 0
    ss_answer: Optional[str] = ""

    llm_ref_ids: Optional[List[str]] = []
    llm_model_key: Optional[str] = ""
    llm_duration: Optional[float] = 0
    llm_answer: Optional[str] = ""

    status_text: Optional[str] = ""
    status: Optional[int] = 0
|
||||
632
main.py
Normal file
632
main.py
Normal file
|
|
@ -0,0 +1,632 @@
|
|||
#################
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
import requests, logging, asyncio, httpx, os, uuid, traceback, orjson, copy, uvicorn, time, re
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
from time import sleep
|
||||
from enum import Enum
|
||||
|
||||
#################
|
||||
from base_model import BaleUpdate, QaChat
|
||||
from bale_buttons import (
|
||||
START_BUTTONS_INLINE,
|
||||
BUTTON_TEXT_TO_CALLBACK,
|
||||
BUTTON_TEXT_TO_CALLBACK_LIST,
|
||||
MORE_BUTTON, CHAT_EFFORT_BUTTONS
|
||||
)
|
||||
from bale_massages import (
|
||||
STARTMESSAGE,
|
||||
ABOUT,
|
||||
CONTACT_US,
|
||||
JOST_OJO_TEXT,
|
||||
HOURGLASS,
|
||||
WAIT_TEXT,GOFT_OGO_TEXT
|
||||
)
|
||||
from utils import load_orjson, save_orjson, ElasticHelper, split_text_chunks
|
||||
|
||||
|
||||
############## Create app
|
||||
app = FastAPI()
##############

############## Global-Params
load_dotenv()

# Credentials and endpoints come from the environment (.env is loaded above).
TOKEN = os.getenv("BALE_TOKEN")
ES_URL = os.getenv("ES_URL")
ES_PASSWORD = os.getenv("ES_PASSWORD")
ES_USER_NAME = os.getenv("ES_USER_NAME")
ES_INDEX_NAME = os.getenv("ES_INDEX_NAME")

BASE_URL = f"https://tapi.bale.ai/bot{TOKEN}"
DATA_DIR = os.path.join(".", "_data_json")
# exist_ok avoids the check-then-create race when two processes start together.
os.makedirs(DATA_DIR, exist_ok=True)

GLOBAL_DOMAIN = "https://bl.tavasi.ir"  # f"https://YOUR_DOMAIN.com
WEBHOOK_URL = f"{GLOBAL_DOMAIN}/webhook/{TOKEN}"
SET_WEBHOOK_URL = f"https://tapi.bale.ai/bot{TOKEN}/setWebhook"

MAX_LIMIT_RAG = 100
STEP_RAG = 10
# Per-chat in-memory stores, keyed by chat id.
USER_STATE = {}
USER_LAST_QUERY = {}
USER_PAGINATION = {}
USER_CHAT_EFFORT = {}
USER_CHAT_LIMIT = {}
USER_LAST_CHAT_QUERY = {}

TIME_OUT = 60  # timeout (seconds) used for backend HTTP calls
MAX_LEN = 4000  # staying a little under the base limit is safer

# Constructed at import time; the helper pings Elasticsearch in its __init__.
ES_HELPER = ElasticHelper(
    es_url=ES_URL,
    es_pass=ES_PASSWORD,
    es_user=ES_USER_NAME,
)
|
||||
|
||||
class UserState(str, Enum):
    """Conversation mode of a chat; members compare equal to their string value."""

    MAIN = "main"  # default state of a chat
    LAW_SEARCH = "law_search"  # next message is a search query
    LAW_CHAT = "law_chat"  # next message is a chat question
    BUSY = "busy"  # a request for this chat is still being processed
|
||||
|
||||
|
||||
class SessionManager:
    """In-memory per-chat store of conversation state and query pagination.

    Args:
        default_limit: Result limit assigned to every new query.
        step: Amount added to the limit by ``increase_limit``.
        max_limit: Optional upper bound for the limit; ``None`` (the default)
            keeps the limit unbounded.
    """

    def __init__(self, default_limit: int = 10, step: int = 10, max_limit: "int | None" = None):
        self.states = {}
        self.pagination = {}
        self.default_limit = default_limit
        self.step = step
        self.max_limit = max_limit

    def init(self, chat_id: int):
        """Register ``chat_id`` in the MAIN state unless it is already known."""
        if chat_id not in self.states:
            self.states[chat_id] = UserState.MAIN

    def get_state(self, chat_id: int):
        """Return the chat's state, MAIN for unknown chats."""
        return self.states.get(chat_id, UserState.MAIN)

    def set_state(self, chat_id: int, state: "UserState"):
        """Store ``state`` for the chat."""
        self.states[chat_id] = state

    def set_query(self, chat_id: int, query: str):
        """Start a fresh pagination record for ``query`` (limit is reset)."""
        self.pagination[chat_id] = {
            "query": query,
            "limit": self.default_limit,
            "step": self.step,
        }

    def increase_limit(self, chat_id: int):
        """Raise the chat's limit by its step and return the pagination record.

        Returns ``None`` when the chat has no stored query.
        """
        pag = self.pagination.get(chat_id)
        if pag is None:
            return None
        pag["limit"] += pag["step"]
        if self.max_limit is not None:
            pag["limit"] = min(pag["limit"], self.max_limit)
        return pag

    def get_pagination(self, chat_id: int):
        """Return the chat's pagination record, or ``None``."""
        return self.pagination.get(chat_id)

    def clear(self, chat_id: int):
        """Drop the chat's pagination and put it back into the MAIN state."""
        self.pagination.pop(chat_id, None)
        self.states[chat_id] = UserState.MAIN
|
||||
|
||||
|
||||
class BaleBot:
    """Dispatch Bale webhook updates to the menu, search and chat flows.

    Per-chat state is kept in the injected SessionManager. Replies are sent
    through the module-level ``send_message`` helper, and the backend is reached
    through ``result_chat`` / ``result_semantic_search``.
    """

    def __init__(self, session: SessionManager):
        self.session = session

    async def handle_update(self, update: BaleUpdate):
        """Route an update to the message or callback handler."""
        if update.message:
            return await self.handle_message(update)

        if update.callback_query:
            return await self.handle_callback(update)

        # Neither payload present: still acknowledge with a JSON body.
        return {"ok": True}

    async def handle_message(self, update: BaleUpdate):
        """Handle a text message: menu commands first, then the active mode."""
        chat_id = update.message.chat.id
        text = (update.message.text or "").strip()

        self.session.init(chat_id)

        # /start
        if text == "/start":
            self.session.clear(chat_id)
            send_message(chat_id, STARTMESSAGE, buttons=START_BUTTONS_INLINE)
            return {"ok": True}

        if text == "تماس با ما":
            send_message(chat_id, CONTACT_US)
            return {"ok": True}

        if text == "درباره ما":
            send_message(chat_id, ABOUT)
            return {"ok": True}

        # A previous request is still running for this chat.
        if self.session.get_state(chat_id) == UserState.BUSY:
            send_message(chat_id, WAIT_TEXT)
            return {"ok": True}

        # Keyboard labels typed as text behave like the matching buttons.
        if text == "جستجو":
            self.session.set_state(chat_id, UserState.LAW_SEARCH)
            send_message(chat_id, JOST_OJO_TEXT)
            return {"ok": True}

        if text == "گفتگو":
            self.session.set_state(chat_id, UserState.LAW_CHAT)
            send_message(chat_id, GOFT_OGO_TEXT)
            return {"ok": True}

        # Nothing to search for or ask about.
        if not text:
            return {"ok": True}

        if self.session.get_state(chat_id) == UserState.LAW_SEARCH:
            return await self.handle_search(chat_id, update, text)

        # LAW_CHAT, and every other state, defaults to chat.
        self.session.set_state(chat_id, UserState.LAW_CHAT)
        return await self.handle_chat(chat_id, text)

    async def handle_callback(self, update: BaleUpdate):
        """Handle an inline-button press identified by its callback data."""
        chat_id = update.callback_query.message.chat.id
        data = update.callback_query.data

        # The start keyboard offers these two buttons; answer them like the
        # equivalent text commands.
        if data == "about":
            send_message(chat_id, ABOUT)
            return {"ok": True}

        if data == "contact":
            send_message(chat_id, CONTACT_US)
            return {"ok": True}

        # Same guard as for text messages: do not start a second request while
        # one is still running for this chat.
        if self.session.get_state(chat_id) == UserState.BUSY:
            send_message(chat_id, WAIT_TEXT)
            return {"ok": True}

        if data == "law_search":
            self.session.set_state(chat_id, UserState.LAW_SEARCH)
            send_message(chat_id, JOST_OJO_TEXT)
            return {"ok": True}

        if data == "law_chat":
            self.session.set_state(chat_id, UserState.LAW_CHAT)
            send_message(chat_id, GOFT_OGO_TEXT)
            return {"ok": True}

        if data.startswith("chat_effort_"):
            USER_CHAT_EFFORT[chat_id] = data.replace("chat_effort_", "")

            send_message(chat_id, "✅ بله حتما، لطفا کمی منتظر بمانید ...")

            # Re-run the last question with the new effort level.
            last_query = USER_LAST_CHAT_QUERY.get(chat_id)
            if last_query:
                return await self.handle_chat(chat_id, last_query, again=True)

            return {"ok": True}

        if data == "chat_more":
            pag = self.session.increase_limit(chat_id)

            if not pag:
                send_message(chat_id, "❌ سوالی برای ادامه نیست")
                return {"ok": True}

            last_query = USER_LAST_CHAT_QUERY.get(chat_id)
            if last_query:
                # keep_pagination: otherwise handle_chat would reset the limit
                # that was just increased.
                return await self.handle_chat(chat_id, last_query, keep_pagination=True)

            return {"ok": True}

        if data == "more":
            return await self.handle_more(chat_id)

        if data == "stop":
            self.session.clear(chat_id)
            send_message(chat_id, "✅ پایان نمایش")
            return {"ok": True}

        # Unknown callback data: acknowledge without action.
        return {"ok": True}

    async def _run_search(self, chat_id, pag, error_text, notice=None):
        """Run the semantic search described by ``pag`` and send the results.

        The chat is BUSY for the duration and always returns to MAIN afterwards.
        """
        self.session.set_state(chat_id, UserState.BUSY)

        try:
            # Inside the try so that a failed send cannot leave the chat BUSY.
            if notice:
                send_message(chat_id, notice)

            result = await result_semantic_search(
                text=pag["query"],
                limit=pag["limit"],
            )

            # The backend helper returns a plain error string on failure.
            if isinstance(result, str):
                send_message(chat_id, result)
                return

            send_message(
                chat_id,
                chunked_text=get_in_form(
                    title=pag["query"],
                    sections=result["ss_answer"],
                ),
                buttons=MORE_BUTTON,
            )

        except Exception as e:
            print(e)
            send_message(chat_id, error_text)

        finally:
            self.session.set_state(chat_id, UserState.MAIN)

    async def handle_search(self, chat_id, update, text):
        """Store ``text`` as the chat's query and send its search results."""
        self.session.set_query(chat_id, text)

        await self._run_search(
            chat_id,
            self.session.get_pagination(chat_id),
            error_text="❌ خطا در جستجو",
            notice="⏳ در حال جستجو...",
        )

        return {"ok": True}

    async def handle_more(self, chat_id):
        """Increase the chat's result limit and send the search results again."""
        pag = self.session.increase_limit(chat_id)

        if not pag:
            send_message(chat_id, "❌ درخواستی یافت نشد")
            return {"ok": True}

        await self._run_search(chat_id, pag, error_text="❌ خطا در افزایش نتایج")

        return {"ok": True}

    async def handle_chat(self, chat_id: int, text: str, again=False, keep_pagination=False):
        """Send ``text`` to the chat backend and relay its answer.

        Args:
            chat_id: Chat to answer.
            text: Question to ask.
            again: Passed through to the backend as its ``again`` flag.
            keep_pagination: Keep the stored limit when the stored query equals
                ``text`` instead of starting a fresh pagination record.
        """
        # Mark the chat BUSY while the request runs.
        self.session.set_state(chat_id, UserState.BUSY)

        try:
            # Inside the try so that a failed send cannot leave the chat BUSY.
            send_message(chat_id, "⏳ در حال پردازش...")

            pag = self.session.get_pagination(chat_id)
            reuse = bool(keep_pagination and pag and pag["query"] == text)
            # A first message, or newly typed text, starts a fresh query.
            if not reuse and (not pag or text.strip()):
                self.session.set_query(chat_id, text)
                pag = self.session.get_pagination(chat_id)

            # Effort comes from the module-level store, defaulting to "low".
            effort = USER_CHAT_EFFORT.get(chat_id, "low")

            result = await result_chat(
                text=pag["query"],
                limit=pag["limit"],
                effort=effort,
                again=again,
            )

            # The backend helper returns a plain error string on failure.
            if isinstance(result, str):
                send_message(chat_id, result)
                return {"ok": True}

            text_ = result['llm_answer']
            query_type = result['query_type']
            if isinstance(text_, str):
                text_ = format_answer_bale(answer_text=text_)

            # Remember the question for the inline buttons.
            USER_LAST_CHAT_QUERY[chat_id] = pag["query"]

            print('-=' * 10, effort, query_type)
            # Offer the deeper-analysis button only for legal questions that
            # were not already answered at medium effort.
            if query_type == 'legal_question' and effort != 'medium':
                send_message(
                    chat_id=chat_id,
                    chunked_text=text_,
                    buttons=CHAT_EFFORT_BUTTONS,
                )
            else:
                send_message(
                    chat_id=chat_id,
                    chunked_text=text_,
                )

        except Exception as e:
            print("ERROR in handle_chat:", e)
            send_message(chat_id, "❌ خطا در پردازش")

        finally:
            # Back to chat mode.
            self.session.set_state(chat_id, UserState.LAW_CHAT)

        return {"ok": True}
|
||||
|
||||
|
||||
|
||||
WEB_LINK = "https://majles.tavasi.ir/entity/detail/view/qsection/"


def get_in_form(title: str, sections: list, max_len: int = 4000):
    """Format search results as numbered, linked entries packed into chunks.

    Args:
        title: The user's query, shown as the header of the first chunk.
        sections: Result dicts; ``content`` and ``id`` are read from each.
            ``None`` is treated as an empty list.
        max_len: Maximum length of one chunk.

    Returns:
        A list of non-empty strings, none longer than ``max_len``.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    packed = []
    current = f"برای پرسش: {title}\n\n"
    ref_text = "«منبع»"

    for i, data in enumerate(sections or [], start=1):
        sec_text = data.get("content", "")
        idx = data.get("id")

        # Markdown link to the section's page.
        ref = f"[{ref_text}]({WEB_LINK}{idx})"
        block = f"{i}: {sec_text}\n{ref}\n\n"

        # Start a new chunk when this entry would overflow the current one.
        if len(current) + len(block) > max_len:
            if current.strip():
                packed.append(current.rstrip())
            current = ""

        current += block

    if current.strip():
        packed.append(current.rstrip())

    # A single entry (or the header) can be longer than max_len on its own;
    # hard-split those so that every returned chunk respects the limit.
    return [
        chunk[start:start + max_len]
        for chunk in packed
        for start in range(0, len(chunk), max_len)
    ]
|
||||
|
||||
|
||||
|
||||
def format_answer_bale(answer_text: str, max_len: int = 4000):
    """Turn source codes in a model answer into links and chunk the result.

    Parenthesised codes such as ``(qs123)``, ``(qs123, qs456)`` or the same
    prefixed with the Persian word for "source" are replaced by markdown links
    built from ``WEB_LINK``. A parenthesised match that holds no code at all
    (only blanks or commas) is left untouched.

    Args:
        answer_text: Model output.
        max_len: Maximum length of one chunk.

    Returns:
        ``[text]`` when the converted text fits into ``max_len``; otherwise a
        list of non-empty chunks split at sentence boundaries (``". "``), with
        over-long sentences hard-split so that no chunk exceeds ``max_len``.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    ref_text = "«منبع»"

    # Any parenthesis holding one or more codes, e.g. (qs123) or (qs123, qs456).
    # NOTE(review): this also matches plain numbers such as "(2)" - confirm
    # whether those should become links.
    pattern = r"\((?:منبع[:: ]+)?([a-zA-Z0-9_, ]+)\)"

    def replace_source(m):
        codes = [c.strip() for c in m.group(1).split(",")]
        codes = [c for c in codes if c]
        if not codes:
            # "( )" or "(,)" would otherwise become a link to the bare base URL.
            return m.group(0)
        return ", ".join(f"[{ref_text}]({WEB_LINK}{code})" for code in codes)

    answer_text = re.sub(pattern, replace_source, answer_text)

    if len(answer_text) <= max_len:
        return [answer_text]

    chunks = []
    current = ""

    sentences = answer_text.split(". ")
    last = len(sentences) - 1
    for pos, sentence in enumerate(sentences):
        st = sentence.strip()
        if not st:
            continue
        # Restore the period that split() removed; the final piece had none.
        if pos != last and not st.endswith("."):
            st += "."

        # A sentence longer than the limit is cut into max_len-sized pieces.
        while len(st) > max_len:
            if current.strip():
                chunks.append(current.strip())
                current = ""
            chunks.append(st[:max_len])
            st = st[max_len:]
        if not st:
            continue

        if len(current) + len(st) > max_len:
            chunks.append(current.strip())
            current = ""

        current += st + " "

    if current.strip():
        chunks.append(current.strip())

    return chunks
|
||||
|
||||
|
||||
|
||||
def send_message(
    chat_id: int,
    text: str | None = None,
    buttons=None,  # inline buttons
    chunked_text=None,
):
    """Send a message to a Bale chat, one request per chunk.

    Args:
        chat_id: Target chat.
        text: Text to send; split with ``split_text_chunks``. Ignored when
            ``chunked_text`` is given.
        buttons: Inline keyboard rows attached to the last chunk.
        chunked_text: Pre-split chunks. A plain string is also accepted and is
            split like ``text``.

    The reply keyboard built from ``BUTTON_TEXT_TO_CALLBACK_LIST`` is attached
    to the last chunk only. Empty chunks are skipped. Network errors are logged
    and end the send instead of propagating to the caller.
    """
    url = f"https://tapi.bale.ai/bot{TOKEN}/sendMessage"

    if chunked_text is None:
        chunks = split_text_chunks(text) if text else []
    elif isinstance(chunked_text, str):
        # Iterating a string would send it one character per message.
        chunks = split_text_chunks(chunked_text)
    else:
        chunks = list(chunked_text)

    # An empty "text" is not worth a request.
    chunks = [chunk for chunk in chunks if chunk]
    if not chunks:
        return

    reply_keyboard = [
        [{"text": btn} for btn in row] for row in BUTTON_TEXT_TO_CALLBACK_LIST
    ]

    total = len(chunks)

    for i, chunk in enumerate(chunks):
        payload = {
            "chat_id": chat_id,
            "text": chunk,
        }

        # Keyboards go with the last message only.
        if i == total - 1:
            reply_markup = {
                "keyboard": reply_keyboard,
                "resize_keyboard": True,
                "one_time_keyboard": False,
            }

            if buttons:
                reply_markup["inline_keyboard"] = buttons

            payload["reply_markup"] = reply_markup

        try:
            # This is a blocking call; the timeout bounds how long it can hold
            # up the caller.
            r = requests.post(url, json=payload, timeout=TIME_OUT)
        except requests.RequestException as e:
            print("send_message failed:", e)
            return

        if r.status_code >= 400:
            print("send_message status:", r.status_code)
|
||||
|
||||
|
||||
async def save_to_es(data: QaChat):
    """Index ``data`` into ``ES_INDEX_NAME`` under its own id (best effort).

    Failures are printed with a traceback and not re-raised.
    """
    try:
        ES_HELPER.update_index_doc(
            is_update_state=False,
            index_name_o=ES_INDEX_NAME,
            eid=data.id,
            data=data.model_dump(),
        )
    # Exception instead of a bare except: KeyboardInterrupt / SystemExit and
    # task cancellation are no longer swallowed.
    except Exception:
        print("save_to_es - 000000000000000000000000000000000000000000000")
        traceback.print_exc()
|
||||
|
||||
|
||||
def initialize_webhook():
    """Register this server's webhook URL with Bale.

    This step is mandatory for the bot: it tells the Bale server which server
    the updates for this token must be delivered to.

    Network errors propagate to the caller.
    """
    params = {"url": WEBHOOK_URL}
    # Bounded wait so that startup cannot hang on an unreachable API.
    r = requests.get(SET_WEBHOOK_URL, params=params, timeout=TIME_OUT)
    print("Webhook set status:", r.status_code)
    try:
        print("Webhook set response:", r.json())
    except ValueError:
        # Error pages are not always JSON; show the raw body instead of crashing.
        print("Webhook set response:", r.text)
|
||||
|
||||
|
||||
|
||||
def chunked_simple_text(answer_text, max_len=4000):
    """Split text into chunks of at most ``max_len`` characters.

    The text is split at sentence boundaries (``". "``) and sentences are packed
    greedily. A sentence longer than ``max_len`` is hard-split. No empty chunk
    is produced, and no period is appended to text that did not end with one.

    Args:
        answer_text: Text to split; ``None`` or ``""`` yields ``[]``.
        max_len: Maximum chunk length.

    Returns:
        List of non-empty chunks.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")
    if not answer_text:
        return []

    chunks = []
    current = ""

    sentences = answer_text.split(". ")
    last = len(sentences) - 1
    for pos, sentence in enumerate(sentences):
        st = sentence.strip()
        if not st:
            continue
        # Restore the period that split() removed; the final piece had none.
        if pos != last and not st.endswith("."):
            st += "."

        # A sentence longer than the limit is cut into max_len-sized pieces.
        while len(st) > max_len:
            if current.strip():
                chunks.append(current.strip())
                current = ""
            chunks.append(st[:max_len])
            st = st[max_len:]
        if not st:
            continue

        if len(current) + len(st) > max_len:
            chunks.append(current.strip())
            current = ""

        current += st + " "

    if current.strip():
        chunks.append(current.strip())

    return chunks
|
||||
|
||||
|
||||
async def result_chat(text, limit=10, effort="low", again=False):
    """POST a chat request to the backend and return its ``result`` field.

    Returns the value under ``result`` in the JSON reply (a fallback string when
    the key is absent), or an error string when the request fails.
    """
    endpoint = "http://2.188.15.101:8009/run_chat"
    body = {
        "query": text,
        "limit": limit,
        "effort": effort,
        "again": again,
        "mode_type": "bale",
    }

    try:
        async with httpx.AsyncClient(timeout=TIME_OUT) as client:
            reply = await client.post(endpoint, json=body)
            reply.raise_for_status()
            return reply.json().get("result", "❌ پاسخی دریافت نشد")

    except Exception as e:
        print(f"❌ خطای RAG:\n{str(e)}")
        return "❌ ارتباط با سرور قطع میباشد"
|
||||
|
||||
async def result_semantic_search(text, limit):
    """POST a semantic-search request and return the reply's ``result`` field.

    Returns an error string instead when the request fails.
    """
    endpoint = "http://2.188.15.101:8009/run_semantic_search"
    body = {"query": text, "limit": limit}

    try:
        async with httpx.AsyncClient(timeout=TIME_OUT) as client:
            reply = await client.post(endpoint, json=body)
            reply.raise_for_status()
            # reply.json() yields the decoded data, not an object.
            return reply.json().get("result")

    except Exception as e:
        print(f"❌ خطای RAG:\n{str(e)}")
        return "ارتباط با بالا قطع می باشد❌"
|
||||
|
||||
|
||||
|
||||
|
||||
session = SessionManager()
bot = BaleBot(session)


@app.post(f"/webhook/{TOKEN}")
async def webhook(request: Request):
    """Receive an update from Bale and hand it to the bot.

    A body that is not valid JSON, or does not match ``BaleUpdate``, is logged
    and acknowledged with ``{"ok": True}``.
    """
    try:
        # Reading the body is part of parsing: a non-JSON body must not
        # surface as a server error.
        raw = await request.json()
        update = BaleUpdate(**raw)
    except Exception as e:
        print("❌ Parse Error", e)
        return {"ok": True}

    print(f'update {update}')

    result = await bot.handle_update(update)
    # Always answer with a JSON object, even if a handler returned nothing.
    return result if result is not None else {"ok": True}
|
||||
|
||||
if __name__ == "__main__":
    # 1) Register the webhook first.
    print("\n====== Bot Status: ======")
    initialize_webhook()

    # 2) Then start the FastAPI server that receives the updates.
    print("\n====== Bot Webhook Server Running ======")
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8005,
        reload=True,
    )
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# # 1️⃣ ابتدا وبهوک را ست میکنیم
|
||||
# print("\n====== Bot Status: ======")
|
||||
# initialize_webhook()
|
||||
|
||||
# # 2️⃣ بعد سرور FastAPI را اجرا میکنیم تا پیامها را دریافت کند
|
||||
# print("\n====== Bot Webhook Server Running ======")
|
||||
# uvicorn.run("main:app", host="0.0.0.0", port=8005, reload=True)
|
||||
|
||||
|
||||
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
elasticsearch==8.13.2
|
||||
13
samle_env
Normal file
13
samle_env
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
BALE_TOKEN=""
|
||||
BALE_ID=""
|
||||
BALE_NAME=""
|
||||
|
||||
# ELASTIC CONFIGURATION
|
||||
ES_URL="http://:"
|
||||
ES_PASSWORD=""
|
||||
ES_USER_NAME=""
|
||||
ES_INDEX_NAME=""
|
||||
ELASTIC_cluster_name=""
|
||||
ELASTIC_node_name=""
|
||||
ELASTIC_LIMIT_IMPORT=""
|
||||
|
||||
131
utils.py
Normal file
131
utils.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
from elasticsearch import Elasticsearch, helpers
|
||||
import requests, logging, asyncio, httpx, os, uuid, traceback, orjson, copy, uvicorn, time
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from time import sleep
|
||||
|
||||
# ------------------------ Global-params
|
||||
|
||||
def load_orjson(path: str | Path):
    """Read the file at ``path`` and return its content parsed with orjson."""
    # orjson works on bytes, so the file is read in binary mode.
    raw = Path(path).read_bytes()
    return orjson.loads(raw)
|
||||
|
||||
|
||||
def save_orjson(path, data):
    """Serialize ``data`` with orjson (2-space indent, non-str keys allowed) into ``path``."""
    flags = orjson.OPT_INDENT_2 | orjson.OPT_NON_STR_KEYS
    encoded = orjson.dumps(data, option=flags)
    with open(path, "wb") as out:
        out.write(encoded)
|
||||
|
||||
|
||||
def split_text_chunks(text: str, max_len: int = 4000):
|
||||
"""Split a long text into safe chunks."""
|
||||
return [text[i : i + max_len] for i in range(0, len(text), max_len)]
|
||||
|
||||
|
||||
class ElasticHelper:
    """Thin wrapper around an Elasticsearch client.

    Class attributes provide defaults for the counters and for the mappings
    path; ``__init__`` connects and records the outcome in ``success_connect``.
    """

    counter = 0
    total = 0
    id = ""
    path_mappings = os.getcwd() + "/repo/_other/"

    def __init__(
        self,
        es_url="http://127.0.0.1:6900",
        es_pass="",
        es_user="elastic",
        path_mappings="",
    ):
        """Create the client and wait for the service to answer a ping.

        Args:
            es_url: Elasticsearch address.
            es_pass: Password; when empty or ``None`` no authentication is used.
            es_user: User name used together with ``es_pass``.
            path_mappings: Overrides the default mappings path when non-empty.

        The service is pinged up to 10 times with a 5 second pause between
        attempts. An exception raised while pinging is printed and ends the
        attempts. If no ping succeeds a message is printed and
        ``success_connect`` stays ``False``.
        """
        if path_mappings:
            self.path_mappings = path_mappings

        # "not es_pass" also covers None, e.g. an unset environment variable.
        if not es_pass:
            self.es = Elasticsearch(es_url)
        else:
            self.es = Elasticsearch(
                es_url,
                basic_auth=(es_user, es_pass),
                verify_certs=False,
            )

        self.counter = 0
        self.total = 0
        self.id = ""

        attempts = 10
        self.success_connect = False
        for attempt in range(attempts):
            try:
                if self.es.ping():
                    self.success_connect = True
                    break
            except Exception as e:
                print("Elastic Connection error : ", e)
                break

            # The message used to announce 30 s while the pause was 5 s.
            print("Elastic Connection Not ping, sleep 5 s : ", attempt)
            # No pause after the final attempt: nothing is left to wait for.
            if attempt < attempts - 1:
                sleep(5)

        if not self.success_connect:
            print("******", "not access to elastic service")

    def search(self, **params):
        """Run a search; on any error log it and return an empty hit list."""
        try:
            return self.es.search(**params)
        except Exception:
            traceback.print_exc()
            return {"hits": {"hits": []}}

    def get_document(self, index_name, id):
        """Return the document ``id`` from ``index_name``."""
        return self.es.get(index=index_name, id=id)

    def exist_document(self, index_name, id):
        """Return whether document ``id`` exists in ``index_name``."""
        return self.es.exists(index=index_name, id=id)

    def update_index_doc(self, is_update_state, index_name_o, eid, data):
        """Update or index a document.

        Args:
            is_update_state: ``True`` to update the existing document, otherwise
                the document is indexed.
            index_name_o: Index name.
            eid: Document id.
            data: Document content.

        Returns:
            The Elasticsearch response.
        """
        if is_update_state:
            return self.es.update(index=index_name_o, id=eid, doc=data)
        return self.es.index(index=index_name_o, id=eid, document=data)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user