Compare commits
No commits in common. "a3163d6ec6cd8a4b754795fe53da0b846ff94d6b" and "f67a9194a31090bd170f268e9129bd975b066bc9" have entirely different histories.
a3163d6ec6
...
f67a9194a3
|
|
@ -1,16 +0,0 @@
|
|||
import json
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Callable, Literal, Union, Dict, Any
|
||||
|
||||
|
||||
class ChatLaw(BaseModel):
    """Structured answer of the law-chat service.

    Field meanings are not visible in this file; the names suggest the
    following (TODO confirm against the back-end contract):

    - ``ref_ids``: identifiers of the sections the answer refers to.
    - ``answer`` / ``answer_type``: the answer text and its category.
    - ``llm_reason``: optional reasoning text; may be absent.
    - ``metadata``: free-form extra data.
    """

    ref_ids: List
    answer: str
    answer_type: str
    # The default is None, so the annotation must admit None as well;
    # otherwise an explicit ``llm_reason=None`` is rejected by validators
    # that do not infer Optional from the default.
    llm_reason: Optional[str] = None
    metadata: Dict
|
||||
|
||||
class TitleRepeat(BaseModel):
    """One candidate law title that resembles the title being checked."""

    title: str
    id: str
    # The similarity score is a percentage rounded to two decimals, so it is
    # generally fractional. Declaring it as ``int`` either truncates it or
    # fails validation, depending on the pydantic version; ``float`` still
    # accepts whole numbers.
    score: float
|
||||
400
core/core.py
400
core/core.py
|
|
@ -1,22 +1,32 @@
|
|||
################# modularity
|
||||
### import from external-package
|
||||
import unicodedata, requests, logging, asyncio, httpx, os, json, uuid, traceback, orjson, copy, uvicorn, time, re
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
import requests, logging, asyncio, httpx, os, uuid, traceback, orjson, copy, uvicorn, time, re
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
from time import sleep
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Tuple
|
||||
from collections import defaultdict
|
||||
from typing import Union, List
|
||||
from elasticsearch import Elasticsearch, helpers
|
||||
import requests, logging, asyncio, httpx, os, uuid, traceback, orjson, copy, uvicorn, time
|
||||
from pathlib import Path
|
||||
from time import sleep
|
||||
import re
|
||||
import unicodedata
|
||||
import httpx
|
||||
import json
|
||||
|
||||
### import from internal-file
|
||||
from router.bale.base_model import *
|
||||
from router.bale.bale_buttons import *
|
||||
from router.bale.bale_massages import *
|
||||
from core.static import *
|
||||
from core.base_model import *
|
||||
|
||||
|
||||
############## Global-Params
|
||||
|
||||
DATA_DIR = os.path.join(".", "_data_json")
|
||||
if not os.path.exists(DATA_DIR):
|
||||
os.makedirs(DATA_DIR)
|
||||
|
|
@ -24,6 +34,36 @@ if not os.path.exists(DATA_DIR):
|
|||
PERSIAN_BOUNDARIES = set(" \n،.؟!؛:")
|
||||
|
||||
|
||||
# f"https://YOUR_DOMAIN.com
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
class DbRule(BaseModel):
    # A rule stored in the database, together with the section and the law
    # (qanon) it belongs to. All fields are required strings.

    # --- rule ---
    rule_id: str
    rule_content: str
    rule_type: str

    # --- section that contains the rule ---
    section_id: str
    section_content: str
    section_full_path: str

    # --- law (qanon) that contains the section ---
    qanon_id: str
    qanon_etebar: str
    qanon_title: str
    state_etebar: str
|
||||
|
||||
class InputRule(BaseModel):
    # A rule taken from the input text, with the section it came from.
    # All fields are required strings.

    # --- rule ---
    rule_id: str
    rule_content: str
    rule_type: str

    # --- originating section ---
    section_id: str
    section_content: str
|
||||
|
||||
class SemanticSearchP2P(BaseModel):
    # One semantic-search match: an input rule paired with a database rule.

    in_rule: InputRule  # the rule that was searched for
    db_rule: DbRule  # the stored rule it was matched against
    score: float = 0  # match score; defaults to 0 when not supplied
    metadata: Dict  # required free-form extra data
|
||||
|
||||
|
||||
class Formatter:
|
||||
"""
|
||||
Formatting options
|
||||
|
|
@ -42,8 +82,6 @@ class Formatter:
|
|||
###########################################################
|
||||
def __init__(self, max_len: int = 4000):
|
||||
self.max_len = max_len
|
||||
|
||||
self.dash = "--------------------------------------------------------------------"
|
||||
self._number_map = {
|
||||
"0": "0️⃣",
|
||||
"1": "1️⃣",
|
||||
|
|
@ -128,72 +166,17 @@ class Formatter:
|
|||
# توابع فرمت و ساختار
|
||||
###########################################################
|
||||
|
||||
def form_search_in_law_rules(self, header: str, body: List[SemanticSearchP2P], footer: str = None) -> List[str]:
    """
    Render semantic-search matches as message chunks, one block per input rule.

    Matches are grouped by ``in_rule.rule_id``. Each group becomes one block:
    the input rule's text followed by a numbered line per matched database
    rule with a link to its section. Blocks are packed into chunks of at most
    ``self.max_len`` characters; a block is never split, and every new chunk
    starts with ``header`` again.

    Args:
        header: text placed at the start of every chunk.
        body: the matches to render.
        footer: optional text appended to the last chunk, or emitted as its
            own chunk when it does not fit.

    Returns:
        The list of chunks; empty when ``body`` yields no blocks.
    """
    tail = ('\n' + footer) if footer else footer

    # Bucket the matches by input-rule id, keeping first-seen order.
    by_rule = defaultdict(list)
    for match in body:
        by_rule[match.in_rule.rule_id].append(match)

    print(f'form_search_in_law_rules -> {len(by_rule)}')

    bare_header = header.strip()
    chunks = []
    current = header

    for matches in by_rule.values():
        # All members of a group are assumed to share the same input-rule
        # text, so the first one is used as the block title.
        lines = [matches[0].in_rule.rule_content]
        for pos, match in enumerate(matches, start=1):
            link = self.__make_link_qs(src=match.db_rule.section_id)
            lines.append(f"{self.number(pos)} گزاره: {match.db_rule.rule_content} در {link}")
        block = "\n".join(lines) + "\n\n"

        if len(current) + len(block) <= self.max_len:
            current += block
            continue

        # The block does not fit: flush what has been collected (unless it is
        # only the header) and start a new chunk with the header repeated.
        if current.strip() != bare_header:
            chunks.append(current.rstrip())
        current = header + block

    # Flush the final chunk if it holds more than the header.
    if current.strip() and current.strip() != bare_header:
        chunks.append(current.rstrip())

    if tail and chunks:
        if len(chunks[-1]) + len(tail) <= self.max_len:
            chunks[-1] = chunks[-1] + tail
        else:
            chunks.append(tail)

    return chunks
|
||||
|
||||
def form_search_in_law(self, header: str, sections: List[SingleSearchData], footer:str=None) -> List:
|
||||
def form_search_in_law(self, title: str, sections: List) -> List:
|
||||
"""
|
||||
خروجی به صورت چانک بدون دکمه هر خروجی لینک دارد
|
||||
برش امن لینک ها و اده ها
|
||||
"""
|
||||
footer = '\n\n'+footer
|
||||
chunks = []
|
||||
current = header
|
||||
current = f"برای پرسش: {title}\n\n"
|
||||
|
||||
for i, data in enumerate(sections, start=1):
|
||||
sec_text = data.content
|
||||
idx = data.id
|
||||
sec_text = data.get("content", "")
|
||||
idx = data.get("id")
|
||||
|
||||
# ساخت ref کامل
|
||||
ref = self.__make_link_qs(src=idx)
|
||||
|
|
@ -213,13 +196,6 @@ class Formatter:
|
|||
if current.strip():
|
||||
chunks.append(current.rstrip())
|
||||
|
||||
if footer :
|
||||
last = chunks[-1]
|
||||
if len(last) + len(footer) <= self.max_len:
|
||||
chunks[-1] = last + footer
|
||||
else:
|
||||
chunks.append(footer)
|
||||
|
||||
return chunks
|
||||
|
||||
def form_law_chat(self, answer_text: str):
|
||||
|
|
@ -271,24 +247,27 @@ class Formatter:
|
|||
|
||||
return chunks
|
||||
|
||||
async def form_title_repeated(self, _input: List[TitleRepeat]) -> List:
|
||||
if len(_input) == 0:
|
||||
async def form_title_repeated(self, data: List[Dict[str, str]]):
|
||||
if len(data) == 0:
|
||||
return ["هیچ عنوان تکراری و یا حتی مشابه یافت نشد."]
|
||||
|
||||
chunks = []
|
||||
current = "نزدیکترین عناوین مشابه عنوان قانون موارد زیر می باشد:\n\n"
|
||||
|
||||
for i, item in enumerate(_input, start=1):
|
||||
for i, item in enumerate(data, start=1):
|
||||
title = item.get("title", "")
|
||||
sec_id = item.get("id", "")
|
||||
score = item.get("score", "")
|
||||
|
||||
if not item.title or not item.id:
|
||||
if not title or not sec_id:
|
||||
continue
|
||||
|
||||
ref = self.__make_link_qq(src=item.id)
|
||||
ref = self.__make_link_qq(src=sec_id)
|
||||
|
||||
# بلوک کامل: عنوان + لینک — هر دو در یک بلوک غیرقابل تقسیم
|
||||
# block = f"{i}. {title}(وزن {score})\n{ref}\n"
|
||||
block = (
|
||||
f"{self.number(i)} {self.bold(item.title)}؛ میزان تشابه: %{item.score} ؛{ref}\n"
|
||||
f"{self.number(i)} {self.bold(title)}؛ میزان تشابه: %{score} ؛{ref}\n"
|
||||
)
|
||||
|
||||
# اگر اضافه کردن این بلوک باعث overflow شود → چانک قبلی را ذخیره و current را ریست کن
|
||||
|
|
@ -314,12 +293,11 @@ class Formatter:
|
|||
# print(f'Found implicit source(s): {links}')
|
||||
return ", ".join(links) # جایگزینی همه کدها با لینکهایشان
|
||||
|
||||
async def form_chat(self, llm_text: str, header: str, footer: str=None):
|
||||
async def form_chat(self, llm_text: str, header: str):
|
||||
"""
|
||||
answer_text: متن خروجی مدل که داخلش عبارتهای مثل (منبع: qs2117427) وجود دارد
|
||||
"""
|
||||
if footer:
|
||||
footer = '\n\n'+footer
|
||||
|
||||
# الگو برای تشخیص هر پرانتز که شامل یک یا چند کد باشد
|
||||
# مثلا: (qs123) یا (qs123, qs456, qs789)
|
||||
pattern = r"\((?:منبع[:: ]+)?([a-zA-Z0-9_, ]+)\)"
|
||||
|
|
@ -350,25 +328,17 @@ class Formatter:
|
|||
if current.strip():
|
||||
chunks.append(current.strip())
|
||||
|
||||
if footer and chunks:
|
||||
last = chunks[-1]
|
||||
if len(last) + len(footer) <= self.max_len:
|
||||
chunks[-1] = last + footer
|
||||
else:
|
||||
chunks.append(footer)
|
||||
|
||||
return chunks
|
||||
|
||||
async def form_llm_answer_chat(self, _input, header):
|
||||
if len(_input) > 0:
|
||||
return await self.form_chat(llm_text=_input, header=header)
|
||||
return await self.form_chat(llm_text=_input["text"], header=header)
|
||||
# _input['source']
|
||||
return ["هیچ ماده مرتبطی یافت نشد!"]
|
||||
|
||||
async def form_subject_unity(
|
||||
self,
|
||||
_input: Union[List[RuleRelation], str],
|
||||
header="نتایج اولیه مغایرت های احتمالی :\n",
|
||||
async def form_subject_unity(self,
|
||||
_input:Union[List[RuleRelation], str],
|
||||
header="نتایج اولیه مغایرت های احتمالی :\n"
|
||||
):
|
||||
if isinstance(_input, str):
|
||||
_input = self.form_law_chat(_input)
|
||||
|
|
@ -388,10 +358,7 @@ class Formatter:
|
|||
block_lines = [f"{self.number(idx)} در قانون {self.bold(qanon_title)}"]
|
||||
sample_items_by_section = {}
|
||||
for item in _input:
|
||||
if (
|
||||
item.db_rule.qanon_title == qanon_title
|
||||
and item.db_rule.section_id in section_ids
|
||||
):
|
||||
if item.db_rule.qanon_title == qanon_title and item.db_rule.section_id in section_ids:
|
||||
sid = item.db_rule.section_id
|
||||
if sid not in sample_items_by_section:
|
||||
sample_items_by_section[sid] = item
|
||||
|
|
@ -430,22 +397,18 @@ class Formatter:
|
|||
# Button: add *once* per qanon_title
|
||||
if qanon_title and qanon_title not in seen_qanon_titles:
|
||||
seen_qanon_titles.add(qanon_title)
|
||||
buttons.append(
|
||||
[
|
||||
buttons.append([
|
||||
{
|
||||
"text": f"بررسی مغایرت با {qanon_title}",
|
||||
"callback_data": f"subject_unities:qq:{qanon_title}",
|
||||
"callback_data": f"subject_unities:qq:{qanon_title}"
|
||||
}
|
||||
]
|
||||
)
|
||||
])
|
||||
|
||||
# Final flush
|
||||
if current.strip() and (
|
||||
len(chunks) == 0 or current.strip() != header.rstrip()
|
||||
):
|
||||
if current.strip() and (len(chunks) == 0 or current.strip() != header.rstrip()):
|
||||
chunks.append(current.rstrip())
|
||||
|
||||
input_dict = {item.db_rule.section_id: item for item in _input}
|
||||
input_dict = {item.db_rule.section_id : item for item in _input}
|
||||
mapping_data = defaultdict(list)
|
||||
for k, v in groups.items():
|
||||
for i in v:
|
||||
|
|
@ -453,13 +416,11 @@ class Formatter:
|
|||
|
||||
return chunks, buttons, mapping_data
|
||||
|
||||
|
||||
async def form_rule_making(
|
||||
self, _input, header="گزاره های حقوقی زیر استخراج شد:\n\n", footer=None
|
||||
self, _input, header="گزاره های حقوقی زیر استخراج شد:\n\n"
|
||||
):
|
||||
if len(_input) > 0:
|
||||
if footer:
|
||||
footer = '\n\n'+footer
|
||||
|
||||
chunks = []
|
||||
current = header
|
||||
|
||||
|
|
@ -473,15 +434,8 @@ class Formatter:
|
|||
if current.strip():
|
||||
chunks.append(current.rstrip())
|
||||
|
||||
if footer and chunks:
|
||||
last = chunks[-1]
|
||||
if len(last) + len(footer) <= self.max_len:
|
||||
chunks[-1] = last + footer
|
||||
else:
|
||||
chunks.append(footer)
|
||||
return chunks
|
||||
|
||||
|
||||
return ["هیچ گزاره حقوقی یافت و استخراج نشد!"]
|
||||
|
||||
def get_asl(self, _in: str):
|
||||
|
|
@ -535,30 +489,7 @@ class Formatter:
|
|||
|
||||
return chunks
|
||||
|
||||
async def form_constitution_low(self, input: Dict, _id, _header='نتایچ بررسی مغایرت با اصول مهم قانون اساسی\n\n'):
    """
    Render every entry of ``input`` with ``self.get_in_form_single`` and return
    the result as one string.

    Rendered blocks are accumulated until adding the next one would exceed
    ``self.max_len``. At that point the accumulated text is closed (trailing
    whitespace stripped) and a new segment is started with ``_header`` in
    front of the block. The first segment starts without the header. All
    segments are finally concatenated with no separator.

    Args:
        input: mapping whose keys are passed as ``asl`` and values as
            ``_in_dict`` to ``get_in_form_single``.
        _id: forwarded unchanged to ``get_in_form_single``.
        _header: text that opens every segment after the first.

    Returns:
        The concatenated segments as a single ``str``.
    """
    pieces = []
    buffer = ""

    for asl, payload in input.items():
        rendered = self.get_in_form_single(asl=asl, _in_dict=payload, _id=_id)

        if len(buffer) + len(rendered) <= self.max_len:
            buffer += rendered
            continue

        # No room left: close the running segment and open a new one.
        pieces.append(buffer.rstrip())
        buffer = _header + rendered

    # Keep the last segment only when it has visible content.
    if buffer.strip():
        pieces.append(buffer.rstrip())

    return ''.join(pieces)
|
||||
|
||||
async def form_ss_rules(self, _input: List[Dict], header):
|
||||
async def form_ss_rules(self, _input:List[Dict], header):
|
||||
|
||||
if len(_input) > 1:
|
||||
chunks = []
|
||||
|
|
@ -568,8 +499,8 @@ class Formatter:
|
|||
# -------- 1. group by qanon_id / qanon_title
|
||||
groups = defaultdict(set)
|
||||
for item in _input:
|
||||
key = item["db_rule"]["qanon_title"]
|
||||
groups[key].add(item["db_rule"]["section_id"])
|
||||
key = item['db_rule']['qanon_title']
|
||||
groups[key].add(item['db_rule']['section_id'])
|
||||
|
||||
for qanon_title, ids in groups.items():
|
||||
_i += 1
|
||||
|
|
@ -591,8 +522,10 @@ class Formatter:
|
|||
|
||||
return ["هیچ ماده مرتبطی یافت نشد!"]
|
||||
|
||||
async def form_conflict_detection(
|
||||
self, _input: RuleRelation, header="نتیجه تشخیص مغایرت :\n"
|
||||
|
||||
|
||||
async def form_conflict_detection(self,
|
||||
_input:RuleRelation, header="نتیجه تشخیص مغایرت :\n"
|
||||
):
|
||||
current = header
|
||||
|
||||
|
|
@ -603,8 +536,8 @@ class Formatter:
|
|||
|
||||
return current
|
||||
|
||||
async def form_conflict_type_detection(
|
||||
self, _input: RuleRelation, header="نتیجه تشخیص نوع مغایرت :\n"
|
||||
async def form_conflict_type_detection(self,
|
||||
_input:RuleRelation, header="نتیجه تشخیص نوع مغایرت :\n"
|
||||
):
|
||||
current = header
|
||||
|
||||
|
|
@ -615,8 +548,8 @@ class Formatter:
|
|||
|
||||
return current
|
||||
|
||||
async def form_relation_identification(
|
||||
self, _input: RuleRelation, header="نتیجه رابطه مغایرت :\n"
|
||||
async def form_relation_identification(self,
|
||||
_input:RuleRelation, header="نتیجه رابطه مغایرت :\n"
|
||||
):
|
||||
current = header
|
||||
|
||||
|
|
@ -627,8 +560,8 @@ class Formatter:
|
|||
|
||||
return current
|
||||
|
||||
async def form_evaluation(
|
||||
self, _input: Evaluation, header="نتیجه نهایی بررسی مغایرت :\n"
|
||||
async def form_evaluation(self,
|
||||
_input:Evaluation, header="نتیجه نهایی بررسی مغایرت :\n"
|
||||
):
|
||||
current = header
|
||||
|
||||
|
|
@ -637,18 +570,15 @@ class Formatter:
|
|||
current += f"1. آیا ارزیابی وحدت موضوع صحیح است؟ {_input.is_subject_unity_assessment_correct}\n"
|
||||
current += f"2. آیا ارزیابی تشخیص نوع درست است ؟ {_input.is_conflict_detection_correct}\n"
|
||||
current += f"3. آیا ارزیابی نوع درست است ؟ {_input.is_conflict_type_detection_correct}\n"
|
||||
current += (
|
||||
f"4. رابطه مغایرت چطور؟ {_input.is_relation_type_detection_correct}\n"
|
||||
)
|
||||
current += f"4. رابطه مغایرت چطور؟ {_input.is_relation_type_detection_correct}\n"
|
||||
current += f"5. نوع رابطه ؟ {_input.valid_relation_type}\n"
|
||||
current += f"6.توضیح بیشتر: {_input.comments}\n"
|
||||
|
||||
return current
|
||||
|
||||
async def from_law_writing_policy(
|
||||
self, _input_dict: Dict, header: str
|
||||
) -> List[str]:
|
||||
f_list = [self.bold(header)]
|
||||
async def from_law_writing_policy(self, _input_dict: Dict, header:str) -> List[str]:
|
||||
f_list = [
|
||||
self.bold(header)]
|
||||
_str = {
|
||||
"analyze": "گزارش تحلیلی بندبهبند",
|
||||
"strength": "بیان نقاط قوت",
|
||||
|
|
@ -662,57 +592,63 @@ class Formatter:
|
|||
f_list += [_title]
|
||||
# f_list += ['\n']
|
||||
f_list += [v]
|
||||
f_list += ["\n"]
|
||||
f_list += ['\n']
|
||||
|
||||
return ["\n".join(f_list)]
|
||||
|
||||
|
||||
|
||||
"""
|
||||
deleteMessage
|
||||
message_id
|
||||
chat_id
|
||||
"""
|
||||
|
||||
class RequestManager:
|
||||
def __init__(
|
||||
self,
|
||||
host_url: str,
|
||||
def __init__(self,
|
||||
host_url:str,
|
||||
url_time_out=1200,
|
||||
step_time_out=600,
|
||||
):
|
||||
if host_url.endswith('/'):
|
||||
self.host_url = host_url
|
||||
else:
|
||||
self.host_url = host_url + '/'
|
||||
|
||||
self.url_time_out = url_time_out
|
||||
self.step_time_out = step_time_out
|
||||
TASK_URL = {
|
||||
TASK_URL ={
|
||||
# stream
|
||||
"": "/stream/chat_logical",
|
||||
"":"/stream/chat_logical",
|
||||
|
||||
# none-stream
|
||||
"": "/conflict/general_policy/qs_unity",
|
||||
"": "/conflict/all_qanon/qs_unity",
|
||||
"": "/conflict/general_policy/unity_eval",
|
||||
"": "/conflict/law_writing_policy",
|
||||
"": "/conflict/constitution",
|
||||
"": "/rule_making",
|
||||
"": "/chat",
|
||||
"": "/talk",
|
||||
"": "/semantic_search/chat_logical",
|
||||
"": "/semantic_search/run_semantic_search",
|
||||
"": "/semantic_search/run_chat",
|
||||
"":"/conflict/general_policy/qs_unity",
|
||||
"":"/conflict/all_qanon/qs_unity",
|
||||
"":"/conflict/general_policy/unity_eval",
|
||||
"":"/conflict/law_writing_policy",
|
||||
"":"/conflict/constitution",
|
||||
"":"/rule_making",
|
||||
"":"/chat",
|
||||
"":"/talk",
|
||||
"":"/semantic_search/chat_logical",
|
||||
"":"/semantic_search/run_chat",
|
||||
"":"/semantic_search/run_semantic_search",
|
||||
}
|
||||
|
||||
|
||||
async def get_result(
|
||||
self,
|
||||
payload,
|
||||
url: str,
|
||||
section_id: str = "qs_10001",
|
||||
mode_type="bale",
|
||||
url :str,
|
||||
section_id:str='qs_10001',
|
||||
mode_type='bale',
|
||||
):
|
||||
if url.startswith('/'):
|
||||
url = url[1:]
|
||||
|
||||
_url = self.host_url + url
|
||||
print(f"get_result _url {_url}")
|
||||
_url = self.host_url+url
|
||||
print(
|
||||
f'get_result _url {_url}'
|
||||
)
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.url_time_out) as client:
|
||||
response = await client.post(url=_url, json=payload)
|
||||
response = await client.post(
|
||||
url=_url, json=payload
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
result = data.get("result", "❌ پاسخی دریافت نشد")
|
||||
|
|
@ -723,10 +659,11 @@ class RequestManager:
|
|||
print(f"❌ خطای RAG:\n{str(e)}")
|
||||
return "❌ ارتباط با سرور قطع میباشد"
|
||||
|
||||
|
||||
async def stream_result(
|
||||
self,
|
||||
url: str,
|
||||
payload: Dict,
|
||||
url :str,
|
||||
payload : Dict,
|
||||
):
|
||||
"""
|
||||
هر مرحله شامل:
|
||||
|
|
@ -735,15 +672,16 @@ class RequestManager:
|
|||
data : "داده در این مرحله"
|
||||
}
|
||||
"""
|
||||
if url.startswith('/'):
|
||||
url = url[1:]
|
||||
|
||||
timeout = httpx.Timeout(self.step_time_out, read=self.url_time_out)
|
||||
_url = self.host_url + url
|
||||
_url = self.host_url+url
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
# ارسال درخواست به صورت Stream
|
||||
async with client.stream("POST", url=_url, json=payload) as r:
|
||||
async with client.stream(
|
||||
"POST",
|
||||
url=_url,
|
||||
json=payload
|
||||
) as r:
|
||||
# بررسی وضعیت پاسخ
|
||||
if r.status_code != 200:
|
||||
print(f"Error: {r.status_code}")
|
||||
|
|
@ -895,6 +833,38 @@ def make_link_qs(src, ref_text=REF_TEXT):
|
|||
return f"[{ref_text}]({QS_WEB_LINK}{src})"
|
||||
|
||||
|
||||
def encode_uc(update: BaleUpdate) -> str:
    """
    Build the ``"<username-or-user-id>:<chat_id>"`` key for an update.

    The sender and chat are read from ``update.message`` when it is set,
    otherwise from ``update.callback_query`` (the chat comes from the
    callback's message). When neither is set, the literal ``"unknown"`` is
    returned. The user id is used when the username is empty.
    """
    message = update.message
    if message:
        sender, chat = message.from_user, message.chat
    else:
        callback = update.callback_query
        if not callback:
            return "unknown"
        sender, chat = callback.from_user, callback.message.chat

    # Only the chat id (not the whole chat object) goes into the key.
    return f"{sender.username or sender.id}:{chat.id}"
|
||||
|
||||
|
||||
def decode_uc(uc_id: str) -> tuple:
    """
    Split a ``"username:chat_id"`` (or ``"user_id:chat_id"``) key into its parts.

    Only the first ``":"`` separates the two parts; any further colons stay in
    the chat-id part.

    Args:
        uc_id: key of the form ``"<username-or-user-id>:<chat_id>"``.

    Returns:
        A ``(username, chat_id)`` tuple. ``chat_id`` is converted to ``int``
        when it consists only of digits and is left as ``str`` otherwise
        (for example when it carries a sign).

    Raises:
        ValueError: when ``uc_id`` contains no ``":"`` or the digit string
            cannot be converted.
    """
    try:
        username, chat_id = uc_id.split(":", 1)
        return (username, int(chat_id) if chat_id.isdigit() else chat_id)
    except ValueError as err:
        # Same exception type as before, but say what was wrong and keep the
        # original cause for debugging.
        raise ValueError(
            f"decode_uc: expected 'username:chat_id', got {uc_id!r}"
        ) from err
|
||||
|
||||
|
||||
async def get_from_gpl(in_dict: Dict) -> List[str]:
|
||||
f_list = []
|
||||
|
|
@ -927,17 +897,15 @@ def cer(ref: str, hyp: str) -> float:
|
|||
dp[j] = min(
|
||||
dp[j] + 1, # deletion
|
||||
dp[j - 1] + 1, # insertion
|
||||
prev + (ref[i - 1] != hyp[j - 1]), # substitution
|
||||
prev + (ref[i - 1] != hyp[j - 1]) # substitution
|
||||
)
|
||||
prev = cur
|
||||
|
||||
return (dp[n] / m) * 100
|
||||
|
||||
|
||||
import nltk
|
||||
from nltk.metrics import edit_distance
|
||||
|
||||
|
||||
def cer_ntlk(exist: str, new: str) -> float:
|
||||
"""
|
||||
این روش دقیقتر است، چون تعداد کاراکترهای اضافی یا کم در متن طولانی،
|
||||
|
|
@ -946,7 +914,6 @@ def cer_ntlk(exist: str, new: str) -> float:
|
|||
# edit distance روی کلمات
|
||||
return round(float(1 - edit_distance(new, exist) / len(exist)) * 100, 2)
|
||||
|
||||
|
||||
def wer_nltk(new: str, exist: str) -> float:
|
||||
new = new.split()
|
||||
exist = exist.split()
|
||||
|
|
@ -954,6 +921,43 @@ def wer_nltk(new: str, exist: str) -> float:
|
|||
return round(float(1 - edit_distance(new, exist) / len(exist)) * 100, 2)
|
||||
|
||||
|
||||
async def title_repeated(
    qanontitle, search_range: int = 10, url="http://localhost:8010/v1/indices/qaqanon/search"
):
    """
    Fetch titles similar to ``qanontitle`` from the search REST endpoint.

    The endpoint is asked for ``search_range + 10`` hits. Every hit is
    re-scored locally with ``wer_nltk`` against ``qanontitle``, and the
    ``search_range`` best-scoring hits are returned.

    NOTE(review): the original note says this lookup should eventually be
    provided by a dedicated service — confirm the intended endpoint.

    Args:
        qanontitle: the law title to look up.
        search_range: maximum number of results to return.
        url: search endpoint to POST to.

    Returns:
        A list of ``{"title", "id", "score"}`` dicts sorted by descending
        score. An empty list is returned when the endpoint does not answer
        with HTTP 200, so callers can always take ``len()`` of the result.
    """
    headers = {"accept": "application/json", "Content-Type": "application/json"}

    body = {
        "query": qanontitle,
        "from_": 0,
        "size": search_range + 10,
        "track_total_hits": True,
    }

    # ``requests`` is blocking; run it in the default executor so the event
    # loop is not stalled for up to the 20 s timeout.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        lambda: requests.request("POST", url, headers=headers, json=body, timeout=20),
    )

    if response.status_code != 200:
        print("ERROR:", response.status_code)
        print(response.text)
        return []

    data = response.json()
    ids = []
    for hit in data["hits"]["hits"]:
        title = hit["_source"]["title"]
        ids.append(
            {
                "title": title,
                "id": hit["_source"]["id"],
                "score": wer_nltk(exist=title, new=qanontitle),
            }
        )

    return sorted(ids, key=lambda x: x["score"], reverse=True)[:search_range]
|
||||
|
||||
|
||||
def normalize_persian(text: str) -> str:
|
||||
|
|
@ -1435,3 +1439,5 @@ def chunked_simple_text(answer_text):
|
|||
chunks.append(current.strip())
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,217 +0,0 @@
|
|||
from core.core import RequestManager, wer_nltk
|
||||
from core.base_model import *
|
||||
from router.bale.base_model import *
|
||||
import requests
|
||||
|
||||
"""
|
||||
روند هر مرحله در اینجا مشخص می شود و داده و خروجی و پردازش در اینجا انجام می شود
|
||||
"""
|
||||
__all__ = ["Operation"]
|
||||
|
||||
|
||||
class Operation:
    """
    Logic layer: builds the payload for each back-end task and forwards it
    through the injected ``RequestManager``.

    Methods named ``stream_*`` (and ``conflict_qanon_asasi_low``) are async
    generators that relay whatever ``RequestManager.stream_result`` yields;
    the others await ``RequestManager.get_result`` and return its result,
    parsed into a model where one is declared.
    """

    def __init__(self, request_manager: RequestManager):
        self.request_manager = request_manager

    async def search_in_law(
        self, query: str, limit: int, rerank_model: str, embed_model: str
    ) -> BMNewSemanticSearchOutput:
        """
        Run a semantic search and parse the result.

        Logic only — no dependency on Bale or on the user object.
        """
        result = await self.request_manager.get_result(
            payload={
                "query": query,
                "limit": limit,
                "rerank_model": rerank_model,
                "embed_model": embed_model,
            },
            url="new/semantic_search",
        )

        return BMNewSemanticSearchOutput.parse_obj(result)

    async def stream_search_in_law(
        self, query: str, limit: int, rerank_model: str, embed_model: str
    ):
        """
        Streaming variant of ``search_in_law``: yield each item as it arrives.

        Logic only — no dependency on Bale or on the user object.
        """
        async for data in self.request_manager.stream_result(
            payload={
                "query": query,
                "limit": limit,
                "rerank_model": rerank_model,
                "embed_model": embed_model,
            },
            url="new/semantic_search",
        ):
            yield data

    async def stream_rule_making(self, query, llm_name, effort):
        """Stream the rule-extraction task for ``query``."""
        async for data in self.request_manager.stream_result(
            payload={
                "query": query,
                # NOTE(review): fixed query id — confirm whether it should
                # come from the caller.
                "query_id": "qs12357498",
                "llm_effort": effort,
                "llm_model_name": llm_name,
            },
            url="/single/rule_making",
        ):
            yield data

    async def stream_chat_in_law(self, query, limit, effort, mode_type="bale"):
        """Stream the law-chat task for ``query``."""
        async for data in self.request_manager.stream_result(
            payload={
                "section_content": query,
                "effort": effort,
                "limit": limit,
                "mode_type": mode_type,
            },
            url="/single/semantic_search/run_chat",
        ):
            yield data

    async def stream_rule_semantic_search(
        self,
        queries: List,
        filter_qanon_ids: List,
        limit_rerank: int,
        embed_model="jinaai/jina-colbert-v2",
        rerank_model="BAAI/bge-reranker-v2-m3",
        metadata=None,
        limit_cos=100,
    ):
        """
        Stream a rule-level semantic search.

        ``metadata`` defaults to an empty dict. The default is created per
        call rather than shared between calls.
        """
        if metadata is None:
            metadata = {}

        async for data in self.request_manager.stream_result(
            payload={
                "queries": queries,
                "filter_qanon_ids": filter_qanon_ids,
                "embed_model": embed_model,
                "rerank_model": rerank_model,
                "metadata": metadata,
                "limit_rerank": limit_rerank,
                "limit_cos": limit_cos,
            },
            url="/single/rule_semantic_search",
        ):
            yield data

    async def chat_in_law(self, query, effort, limit, mode_type="bale") -> ChatLaw:
        """Run the law-chat task and parse the result into ``ChatLaw``."""
        result = await self.request_manager.get_result(
            payload={
                "section_content": query,
                "effort": effort,
                "limit": limit,
                "mode_type": mode_type,
            },
            url="/single/semantic_search/run_chat",
        )
        print(f"chat_in_law {result}")
        return ChatLaw.parse_obj(result)

    async def title_repeated(
        self,
        qanontitle,
        search_range: int = 10,
        url="http://localhost/api/elp/v1/indices/qaqanon/search",
    ) -> List[TitleRepeat]:
        """
        Fetch titles similar to ``qanontitle`` from the search REST endpoint.

        The endpoint is asked for ``search_range + 10`` hits. Every hit is
        re-scored locally with ``wer_nltk``, and the ``search_range``
        best-scoring hits are returned, best first.

        NOTE(review): the original note says this lookup should eventually be
        provided by a dedicated service — confirm the intended endpoint.

        Returns:
            The matching titles. An empty list is returned when the endpoint
            does not answer with HTTP 200, so the declared return type holds
            on every path.
        """
        # Local import: this module does not import asyncio at top level.
        import asyncio

        headers = {"accept": "application/json", "Content-Type": "application/json"}

        body = {
            "query": qanontitle,
            "from_": 0,
            "size": search_range + 10,
            "track_total_hits": True,
        }

        # ``requests`` is blocking; run it in the default executor so the
        # event loop is not stalled for up to the 20 s timeout.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.request("POST", url, headers=headers, json=body, timeout=20),
        )
        print(f"title_repeated -> {response}")

        if response.status_code != 200:
            print("ERROR:", response.status_code)
            print(response.text)
            return []

        data = response.json()
        ids = []
        for hit in data["hits"]["hits"]:
            title = hit["_source"]["title"]
            ids.append(
                TitleRepeat(
                    title=title,
                    id=hit["_source"]["id"],
                    score=wer_nltk(exist=title, new=qanontitle),
                )
            )

        return sorted(ids, key=lambda x: x.score, reverse=True)[:search_range]

    async def talk(self, query) -> str:
        """Send ``query`` to the free-talk endpoint and return its result."""
        result = await self.request_manager.get_result(
            payload={
                "user_input": query,
            },
            url="/talk",
        )
        return result

    async def conflict_qanon_asasi_low(self, query, effort, limit, mode_type="bale"):
        """Stream the constitution-conflict check served by ``constitution_low``."""
        async for data in self.request_manager.stream_result(
            payload={
                "section_content": query,
                "effort": effort,
                "limit": limit,
                "mode_type": mode_type,
            },
            url="/new/conflict/constitution_low",
        ):
            yield data

    async def conflict_qanon_asasi_steps(self, query, effort, limit, mode_type="bale"):
        """Run the constitution-conflict check and return the raw result."""
        _result = await self.request_manager.get_result(
            payload={
                "section_content": query,
                "effort": effort,
                "limit": limit,
                "mode_type": mode_type,
            },
            url="/new/conflict/constitution",
        )

        return _result

    async def stream_logical_chat_in_law(self, query, effort, metadata, limit):
        """Stream the logical-chat task for ``query``."""
        async for data in self.request_manager.stream_result(
            payload={
                "section_content": query,
                "effort": effort,
                "metadata": metadata,
                "limit": limit,
            },
            url="/new/stream/chat_logical",
        ):
            yield data

    async def conflict_law_writing_policy(self, query, effort):
        """Run the law-writing-policy check and return the raw result."""
        _result = await self.request_manager.get_result(
            payload={
                "section_content": query,
                "effort": effort,
            },
            url="/conflict/law_writing_policy",
        )
        return _result
|
||||
7
main.py
7
main.py
|
|
@ -6,7 +6,6 @@ from fastapi.middleware.cors import CORSMiddleware
|
|||
from router.bale.bale import router as bale_router
|
||||
from router.bale.bale import initialize_webhook
|
||||
from router.bale.bale_handle import BaleBot, UserManager
|
||||
from core.operation import Operation
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
|
|
@ -68,12 +67,8 @@ async def lifespan(app: FastAPI):
|
|||
app.state.request_manager = RequestManager(
|
||||
host_url=BACK_END_URL,
|
||||
)
|
||||
app.state.operation = Operation(
|
||||
request_manager=app.state.request_manager,
|
||||
)
|
||||
# بله بات
|
||||
bale_bot = BaleBot(
|
||||
operation=app.state.operation,
|
||||
user_manager=app.state.user_manager,
|
||||
es_helper=app.state.es_helper,
|
||||
es_index_name=app.state.es_index_name,
|
||||
|
|
@ -83,7 +78,7 @@ async def lifespan(app: FastAPI):
|
|||
request_manager = app.state.request_manager,
|
||||
)
|
||||
app.state.bale_bot = bale_bot
|
||||
print("✅✅✅ Bale-Bot Initialized ✅✅✅")
|
||||
print("=== Bale-Bot Initialized ===")
|
||||
|
||||
yield # برنامه در این حالت اجرا میشود
|
||||
|
||||
|
|
|
|||
|
|
@ -1,22 +0,0 @@
|
|||
|
||||
Operation (logic layer)
|
||||
│
|
||||
├── handle_search_in_law(...)
|
||||
│ ├── دریافت ورودی خام
|
||||
│ ├── فراخوانی request_manager
|
||||
│ ├── ساخت خروجی منطقی (data)
|
||||
│ └── بدون وابستگی به بله
|
||||
│
|
||||
└── return OperationResult
|
||||
|
||||
BaleBot (delivery / interface layer)
|
||||
│
|
||||
├── دریافت user
|
||||
├── صدا زدن Operation
|
||||
├── تبدیل خروجی به متن + دکمه
|
||||
└── ارسال به بله
|
||||
|
||||
|
||||
# Logic & Flow
|
||||
Operation → منطق، پردازش، گرفتن دیتا، تصمیمگیری
|
||||
BaleBot → ورودی/خروجی، ارتباط با بله، فرمت پیام، دکمهها، مدیریت state کاربر
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
elasticsearch==8.13.2
|
||||
nltk
|
||||
pydantic
|
||||
fastapi
|
||||
|
|
@ -9,11 +9,6 @@ from dependencies import _get_bale_token, _get_bale_bot
|
|||
router = APIRouter(tags=["bale-bot"])
|
||||
##############
|
||||
|
||||
"""
|
||||
deleteMessage
|
||||
message_id
|
||||
chat_id
|
||||
"""
|
||||
|
||||
# @app.post(f"/webhook/{TOKEN}")
|
||||
@router.post("/webhook/{token}", description="ربات قانون یار")
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,92 +0,0 @@
|
|||
|
||||
from router.bale.base_model import *
|
||||
from core.core import *
|
||||
|
||||
def encode_uc(update: BaleUpdate) -> str:
    """
    Return the ``"<username-or-user-id>:<chat_id>"`` key identifying the
    sender and chat of ``update``.

    ``update.message`` takes precedence over ``update.callback_query``; for a
    callback the chat is taken from the callback's message. ``"unknown"`` is
    returned when the update carries neither.
    """
    if not update.message and not update.callback_query:
        return "unknown"

    if update.message:
        sender = update.message.from_user
        chat = update.message.chat
    else:
        sender = update.callback_query.from_user
        chat = update.callback_query.message.chat

    # Fall back to the numeric user id when the user has no username.
    name_part = sender.username or sender.id
    # Only the chat id (not the whole chat object) goes into the key.
    chat_part = chat.id

    return f"{name_part}:{chat_part}"
|
||||
|
||||
|
||||
def decode_uc(uc_id: str) -> dict:
|
||||
"""
|
||||
ورودی: 'username:chat_id' یا 'user_id:chat_id'
|
||||
خروجی: {'username': ..., 'chat_id': ...}
|
||||
"""
|
||||
|
||||
try:
|
||||
username, chat_id = uc_id.split(":", 1)
|
||||
|
||||
return (username, int(chat_id) if chat_id.isdigit() else chat_id)
|
||||
|
||||
except ValueError:
|
||||
raise ValueError(f"decode_uc")
|
||||
|
||||
|
||||
def extract_user_info(update: BaleUpdate) -> Dict:
    """Flatten the sender and chat details of an update into a plain dict.

    The sender is read from ``update.message`` when it is set, otherwise from
    ``update.callback_query``; for a callback the chat id comes from the
    message the callback is attached to.

    Returns:
        A dict with the keys ``uc_id``, ``chat_id``, ``user_id``, ``username``,
        ``first_name``, ``last_name``, ``is_bot`` and ``update``.

    Raises:
        ValueError: if the update has neither a message nor a callback query.
    """
    if update.message:
        sender = update.message.from_user
        chat_id = update.message.chat.id
    elif update.callback_query:
        sender = update.callback_query.from_user
        chat_id = update.callback_query.message.chat.id
    else:
        raise ValueError("No user info in update")

    return {
        "uc_id": str(encode_uc(update)),
        "chat_id": chat_id,
        "user_id": sender.id,
        "username": sender.username,
        "first_name": sender.first_name,
        # Both branches now report the last name. getattr keeps the previous
        # "" result if the callback sender object has no such attribute.
        "last_name": getattr(sender, "last_name", None) or "",
        "is_bot": sender.is_bot,
        "update": update,
    }
|
||||
|
||||
|
||||
class UserManager:
    """In-memory registry of ``BaleUser`` objects keyed by their ``uc_id``.

    On construction two JSON files are read with ``load_orjson``: the VIP
    username collection and a "temporary data" file. The file locations
    default to the original deployment paths but can be overridden, so the
    class is usable outside that one machine.
    """

    # Deployment defaults, kept so that ``UserManager()`` behaves as before.
    DEFAULT_VIP_PATH = (
        "/home/sabr/back_new/mj_bale_chat_test/mj_bale_chat/vip_username.json"
    )
    DEFAULT_TEMP_PATH = (
        "/home/sabr/back_new/mj_bale_chat_test/mj_bale_chat/temp.json"
    )

    def __init__(
        self,
        vip_path: str = DEFAULT_VIP_PATH,
        temp_path: str = DEFAULT_TEMP_PATH,
    ):
        """Load the VIP list and temporary data and start with no users.

        Args:
            vip_path: JSON file holding the VIP usernames.
            temp_path: JSON file loaded into ``temporary_data``.
        """
        self.users: Dict[str, BaleUser] = {}
        self.list_vip_username = load_orjson(vip_path)
        self.temporary_data = load_orjson(temp_path)

    def get_or_create(self, update: BaleUpdate) -> BaleUser:
        """Return the user for this update, creating it on first contact.

        The user is looked up by the ``uc_id`` derived from the update. A new
        ``BaleUser`` is built from the extracted fields, with ``is_vip=True``
        when the username is in the VIP list. In every case the user's
        ``update`` attribute is replaced with the incoming update.
        """
        user_data = extract_user_info(update)
        uc_id = user_data["uc_id"]

        user = self.users.get(uc_id)
        if user is None:
            # The VIP flag only matters when the user object is constructed.
            if user_data["username"] in self.list_vip_username:
                user_data["is_vip"] = True
            user = self.users[uc_id] = BaleUser(**user_data)

        user.update = update
        return user
|
||||
|
|
@ -6,45 +6,6 @@ from typing import Optional, Callable, List, Any
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Element type of ``BMNewSemanticSearchOutput.result``: one search hit.
# NOTE(review): field meanings below are inferred from their names — confirm
# against the search service that produces this payload.
class SingleSearchData(BaseModel):
    score:float  # presumably the relevance score of the hit
    id:str  # presumably the identifier of the matched item
    content:str  # presumably the matched text
|
||||
|
||||
# Response schema of a semantic search call: the query, the list of hits,
# a free-form metadata dict and three timing figures.
# NOTE(review): the unit of the *_time fields is not visible here — confirm.
class BMNewSemanticSearchOutput(BaseModel):
    query: str
    result : List[SingleSearchData]  # hits; ordering is not specified by this model
    metadata : Dict
    embed_model_time : float  # presumably time spent in the embedding model
    cosine_similarity_time : float  # presumably time spent computing similarities
    rerank_time : float  # presumably time spent re-ranking
|
||||
|
||||
|
||||
# A rule record together with the section and law ("qanon") it belongs to.
# Used as the ``db_rule`` side of ``SemanticSearchP2P``. All fields are
# required strings.
# NOTE(review): "etebar" presumably means validity status — confirm.
class DbRule(BaseModel):
    rule_id: str
    rule_content: str
    rule_type: str
    section_id: str
    section_content: str
    section_full_path :str
    qanon_id: str
    qanon_etebar: str
    qanon_title: str
    state_etebar: str
|
||||
|
||||
# A rule plus its section, with the same first five fields as ``DbRule`` but
# without the law-level fields. Used as the ``in_rule`` side of
# ``SemanticSearchP2P``. All fields are required strings.
class InputRule(BaseModel):
    rule_id: str
    rule_content: str
    rule_type: str
    section_id: str
    section_content: str
|
||||
|
||||
# Pairs one ``InputRule`` with one ``DbRule`` and carries a score for the
# pair plus a free-form metadata dict.
class SemanticSearchP2P(BaseModel):
    in_rule: InputRule
    db_rule: DbRule
    score: float = 0  # defaults to 0 when no score is supplied
    metadata: Dict
|
||||
|
||||
class BaleStartMessageForm(BaseModel):
|
||||
id: int
|
||||
is_bot: bool = False
|
||||
|
|
@ -254,14 +215,13 @@ class StateDetail(BaseModel):
|
|||
class BaleUser(BaseModel):
|
||||
uc_id: str
|
||||
chat_id: int
|
||||
user_id: int
|
||||
user_id: str
|
||||
update: BaleUpdate
|
||||
username: str = None
|
||||
username: str
|
||||
is_bot: bool = False
|
||||
is_vip: bool = False
|
||||
first_name: str = ""
|
||||
last_name: str = ""
|
||||
message_limit:int = 0
|
||||
|
||||
rule_relation: RuleRelation | None = None
|
||||
subject_unities:Dict = {}
|
||||
|
||||
|
|
@ -274,10 +234,9 @@ class BaleUser(BaseModel):
|
|||
is_processing_lock : bool = False
|
||||
is_call_back_query : bool = False
|
||||
state_detail : StateDetail = None
|
||||
active_message_id : int = 0
|
||||
last_message_id : int = 0
|
||||
|
||||
input_query: str = "" # ورودی کاربر
|
||||
last_input_query: str = "" # ورودی کاربر
|
||||
call_back_query: str = "" # ورودی کاربر
|
||||
_query_type: str = "" # ورودی کاربر
|
||||
sub_state: str = "" # برای روندی ها
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from router.bale.base_model import StateDetail
|
|||
|
||||
|
||||
|
||||
# Message text telling the user that their previous request is still being
# processed. It must be a plain str: wrapping the literal in parentheses with
# a trailing comma turns it into a 1-tuple.
BUSY_TEXT = "⏳ درخواست قبلی شما در حال پردازش هست، لطفا تا اتمام آن منتظر بمانید ⏳"
|
||||
|
||||
|
||||
class StateRegistry:
|
||||
|
|
@ -26,13 +26,6 @@ STATE = [
|
|||
message="""متن حقوقی برای جستجو در قوانین را وارد نمایید""",
|
||||
handler="handle_search_in_law",
|
||||
),
|
||||
StateDetail(
|
||||
state="search_in_law_rules",
|
||||
button_text="جستجو در اجزاء 🔎",
|
||||
end_buttons=[],
|
||||
message="""متن حقوقی برای جستجو در اجزاء قانونی را وارد نمایید""",
|
||||
handler="handle_search_in_law_rules",
|
||||
),
|
||||
StateDetail(
|
||||
state="chat_in_law",
|
||||
button_text="گفتگو طبق قوانین کشور",
|
||||
|
|
@ -101,11 +94,6 @@ STATE = [
|
|||
button_text="تماس با ما ☎️",
|
||||
message="""لطفا برای ارتباط با ما از طریق مرکز فناوری مجلس شورای اسلامی ایران اقدام فرمایید""",
|
||||
),
|
||||
StateDetail(
|
||||
state="not_yet",
|
||||
button_text="در دست توسعه",
|
||||
message="""این قسمت در دست توسعه قرار دارد.""",
|
||||
),
|
||||
StateDetail(
|
||||
state="about_us",
|
||||
button_text="درباره ما ⚡",
|
||||
|
|
@ -146,6 +134,7 @@ def build_buttons_form(button_form):
|
|||
main_button_form = [
|
||||
["chat_in_law"],
|
||||
["search_in_law"],
|
||||
["logical_chat_in_law"],
|
||||
["rule_making"],
|
||||
["qanon_title_repeat"],
|
||||
["conflict_law_writing_policy"],
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
[
|
||||
"init_mahdi", "hsafabale", "mmpouya"
|
||||
]
|
||||
Loading…
Reference in New Issue
Block a user