Compare commits
No commits in common. "a3163d6ec6cd8a4b754795fe53da0b846ff94d6b" and "f67a9194a31090bd170f268e9129bd975b066bc9" have entirely different histories.
a3163d6ec6
...
f67a9194a3
|
|
@ -1,16 +0,0 @@
|
||||||
import json
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from typing import List, Optional, Callable, Literal, Union, Dict, Any
|
|
||||||
|
|
||||||
|
|
||||||
class ChatLaw(BaseModel):
    """Response payload of the law-chat endpoint.

    Instances are built from the raw service result (see the
    ``ChatLaw.parse_obj(result)`` call in ``Operation.chat_in_law``).
    """

    # References attached to the answer; element type is not constrained here.
    ref_ids: List
    # Answer text.
    answer: str
    # Kind/category of the answer as reported by the service.
    answer_type: str
    # Optional reasoning text. Declared Optional so that an explicit ``null``
    # in the payload validates (a bare ``str = None`` only tolerates a
    # *missing* key on pydantic v2, not an explicit None).
    llm_reason: Optional[str] = None
    # Free-form extra information returned with the answer.
    metadata: Dict
|
|
||||||
|
|
||||||
class TitleRepeat(BaseModel):
    """One candidate returned by the repeated/similar-title lookup."""

    # Title text of the candidate.
    title: str
    # Identifier of the candidate (used to build its link).
    id: str
    # Similarity score shown to the user as a percentage.
    # NOTE(review): wer_nltk produces floats rounded to 2 decimals; confirm
    # that an integer field is really what the producer of this model sends.
    score: int
|
|
||||||
358
core/core.py
358
core/core.py
|
|
@ -1,22 +1,32 @@
|
||||||
################# modularity
|
################# modularity
|
||||||
### import from external-package
|
### import from external-package
|
||||||
import unicodedata, requests, logging, asyncio, httpx, os, json, uuid, traceback, orjson, copy, uvicorn, time, re
|
from fastapi import FastAPI, Request, HTTPException
|
||||||
|
import requests, logging, asyncio, httpx, os, uuid, traceback, orjson, copy, uvicorn, time, re
|
||||||
|
from dotenv import load_dotenv
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from collections import defaultdict
|
from enum import Enum
|
||||||
from typing import Dict, List, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
from collections import defaultdict
|
||||||
|
from typing import Union, List
|
||||||
from elasticsearch import Elasticsearch, helpers
|
from elasticsearch import Elasticsearch, helpers
|
||||||
|
import requests, logging, asyncio, httpx, os, uuid, traceback, orjson, copy, uvicorn, time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from time import sleep
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
|
import httpx
|
||||||
|
import json
|
||||||
|
|
||||||
### import from internal-file
|
### import from internal-file
|
||||||
from router.bale.base_model import *
|
from router.bale.base_model import *
|
||||||
from router.bale.bale_buttons import *
|
from router.bale.bale_buttons import *
|
||||||
from router.bale.bale_massages import *
|
from router.bale.bale_massages import *
|
||||||
from core.static import *
|
from core.static import *
|
||||||
from core.base_model import *
|
|
||||||
|
|
||||||
|
|
||||||
############## Global-Params
|
############## Global-Params
|
||||||
|
|
||||||
DATA_DIR = os.path.join(".", "_data_json")
|
DATA_DIR = os.path.join(".", "_data_json")
|
||||||
if not os.path.exists(DATA_DIR):
|
if not os.path.exists(DATA_DIR):
|
||||||
os.makedirs(DATA_DIR)
|
os.makedirs(DATA_DIR)
|
||||||
|
|
@ -24,6 +34,36 @@ if not os.path.exists(DATA_DIR):
|
||||||
PERSIAN_BOUNDARIES = set(" \n،.؟!؛:")
|
PERSIAN_BOUNDARIES = set(" \n،.؟!؛:")
|
||||||
|
|
||||||
|
|
||||||
|
# f"https://YOUR_DOMAIN.com
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
class DbRule(BaseModel):
    """A rule as stored in the database, with its section and law context.

    All fields are required strings.
    """

    # --- the rule itself ---
    rule_id: str
    rule_content: str
    rule_type: str

    # --- the section the rule belongs to ---
    section_id: str
    section_content: str
    section_full_path: str

    # --- the law ("qanon") owning the section ---
    # presumably "etebar" is the validity status; verify against the producer
    qanon_id: str
    qanon_etebar: str
    qanon_title: str
    state_etebar: str
|
||||||
|
|
||||||
|
class InputRule(BaseModel):
    """A rule coming from the user's input text (no law-level context).

    Carries the same rule/section fields as ``DbRule`` but none of the
    ``qanon_*`` fields. All fields are required strings.
    """

    # --- the rule itself ---
    rule_id: str
    rule_content: str
    rule_type: str

    # --- the section the rule was taken from ---
    section_id: str
    section_content: str
|
||||||
|
|
||||||
|
class SemanticSearchP2P(BaseModel):
    """A single pairing of an input rule with a database rule."""

    # Rule from the user's input.
    in_rule: InputRule
    # Database rule it was paired with.
    db_rule: DbRule
    # Score of the pairing; defaults to 0 when not supplied.
    score: float = 0
    # Required free-form extra information.
    metadata: Dict
|
||||||
|
|
||||||
|
|
||||||
class Formatter:
|
class Formatter:
|
||||||
"""
|
"""
|
||||||
Formatting options
|
Formatting options
|
||||||
|
|
@ -42,8 +82,6 @@ class Formatter:
|
||||||
###########################################################
|
###########################################################
|
||||||
def __init__(self, max_len: int = 4000):
|
def __init__(self, max_len: int = 4000):
|
||||||
self.max_len = max_len
|
self.max_len = max_len
|
||||||
|
|
||||||
self.dash = "--------------------------------------------------------------------"
|
|
||||||
self._number_map = {
|
self._number_map = {
|
||||||
"0": "0️⃣",
|
"0": "0️⃣",
|
||||||
"1": "1️⃣",
|
"1": "1️⃣",
|
||||||
|
|
@ -128,72 +166,17 @@ class Formatter:
|
||||||
# توابع فرمت و ساختار
|
# توابع فرمت و ساختار
|
||||||
###########################################################
|
###########################################################
|
||||||
|
|
||||||
def form_search_in_law_rules(self, header: str, body: List[SemanticSearchP2P], footer: str = None) -> List[str]:
|
def form_search_in_law(self, title: str, sections: List) -> List:
|
||||||
"""
|
|
||||||
گروهبندی بر اساس in_rule.rule_id و ساخت بلوکهای مجزا برای هر گروه.
|
|
||||||
"""
|
|
||||||
if footer:
|
|
||||||
footer = '\n' + footer
|
|
||||||
|
|
||||||
# گروهبندی بر اساس in_rule.rule_id
|
|
||||||
grouped = defaultdict(list)
|
|
||||||
for item in body:
|
|
||||||
key = item.in_rule.rule_id
|
|
||||||
grouped[key].append(item)
|
|
||||||
|
|
||||||
print(f'form_search_in_law_rules -> {len(grouped)}')
|
|
||||||
|
|
||||||
chunks = []
|
|
||||||
current = header
|
|
||||||
|
|
||||||
# برای هر گروه (یعنی یک in_rule.rule_id منحصربهفرد)
|
|
||||||
for group_id, items in grouped.items():
|
|
||||||
# فرض: همهی in_rule در یک گروه، rule_content یکسانی دارند — از اولی استفاده میکنیم
|
|
||||||
in_rule_content = items[0].in_rule.rule_content
|
|
||||||
block_lines = [in_rule_content]
|
|
||||||
|
|
||||||
# لیست db_ruleها با شمارهگذاری
|
|
||||||
for i, item in enumerate(items, start=1):
|
|
||||||
db_content = item.db_rule.rule_content
|
|
||||||
section_id = self.__make_link_qs(src=item.db_rule.section_id)
|
|
||||||
block_lines.append(f"{self.number(i)} گزاره: {db_content} در {section_id}")
|
|
||||||
|
|
||||||
block = "\n".join(block_lines) + "\n\n"
|
|
||||||
|
|
||||||
# بررسی سایز و تقسیم در صورت نیاز
|
|
||||||
if len(current) + len(block) > self.max_len:
|
|
||||||
if current.strip() != header.strip():
|
|
||||||
chunks.append(current.rstrip())
|
|
||||||
current = header + block # شروع چانک جدید با header دوباره (یا بدون header؟)
|
|
||||||
else:
|
|
||||||
current += block
|
|
||||||
|
|
||||||
# اضافه کردن آخرین چانک
|
|
||||||
if current.strip() and current.strip() != header.strip():
|
|
||||||
chunks.append(current.rstrip())
|
|
||||||
|
|
||||||
# footer
|
|
||||||
if footer and chunks:
|
|
||||||
last = chunks[-1]
|
|
||||||
if len(last) + len(footer) <= self.max_len:
|
|
||||||
chunks[-1] = last + footer
|
|
||||||
else:
|
|
||||||
chunks.append(footer)
|
|
||||||
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
def form_search_in_law(self, header: str, sections: List[SingleSearchData], footer:str=None) -> List:
|
|
||||||
"""
|
"""
|
||||||
خروجی به صورت چانک بدون دکمه هر خروجی لینک دارد
|
خروجی به صورت چانک بدون دکمه هر خروجی لینک دارد
|
||||||
برش امن لینک ها و اده ها
|
برش امن لینک ها و اده ها
|
||||||
"""
|
"""
|
||||||
footer = '\n\n'+footer
|
|
||||||
chunks = []
|
chunks = []
|
||||||
current = header
|
current = f"برای پرسش: {title}\n\n"
|
||||||
|
|
||||||
for i, data in enumerate(sections, start=1):
|
for i, data in enumerate(sections, start=1):
|
||||||
sec_text = data.content
|
sec_text = data.get("content", "")
|
||||||
idx = data.id
|
idx = data.get("id")
|
||||||
|
|
||||||
# ساخت ref کامل
|
# ساخت ref کامل
|
||||||
ref = self.__make_link_qs(src=idx)
|
ref = self.__make_link_qs(src=idx)
|
||||||
|
|
@ -213,13 +196,6 @@ class Formatter:
|
||||||
if current.strip():
|
if current.strip():
|
||||||
chunks.append(current.rstrip())
|
chunks.append(current.rstrip())
|
||||||
|
|
||||||
if footer :
|
|
||||||
last = chunks[-1]
|
|
||||||
if len(last) + len(footer) <= self.max_len:
|
|
||||||
chunks[-1] = last + footer
|
|
||||||
else:
|
|
||||||
chunks.append(footer)
|
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
def form_law_chat(self, answer_text: str):
|
def form_law_chat(self, answer_text: str):
|
||||||
|
|
@ -271,24 +247,27 @@ class Formatter:
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
async def form_title_repeated(self, _input: List[TitleRepeat]) -> List:
|
async def form_title_repeated(self, data: List[Dict[str, str]]):
|
||||||
if len(_input) == 0:
|
if len(data) == 0:
|
||||||
return ["هیچ عنوان تکراری و یا حتی مشابه یافت نشد."]
|
return ["هیچ عنوان تکراری و یا حتی مشابه یافت نشد."]
|
||||||
|
|
||||||
chunks = []
|
chunks = []
|
||||||
current = "نزدیکترین عناوین مشابه عنوان قانون موارد زیر می باشد:\n\n"
|
current = "نزدیکترین عناوین مشابه عنوان قانون موارد زیر می باشد:\n\n"
|
||||||
|
|
||||||
for i, item in enumerate(_input, start=1):
|
for i, item in enumerate(data, start=1):
|
||||||
|
title = item.get("title", "")
|
||||||
|
sec_id = item.get("id", "")
|
||||||
|
score = item.get("score", "")
|
||||||
|
|
||||||
if not item.title or not item.id:
|
if not title or not sec_id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ref = self.__make_link_qq(src=item.id)
|
ref = self.__make_link_qq(src=sec_id)
|
||||||
|
|
||||||
# بلوک کامل: عنوان + لینک — هر دو در یک بلوک غیرقابل تقسیم
|
# بلوک کامل: عنوان + لینک — هر دو در یک بلوک غیرقابل تقسیم
|
||||||
# block = f"{i}. {title}(وزن {score})\n{ref}\n"
|
# block = f"{i}. {title}(وزن {score})\n{ref}\n"
|
||||||
block = (
|
block = (
|
||||||
f"{self.number(i)} {self.bold(item.title)}؛ میزان تشابه: %{item.score} ؛{ref}\n"
|
f"{self.number(i)} {self.bold(title)}؛ میزان تشابه: %{score} ؛{ref}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
# اگر اضافه کردن این بلوک باعث overflow شود → چانک قبلی را ذخیره و current را ریست کن
|
# اگر اضافه کردن این بلوک باعث overflow شود → چانک قبلی را ذخیره و current را ریست کن
|
||||||
|
|
@ -314,12 +293,11 @@ class Formatter:
|
||||||
# print(f'Found implicit source(s): {links}')
|
# print(f'Found implicit source(s): {links}')
|
||||||
return ", ".join(links) # جایگزینی همه کدها با لینکهایشان
|
return ", ".join(links) # جایگزینی همه کدها با لینکهایشان
|
||||||
|
|
||||||
async def form_chat(self, llm_text: str, header: str, footer: str=None):
|
async def form_chat(self, llm_text: str, header: str):
|
||||||
"""
|
"""
|
||||||
answer_text: متن خروجی مدل که داخلش عبارتهای مثل (منبع: qs2117427) وجود دارد
|
answer_text: متن خروجی مدل که داخلش عبارتهای مثل (منبع: qs2117427) وجود دارد
|
||||||
"""
|
"""
|
||||||
if footer:
|
|
||||||
footer = '\n\n'+footer
|
|
||||||
# الگو برای تشخیص هر پرانتز که شامل یک یا چند کد باشد
|
# الگو برای تشخیص هر پرانتز که شامل یک یا چند کد باشد
|
||||||
# مثلا: (qs123) یا (qs123, qs456, qs789)
|
# مثلا: (qs123) یا (qs123, qs456, qs789)
|
||||||
pattern = r"\((?:منبع[:: ]+)?([a-zA-Z0-9_, ]+)\)"
|
pattern = r"\((?:منبع[:: ]+)?([a-zA-Z0-9_, ]+)\)"
|
||||||
|
|
@ -350,25 +328,17 @@ class Formatter:
|
||||||
if current.strip():
|
if current.strip():
|
||||||
chunks.append(current.strip())
|
chunks.append(current.strip())
|
||||||
|
|
||||||
if footer and chunks:
|
|
||||||
last = chunks[-1]
|
|
||||||
if len(last) + len(footer) <= self.max_len:
|
|
||||||
chunks[-1] = last + footer
|
|
||||||
else:
|
|
||||||
chunks.append(footer)
|
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
async def form_llm_answer_chat(self, _input, header):
|
async def form_llm_answer_chat(self, _input, header):
|
||||||
if len(_input) > 0:
|
if len(_input) > 0:
|
||||||
return await self.form_chat(llm_text=_input, header=header)
|
return await self.form_chat(llm_text=_input["text"], header=header)
|
||||||
# _input['source']
|
# _input['source']
|
||||||
return ["هیچ ماده مرتبطی یافت نشد!"]
|
return ["هیچ ماده مرتبطی یافت نشد!"]
|
||||||
|
|
||||||
async def form_subject_unity(
|
async def form_subject_unity(self,
|
||||||
self,
|
|
||||||
_input:Union[List[RuleRelation], str],
|
_input:Union[List[RuleRelation], str],
|
||||||
header="نتایج اولیه مغایرت های احتمالی :\n",
|
header="نتایج اولیه مغایرت های احتمالی :\n"
|
||||||
):
|
):
|
||||||
if isinstance(_input, str):
|
if isinstance(_input, str):
|
||||||
_input = self.form_law_chat(_input)
|
_input = self.form_law_chat(_input)
|
||||||
|
|
@ -388,10 +358,7 @@ class Formatter:
|
||||||
block_lines = [f"{self.number(idx)} در قانون {self.bold(qanon_title)}"]
|
block_lines = [f"{self.number(idx)} در قانون {self.bold(qanon_title)}"]
|
||||||
sample_items_by_section = {}
|
sample_items_by_section = {}
|
||||||
for item in _input:
|
for item in _input:
|
||||||
if (
|
if item.db_rule.qanon_title == qanon_title and item.db_rule.section_id in section_ids:
|
||||||
item.db_rule.qanon_title == qanon_title
|
|
||||||
and item.db_rule.section_id in section_ids
|
|
||||||
):
|
|
||||||
sid = item.db_rule.section_id
|
sid = item.db_rule.section_id
|
||||||
if sid not in sample_items_by_section:
|
if sid not in sample_items_by_section:
|
||||||
sample_items_by_section[sid] = item
|
sample_items_by_section[sid] = item
|
||||||
|
|
@ -430,19 +397,15 @@ class Formatter:
|
||||||
# Button: add *once* per qanon_title
|
# Button: add *once* per qanon_title
|
||||||
if qanon_title and qanon_title not in seen_qanon_titles:
|
if qanon_title and qanon_title not in seen_qanon_titles:
|
||||||
seen_qanon_titles.add(qanon_title)
|
seen_qanon_titles.add(qanon_title)
|
||||||
buttons.append(
|
buttons.append([
|
||||||
[
|
|
||||||
{
|
{
|
||||||
"text": f"بررسی مغایرت با {qanon_title}",
|
"text": f"بررسی مغایرت با {qanon_title}",
|
||||||
"callback_data": f"subject_unities:qq:{qanon_title}",
|
"callback_data": f"subject_unities:qq:{qanon_title}"
|
||||||
}
|
}
|
||||||
]
|
])
|
||||||
)
|
|
||||||
|
|
||||||
# Final flush
|
# Final flush
|
||||||
if current.strip() and (
|
if current.strip() and (len(chunks) == 0 or current.strip() != header.rstrip()):
|
||||||
len(chunks) == 0 or current.strip() != header.rstrip()
|
|
||||||
):
|
|
||||||
chunks.append(current.rstrip())
|
chunks.append(current.rstrip())
|
||||||
|
|
||||||
input_dict = {item.db_rule.section_id : item for item in _input}
|
input_dict = {item.db_rule.section_id : item for item in _input}
|
||||||
|
|
@ -453,13 +416,11 @@ class Formatter:
|
||||||
|
|
||||||
return chunks, buttons, mapping_data
|
return chunks, buttons, mapping_data
|
||||||
|
|
||||||
|
|
||||||
async def form_rule_making(
|
async def form_rule_making(
|
||||||
self, _input, header="گزاره های حقوقی زیر استخراج شد:\n\n", footer=None
|
self, _input, header="گزاره های حقوقی زیر استخراج شد:\n\n"
|
||||||
):
|
):
|
||||||
if len(_input) > 0:
|
if len(_input) > 0:
|
||||||
if footer:
|
|
||||||
footer = '\n\n'+footer
|
|
||||||
|
|
||||||
chunks = []
|
chunks = []
|
||||||
current = header
|
current = header
|
||||||
|
|
||||||
|
|
@ -473,15 +434,8 @@ class Formatter:
|
||||||
if current.strip():
|
if current.strip():
|
||||||
chunks.append(current.rstrip())
|
chunks.append(current.rstrip())
|
||||||
|
|
||||||
if footer and chunks:
|
|
||||||
last = chunks[-1]
|
|
||||||
if len(last) + len(footer) <= self.max_len:
|
|
||||||
chunks[-1] = last + footer
|
|
||||||
else:
|
|
||||||
chunks.append(footer)
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
return ["هیچ گزاره حقوقی یافت و استخراج نشد!"]
|
return ["هیچ گزاره حقوقی یافت و استخراج نشد!"]
|
||||||
|
|
||||||
def get_asl(self, _in: str):
|
def get_asl(self, _in: str):
|
||||||
|
|
@ -535,29 +489,6 @@ class Formatter:
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
async def form_constitution_low(self, input: Dict, _id, _header='نتایچ بررسی مغایرت با اصول مهم قانون اساسی\n\n'):
    """Render every entry of ``input`` and return the result as one string.

    Each ``(key, value)`` pair is rendered with ``self.get_in_form_single``
    and accumulated into a buffer bounded by ``self.max_len``. When a
    rendered block does not fit, the buffer is flushed (right-stripped) and
    a new buffer is started with ``_header`` followed by that block.
    All flushed buffers are finally concatenated without a separator.

    Args:
        input: Mapping whose keys are passed as ``asl`` and whose values are
            passed as ``_in_dict`` to ``get_in_form_single``.
        _id: Forwarded unchanged to ``get_in_form_single``.
        _header: Text placed at the start of every buffer opened *after* an
            overflow.

    Returns:
        The concatenation of all buffers as a single ``str``.
    """
    # NOTE(review): the very first buffer starts empty, so ``_header`` only
    # appears if an overflow happens — confirm this is intended.
    pieces = []
    buffer = ""

    for asl, payload in input.items():
        rendered = self.get_in_form_single(asl=asl, _in_dict=payload, _id=_id)

        # Still fits: keep accumulating in the current buffer.
        if len(buffer) + len(rendered) <= self.max_len:
            buffer += rendered
            continue

        # Does not fit: flush what we have and open a fresh buffer.
        pieces.append(buffer.rstrip())
        buffer = _header + rendered

    # Flush the trailing buffer unless it is only whitespace.
    if buffer.strip():
        pieces.append(buffer.rstrip())

    return ''.join(pieces)
|
|
||||||
|
|
||||||
async def form_ss_rules(self, _input:List[Dict], header):
|
async def form_ss_rules(self, _input:List[Dict], header):
|
||||||
|
|
||||||
if len(_input) > 1:
|
if len(_input) > 1:
|
||||||
|
|
@ -568,8 +499,8 @@ class Formatter:
|
||||||
# -------- 1. group by qanon_id / qanon_title
|
# -------- 1. group by qanon_id / qanon_title
|
||||||
groups = defaultdict(set)
|
groups = defaultdict(set)
|
||||||
for item in _input:
|
for item in _input:
|
||||||
key = item["db_rule"]["qanon_title"]
|
key = item['db_rule']['qanon_title']
|
||||||
groups[key].add(item["db_rule"]["section_id"])
|
groups[key].add(item['db_rule']['section_id'])
|
||||||
|
|
||||||
for qanon_title, ids in groups.items():
|
for qanon_title, ids in groups.items():
|
||||||
_i += 1
|
_i += 1
|
||||||
|
|
@ -591,8 +522,10 @@ class Formatter:
|
||||||
|
|
||||||
return ["هیچ ماده مرتبطی یافت نشد!"]
|
return ["هیچ ماده مرتبطی یافت نشد!"]
|
||||||
|
|
||||||
async def form_conflict_detection(
|
|
||||||
self, _input: RuleRelation, header="نتیجه تشخیص مغایرت :\n"
|
|
||||||
|
async def form_conflict_detection(self,
|
||||||
|
_input:RuleRelation, header="نتیجه تشخیص مغایرت :\n"
|
||||||
):
|
):
|
||||||
current = header
|
current = header
|
||||||
|
|
||||||
|
|
@ -603,8 +536,8 @@ class Formatter:
|
||||||
|
|
||||||
return current
|
return current
|
||||||
|
|
||||||
async def form_conflict_type_detection(
|
async def form_conflict_type_detection(self,
|
||||||
self, _input: RuleRelation, header="نتیجه تشخیص نوع مغایرت :\n"
|
_input:RuleRelation, header="نتیجه تشخیص نوع مغایرت :\n"
|
||||||
):
|
):
|
||||||
current = header
|
current = header
|
||||||
|
|
||||||
|
|
@ -615,8 +548,8 @@ class Formatter:
|
||||||
|
|
||||||
return current
|
return current
|
||||||
|
|
||||||
async def form_relation_identification(
|
async def form_relation_identification(self,
|
||||||
self, _input: RuleRelation, header="نتیجه رابطه مغایرت :\n"
|
_input:RuleRelation, header="نتیجه رابطه مغایرت :\n"
|
||||||
):
|
):
|
||||||
current = header
|
current = header
|
||||||
|
|
||||||
|
|
@ -627,8 +560,8 @@ class Formatter:
|
||||||
|
|
||||||
return current
|
return current
|
||||||
|
|
||||||
async def form_evaluation(
|
async def form_evaluation(self,
|
||||||
self, _input: Evaluation, header="نتیجه نهایی بررسی مغایرت :\n"
|
_input:Evaluation, header="نتیجه نهایی بررسی مغایرت :\n"
|
||||||
):
|
):
|
||||||
current = header
|
current = header
|
||||||
|
|
||||||
|
|
@ -637,18 +570,15 @@ class Formatter:
|
||||||
current += f"1. آیا ارزیابی وحدت موضوع صحیح است؟ {_input.is_subject_unity_assessment_correct}\n"
|
current += f"1. آیا ارزیابی وحدت موضوع صحیح است؟ {_input.is_subject_unity_assessment_correct}\n"
|
||||||
current += f"2. آیا ارزیابی تشخیص نوع درست است ؟ {_input.is_conflict_detection_correct}\n"
|
current += f"2. آیا ارزیابی تشخیص نوع درست است ؟ {_input.is_conflict_detection_correct}\n"
|
||||||
current += f"3. آیا ارزیابی نوع درست است ؟ {_input.is_conflict_type_detection_correct}\n"
|
current += f"3. آیا ارزیابی نوع درست است ؟ {_input.is_conflict_type_detection_correct}\n"
|
||||||
current += (
|
current += f"4. رابطه مغایرت چطور؟ {_input.is_relation_type_detection_correct}\n"
|
||||||
f"4. رابطه مغایرت چطور؟ {_input.is_relation_type_detection_correct}\n"
|
|
||||||
)
|
|
||||||
current += f"5. نوع رابطه ؟ {_input.valid_relation_type}\n"
|
current += f"5. نوع رابطه ؟ {_input.valid_relation_type}\n"
|
||||||
current += f"6.توضیح بیشتر: {_input.comments}\n"
|
current += f"6.توضیح بیشتر: {_input.comments}\n"
|
||||||
|
|
||||||
return current
|
return current
|
||||||
|
|
||||||
async def from_law_writing_policy(
|
async def from_law_writing_policy(self, _input_dict: Dict, header:str) -> List[str]:
|
||||||
self, _input_dict: Dict, header: str
|
f_list = [
|
||||||
) -> List[str]:
|
self.bold(header)]
|
||||||
f_list = [self.bold(header)]
|
|
||||||
_str = {
|
_str = {
|
||||||
"analyze": "گزارش تحلیلی بندبهبند",
|
"analyze": "گزارش تحلیلی بندبهبند",
|
||||||
"strength": "بیان نقاط قوت",
|
"strength": "بیان نقاط قوت",
|
||||||
|
|
@ -662,28 +592,31 @@ class Formatter:
|
||||||
f_list += [_title]
|
f_list += [_title]
|
||||||
# f_list += ['\n']
|
# f_list += ['\n']
|
||||||
f_list += [v]
|
f_list += [v]
|
||||||
f_list += ["\n"]
|
f_list += ['\n']
|
||||||
|
|
||||||
return ["\n".join(f_list)]
|
return ["\n".join(f_list)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
deleteMessage
|
||||||
|
message_id
|
||||||
|
chat_id
|
||||||
|
"""
|
||||||
|
|
||||||
class RequestManager:
|
class RequestManager:
|
||||||
def __init__(
|
def __init__(self,
|
||||||
self,
|
|
||||||
host_url:str,
|
host_url:str,
|
||||||
url_time_out=1200,
|
url_time_out=1200,
|
||||||
step_time_out=600,
|
step_time_out=600,
|
||||||
):
|
):
|
||||||
if host_url.endswith('/'):
|
|
||||||
self.host_url = host_url
|
self.host_url = host_url
|
||||||
else:
|
|
||||||
self.host_url = host_url + '/'
|
|
||||||
|
|
||||||
self.url_time_out = url_time_out
|
self.url_time_out = url_time_out
|
||||||
self.step_time_out = step_time_out
|
self.step_time_out = step_time_out
|
||||||
TASK_URL ={
|
TASK_URL ={
|
||||||
# stream
|
# stream
|
||||||
"":"/stream/chat_logical",
|
"":"/stream/chat_logical",
|
||||||
|
|
||||||
# none-stream
|
# none-stream
|
||||||
"":"/conflict/general_policy/qs_unity",
|
"":"/conflict/general_policy/qs_unity",
|
||||||
"":"/conflict/all_qanon/qs_unity",
|
"":"/conflict/all_qanon/qs_unity",
|
||||||
|
|
@ -694,25 +627,28 @@ class RequestManager:
|
||||||
"":"/chat",
|
"":"/chat",
|
||||||
"":"/talk",
|
"":"/talk",
|
||||||
"":"/semantic_search/chat_logical",
|
"":"/semantic_search/chat_logical",
|
||||||
"": "/semantic_search/run_semantic_search",
|
|
||||||
"":"/semantic_search/run_chat",
|
"":"/semantic_search/run_chat",
|
||||||
|
"":"/semantic_search/run_semantic_search",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def get_result(
|
async def get_result(
|
||||||
self,
|
self,
|
||||||
payload,
|
payload,
|
||||||
url :str,
|
url :str,
|
||||||
section_id: str = "qs_10001",
|
section_id:str='qs_10001',
|
||||||
mode_type="bale",
|
mode_type='bale',
|
||||||
):
|
):
|
||||||
if url.startswith('/'):
|
|
||||||
url = url[1:]
|
|
||||||
|
|
||||||
_url = self.host_url+url
|
_url = self.host_url+url
|
||||||
print(f"get_result _url {_url}")
|
print(
|
||||||
|
f'get_result _url {_url}'
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=self.url_time_out) as client:
|
async with httpx.AsyncClient(timeout=self.url_time_out) as client:
|
||||||
response = await client.post(url=_url, json=payload)
|
response = await client.post(
|
||||||
|
url=_url, json=payload
|
||||||
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
result = data.get("result", "❌ پاسخی دریافت نشد")
|
result = data.get("result", "❌ پاسخی دریافت نشد")
|
||||||
|
|
@ -723,6 +659,7 @@ class RequestManager:
|
||||||
print(f"❌ خطای RAG:\n{str(e)}")
|
print(f"❌ خطای RAG:\n{str(e)}")
|
||||||
return "❌ ارتباط با سرور قطع میباشد"
|
return "❌ ارتباط با سرور قطع میباشد"
|
||||||
|
|
||||||
|
|
||||||
async def stream_result(
|
async def stream_result(
|
||||||
self,
|
self,
|
||||||
url :str,
|
url :str,
|
||||||
|
|
@ -735,15 +672,16 @@ class RequestManager:
|
||||||
data : "داده در این مرحله"
|
data : "داده در این مرحله"
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
if url.startswith('/'):
|
|
||||||
url = url[1:]
|
|
||||||
|
|
||||||
timeout = httpx.Timeout(self.step_time_out, read=self.url_time_out)
|
timeout = httpx.Timeout(self.step_time_out, read=self.url_time_out)
|
||||||
_url = self.host_url+url
|
_url = self.host_url+url
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||||
# ارسال درخواست به صورت Stream
|
# ارسال درخواست به صورت Stream
|
||||||
async with client.stream("POST", url=_url, json=payload) as r:
|
async with client.stream(
|
||||||
|
"POST",
|
||||||
|
url=_url,
|
||||||
|
json=payload
|
||||||
|
) as r:
|
||||||
# بررسی وضعیت پاسخ
|
# بررسی وضعیت پاسخ
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
print(f"Error: {r.status_code}")
|
print(f"Error: {r.status_code}")
|
||||||
|
|
@ -895,6 +833,38 @@ def make_link_qs(src, ref_text=REF_TEXT):
|
||||||
return f"[{ref_text}]({QS_WEB_LINK}{src})"
|
return f"[{ref_text}]({QS_WEB_LINK}{src})"
|
||||||
|
|
||||||
|
|
||||||
|
def encode_uc(update: BaleUpdate) -> str:
    """Build the ``"<username-or-id>:<chat_id>"`` key for an update.

    The sender and chat are taken from ``update.message`` when it is set,
    otherwise from ``update.callback_query`` (chat comes from the callback's
    ``message``). If neither is set the literal ``"unknown"`` is returned.

    Args:
        update: Incoming update carrying ``message`` and/or ``callback_query``.

    Returns:
        ``"{username}:{chat_id}"`` where ``username`` falls back to the
        sender's ``id`` when the username is falsy, or ``"unknown"``.
    """
    origin = update.message
    if origin:
        sender, conversation = origin.from_user, origin.chat
    else:
        origin = update.callback_query
        if not origin:
            return "unknown"
        sender, conversation = origin.from_user, origin.message.chat

    identity = sender.username or sender.id
    # Only the chat id is used for the second half of the key.
    return f"{identity}:{conversation.id}"
|
||||||
|
|
||||||
|
|
||||||
|
def decode_uc(uc_id: str) -> Tuple[str, Union[int, str]]:
    """Split a ``"username:chat_id"`` (or ``"user_id:chat_id"``) key.

    Inverse of ``encode_uc``. Only the first ``":"`` is used as separator,
    so everything after it is treated as the chat id.

    Args:
        uc_id: Key in the form ``"<username-or-id>:<chat_id>"``.

    Returns:
        A ``(username, chat_id)`` tuple. ``chat_id`` is converted to ``int``
        when it is a (possibly negative) decimal number, otherwise it is
        returned unchanged as ``str``.

    Raises:
        ValueError: If ``uc_id`` contains no ``":"`` separator.
    """
    try:
        username, chat_id = uc_id.split(":", 1)
    except ValueError:
        # Re-raise with the offending value so the failure is diagnosable.
        raise ValueError(
            f"decode_uc: expected 'username:chat_id', got {uc_id!r}"
        ) from None

    # Accept a leading minus sign: chat ids written by encode_uc are ints and
    # may be negative. isdecimal() (unlike isdigit()) only admits characters
    # that int() can actually parse.
    digits = chat_id[1:] if chat_id.startswith("-") else chat_id
    if digits.isdecimal():
        return username, int(chat_id)
    return username, chat_id
|
||||||
|
|
||||||
|
|
||||||
async def get_from_gpl(in_dict: Dict) -> List[str]:
|
async def get_from_gpl(in_dict: Dict) -> List[str]:
|
||||||
f_list = []
|
f_list = []
|
||||||
|
|
@ -927,17 +897,15 @@ def cer(ref: str, hyp: str) -> float:
|
||||||
dp[j] = min(
|
dp[j] = min(
|
||||||
dp[j] + 1, # deletion
|
dp[j] + 1, # deletion
|
||||||
dp[j - 1] + 1, # insertion
|
dp[j - 1] + 1, # insertion
|
||||||
prev + (ref[i - 1] != hyp[j - 1]), # substitution
|
prev + (ref[i - 1] != hyp[j - 1]) # substitution
|
||||||
)
|
)
|
||||||
prev = cur
|
prev = cur
|
||||||
|
|
||||||
return (dp[n] / m) * 100
|
return (dp[n] / m) * 100
|
||||||
|
|
||||||
|
|
||||||
import nltk
|
import nltk
|
||||||
from nltk.metrics import edit_distance
|
from nltk.metrics import edit_distance
|
||||||
|
|
||||||
|
|
||||||
def cer_ntlk(exist: str, new: str) -> float:
|
def cer_ntlk(exist: str, new: str) -> float:
|
||||||
"""
|
"""
|
||||||
این روش دقیقتر است، چون تعداد کاراکترهای اضافی یا کم در متن طولانی،
|
این روش دقیقتر است، چون تعداد کاراکترهای اضافی یا کم در متن طولانی،
|
||||||
|
|
@ -946,7 +914,6 @@ def cer_ntlk(exist: str, new: str) -> float:
|
||||||
# edit distance روی کلمات
|
# edit distance روی کلمات
|
||||||
return round(float(1 - edit_distance(new, exist) / len(exist)) * 100, 2)
|
return round(float(1 - edit_distance(new, exist) / len(exist)) * 100, 2)
|
||||||
|
|
||||||
|
|
||||||
def wer_nltk(new: str, exist: str) -> float:
|
def wer_nltk(new: str, exist: str) -> float:
|
||||||
new = new.split()
|
new = new.split()
|
||||||
exist = exist.split()
|
exist = exist.split()
|
||||||
|
|
@ -954,6 +921,43 @@ def wer_nltk(new: str, exist: str) -> float:
|
||||||
return round(float(1 - edit_distance(new, exist) / len(exist)) * 100, 2)
|
return round(float(1 - edit_distance(new, exist) / len(exist)) * 100, 2)
|
||||||
|
|
||||||
|
|
||||||
|
async def title_repeated(
    qanontitle, search_range: int = 10, url="http://localhost:8010/v1/indices/qaqanon/search"
) -> List[Dict]:
    """Fetch titles similar to ``qanontitle`` from the search REST API.

    The service is queried for ``search_range + 10`` hits; every hit is then
    re-scored locally with ``wer_nltk`` and the ``search_range`` best-scoring
    entries are returned.

    NOTE (translated from the original comment): this should eventually be
    obtained through the dedicated service instead of a direct call.

    Args:
        qanontitle: Title text to look up.
        search_range: Maximum number of entries to return.
        url: Search endpoint that receives the POST request.

    Returns:
        A list of ``{"title", "id", "score"}`` dicts sorted by descending
        score. An empty list is returned when the service does not answer
        with HTTP 200.

    Raises:
        Exceptions raised by ``requests`` (e.g. on timeout or connection
        failure) are not caught here and propagate to the caller.
    """
    # Alternative endpoint / auth kept from the original for reference:
    #   "/majles/similar/title/qaqanon/0/10/none"
    #   headers["Authorization"] = "GuestAccess"
    headers = {"accept": "application/json", "Content-Type": "application/json"}

    body = {
        "query": qanontitle,
        "from_": 0,
        # Ask for 10 more hits than needed; the list is cut back to
        # search_range after local re-scoring below.
        "size": search_range + 10,
        "track_total_hits": True,
    }

    # requests is blocking: run it in the default executor so the event loop
    # is not stalled for up to the 20 s timeout.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        lambda: requests.request("POST", url, headers=headers, json=body, timeout=20),
    )

    if response.status_code != 200:
        print("ERROR:", response.status_code)
        print(response.text)
        # Always hand back a list so callers can safely use len()/iteration.
        return []

    data = response.json()
    ids = []
    for hit in data["hits"]["hits"]:
        source = hit["_source"]
        title = source["title"]
        # wer_nltk divides by the number of words in `exist`; a blank title
        # would raise ZeroDivisionError, so such hits are skipped.
        if not title or not title.split():
            continue
        ids.append(
            {"title": title, "id": source["id"], "score": wer_nltk(exist=title, new=qanontitle)}
        )

    return sorted(ids, key=lambda x: x["score"], reverse=True)[:search_range]
|
||||||
|
|
||||||
|
|
||||||
def normalize_persian(text: str) -> str:
|
def normalize_persian(text: str) -> str:
|
||||||
|
|
@ -1435,3 +1439,5 @@ def chunked_simple_text(answer_text):
|
||||||
chunks.append(current.strip())
|
chunks.append(current.strip())
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,217 +0,0 @@
|
||||||
from core.core import RequestManager, wer_nltk
|
|
||||||
from core.base_model import *
|
|
||||||
from router.bale.base_model import *
|
|
||||||
import requests
|
|
||||||
|
|
||||||
"""
|
|
||||||
روند هر مرحله در اینجا مشخص می شود و داده و خروجی و پردازش در اینجا انجام می شود
|
|
||||||
"""
|
|
||||||
__all__ = ["Operation"]
|
|
||||||
|
|
||||||
|
|
||||||
class Operation:
|
|
||||||
def __init__(self, request_manager: RequestManager):
|
|
||||||
self.request_manager = request_manager
|
|
||||||
|
|
||||||
async def search_in_law(
|
|
||||||
self, query: str, limit: int, rerank_model: str, embed_model: str
|
|
||||||
) -> BMNewSemanticSearchOutput:
|
|
||||||
"""
|
|
||||||
فقط منطق – بدون هیچ وابستگی به Bale یا User
|
|
||||||
"""
|
|
||||||
|
|
||||||
result = await self.request_manager.get_result(
|
|
||||||
payload={
|
|
||||||
"query": query,
|
|
||||||
"limit": limit,
|
|
||||||
"rerank_model": rerank_model,
|
|
||||||
"embed_model": embed_model,
|
|
||||||
},
|
|
||||||
url="new/semantic_search",
|
|
||||||
)
|
|
||||||
|
|
||||||
return BMNewSemanticSearchOutput.parse_obj(result)
|
|
||||||
|
|
||||||
async def stream_search_in_law(
|
|
||||||
self, query: str, limit: int, rerank_model: str, embed_model: str
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
فقط منطق – بدون هیچ وابستگی به Bale یا User
|
|
||||||
"""
|
|
||||||
async for data in self.request_manager.stream_result(
|
|
||||||
payload={
|
|
||||||
"query": query,
|
|
||||||
"limit": limit,
|
|
||||||
"rerank_model": rerank_model,
|
|
||||||
"embed_model": embed_model,
|
|
||||||
},
|
|
||||||
url="new/semantic_search",
|
|
||||||
):
|
|
||||||
yield data
|
|
||||||
|
|
||||||
async def stream_rule_making(self, query, llm_name, effort):
|
|
||||||
async for data in self.request_manager.stream_result(
|
|
||||||
payload={
|
|
||||||
"query": query,
|
|
||||||
"query_id": "qs12357498",
|
|
||||||
"llm_effort": effort,
|
|
||||||
"llm_model_name": llm_name,
|
|
||||||
# "llm_api_url"
|
|
||||||
# "llm_api_key"
|
|
||||||
},
|
|
||||||
url="/single/rule_making",
|
|
||||||
):
|
|
||||||
|
|
||||||
yield data
|
|
||||||
|
|
||||||
async def stream_chat_in_law(self, query, limit, effort, mode_type="bale"):
|
|
||||||
async for data in self.request_manager.stream_result(
|
|
||||||
payload={
|
|
||||||
"section_content": query,
|
|
||||||
"effort": effort,
|
|
||||||
"limit": limit,
|
|
||||||
"mode_type": mode_type,
|
|
||||||
},
|
|
||||||
url="/single/semantic_search/run_chat",
|
|
||||||
):
|
|
||||||
|
|
||||||
yield data
|
|
||||||
|
|
||||||
async def stream_rule_semantic_search(
    self,
    queries: List,
    filter_qanon_ids: List,
    limit_rerank: int,
    embed_model="jinaai/jina-colbert-v2",
    rerank_model="BAAI/bge-reranker-v2-m3",
    metadata=None,
    limit_cos=100,
):
    """Stream rule-level semantic-search results from the backend.

    Args:
        queries: Query items forwarded as ``queries``.
        filter_qanon_ids: Ids forwarded as ``filter_qanon_ids``.
        limit_rerank: Forwarded as ``limit_rerank``.
        embed_model: Name of the embedding model to request.
        rerank_model: Name of the reranker model to request.
        metadata: Optional dict forwarded as ``metadata``.  When omitted a
            fresh empty dict is sent on every call.
        limit_cos: Forwarded as ``limit_cos``.

    Yields:
        Every item produced by ``request_manager.stream_result`` for the
        ``/single/rule_semantic_search`` endpoint, unchanged.
    """
    # A literal ``{}`` default would be one dict shared by every call; build
    # a new one per call so a consumer mutating the payload cannot leak
    # state into later requests.
    if metadata is None:
        metadata = {}

    async for data in self.request_manager.stream_result(
        payload={
            "queries": queries,
            "filter_qanon_ids": filter_qanon_ids,
            "embed_model": embed_model,
            "rerank_model": rerank_model,
            "metadata": metadata,
            "limit_rerank": limit_rerank,
            "limit_cos": limit_cos,
        },
        url="/single/rule_semantic_search",
    ):
        yield data
|
|
||||||
|
|
||||||
async def chat_in_law(self, query, effort, limit, mode_type="bale") -> ChatLaw:
    """Request a complete (non-streamed) law-chat answer for ``query``.

    ``query`` is sent as ``section_content`` together with ``effort``,
    ``limit`` and ``mode_type`` to ``/single/semantic_search/run_chat``.
    The raw backend result is printed and then parsed into ``ChatLaw``.
    """
    chat_payload = dict(
        section_content=query,
        effort=effort,
        limit=limit,
        mode_type=mode_type,
    )
    raw_answer = await self.request_manager.get_result(
        payload=chat_payload,
        url="/single/semantic_search/run_chat",
    )
    print(f"chat_in_law {raw_answer}")
    return ChatLaw.parse_obj(raw_answer)
|
|
||||||
|
|
||||||
async def title_repeated(
    self,
    qanontitle,
    search_range: int = 10,
    # alternative direct endpoint: "http://localhost:8010/v1/indices/qaqanon/search"
    url="http://localhost/api/elp/v1/indices/qaqanon/search",
) -> List[TitleRepeat]:
    """Fetch existing law titles similar to ``qanontitle``.

    Posts a search request to the custom Elasticsearch-like REST API at
    ``url``, scores every returned title against ``qanontitle`` with
    ``wer_nltk`` and returns the best matches.

    NOTE (translated from the original): this should eventually be obtained
    through the dedicated backend service.

    Args:
        qanontitle: The new title to look up.
        search_range: Maximum number of results to return.  ``search_range
            + 10`` hits are requested so the re-scoring has extra candidates.
        url: Search endpoint to query.

    Returns:
        Up to ``search_range`` ``TitleRepeat`` items sorted by descending
        score, or an empty list when the service does not answer with
        HTTP 200.
    """
    import asyncio  # local import: only this coroutine needs it

    headers = {"accept": "application/json", "Content-Type": "application/json"}
    body = {
        "query": qanontitle,
        "from_": 0,
        "size": search_range + 10,
        "track_total_hits": True,
    }

    # ``requests`` is blocking; run it in a worker thread so the event loop
    # keeps serving other updates while the search is in flight.
    response = await asyncio.to_thread(
        requests.request, "POST", url, headers=headers, json=body, timeout=20
    )
    print(f"title_repeated -> {response}")

    if response.status_code != 200:
        print("ERROR:", response.status_code)
        print(response.text)
        # Keep the declared return type: "no matches" instead of None.
        return []

    candidates = []
    for hit in response.json()["hits"]["hits"]:
        source = hit["_source"]
        title = source["title"]
        candidates.append(
            TitleRepeat(
                title=title,
                id=source["id"],
                score=wer_nltk(exist=title, new=qanontitle),
            )
        )

    return sorted(candidates, key=lambda item: item.score, reverse=True)[
        :search_range
    ]
|
|
||||||
|
|
||||||
async def talk(self, query) -> str:
    """Send ``query`` to the backend ``/talk`` endpoint and return its result.

    ``query`` is sent as ``user_input``; whatever
    ``request_manager.get_result`` returns is passed back unchanged.
    """
    talk_payload = {"user_input": query}
    return await self.request_manager.get_result(
        payload=talk_payload,
        url="/talk",
    )
|
|
||||||
|
|
||||||
async def conflict_qanon_asasi_low(self, query, effort, limit, mode_type="bale"):
    """Stream the result of the ``/new/conflict/constitution_low`` endpoint.

    ``query`` is sent as ``section_content`` together with ``effort``,
    ``limit`` and ``mode_type``.  Each streamed item is yielded unchanged.
    """
    conflict_payload = dict(
        section_content=query,
        effort=effort,
        limit=limit,
        mode_type=mode_type,
    )
    chunks = self.request_manager.stream_result(
        payload=conflict_payload,
        url="/new/conflict/constitution_low",
    )
    async for chunk in chunks:
        yield chunk
|
|
||||||
|
|
||||||
async def conflict_qanon_asasi_steps(self, query, effort, limit, mode_type="bale"):
    """Return the full result of the ``/new/conflict/constitution`` endpoint.

    ``query`` is sent as ``section_content`` together with ``effort``,
    ``limit`` and ``mode_type``.  The value returned by
    ``request_manager.get_result`` is handed back unchanged.
    """
    conflict_payload = dict(
        section_content=query,
        effort=effort,
        limit=limit,
        mode_type=mode_type,
    )
    return await self.request_manager.get_result(
        payload=conflict_payload,
        url="/new/conflict/constitution",
    )
|
|
||||||
|
|
||||||
async def stream_logical_chat_in_law(self, query, effort, metadata, limit):
    """Stream the result of the ``/new/stream/chat_logical`` endpoint.

    ``query`` is sent as ``section_content`` together with ``effort``,
    ``metadata`` and ``limit``.  Each streamed item is yielded unchanged.
    """
    logical_payload = dict(
        section_content=query,
        effort=effort,
        metadata=metadata,
        limit=limit,
    )
    chunks = self.request_manager.stream_result(
        payload=logical_payload,
        url="/new/stream/chat_logical",
    )
    async for chunk in chunks:
        yield chunk
|
|
||||||
|
|
||||||
async def conflict_law_writing_policy(self, query, effort):
    """Return the full result of the ``/conflict/law_writing_policy`` endpoint.

    ``query`` is sent as ``section_content`` together with ``effort``.  The
    value returned by ``request_manager.get_result`` is handed back
    unchanged.
    """
    policy_payload = dict(section_content=query, effort=effort)
    return await self.request_manager.get_result(
        payload=policy_payload,
        url="/conflict/law_writing_policy",
    )
|
|
||||||
7
main.py
7
main.py
|
|
@ -6,7 +6,6 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||||
from router.bale.bale import router as bale_router
|
from router.bale.bale import router as bale_router
|
||||||
from router.bale.bale import initialize_webhook
|
from router.bale.bale import initialize_webhook
|
||||||
from router.bale.bale_handle import BaleBot, UserManager
|
from router.bale.bale_handle import BaleBot, UserManager
|
||||||
from core.operation import Operation
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
@ -68,12 +67,8 @@ async def lifespan(app: FastAPI):
|
||||||
app.state.request_manager = RequestManager(
|
app.state.request_manager = RequestManager(
|
||||||
host_url=BACK_END_URL,
|
host_url=BACK_END_URL,
|
||||||
)
|
)
|
||||||
app.state.operation = Operation(
|
|
||||||
request_manager=app.state.request_manager,
|
|
||||||
)
|
|
||||||
# بله بات
|
# بله بات
|
||||||
bale_bot = BaleBot(
|
bale_bot = BaleBot(
|
||||||
operation=app.state.operation,
|
|
||||||
user_manager=app.state.user_manager,
|
user_manager=app.state.user_manager,
|
||||||
es_helper=app.state.es_helper,
|
es_helper=app.state.es_helper,
|
||||||
es_index_name=app.state.es_index_name,
|
es_index_name=app.state.es_index_name,
|
||||||
|
|
@ -83,7 +78,7 @@ async def lifespan(app: FastAPI):
|
||||||
request_manager = app.state.request_manager,
|
request_manager = app.state.request_manager,
|
||||||
)
|
)
|
||||||
app.state.bale_bot = bale_bot
|
app.state.bale_bot = bale_bot
|
||||||
print("✅✅✅ Bale-Bot Initialized ✅✅✅")
|
print("=== Bale-Bot Initialized ===")
|
||||||
|
|
||||||
yield # برنامه در این حالت اجرا میشود
|
yield # برنامه در این حالت اجرا میشود
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
|
|
||||||
Operation (logic layer)
|
|
||||||
│
|
|
||||||
├── handle_search_in_law(...)
|
|
||||||
│ ├── دریافت ورودی خام
|
|
||||||
│ ├── فراخوانی request_manager
|
|
||||||
│ ├── ساخت خروجی منطقی (data)
|
|
||||||
│ └── بدون وابستگی به بله
|
|
||||||
│
|
|
||||||
└── return OperationResult
|
|
||||||
|
|
||||||
BaleBot (delivery / interface layer)
|
|
||||||
│
|
|
||||||
├── دریافت user
|
|
||||||
├── صدا زدن Operation
|
|
||||||
├── تبدیل خروجی به متن + دکمه
|
|
||||||
└── ارسال به بله
|
|
||||||
|
|
||||||
|
|
||||||
# Logic & Flow
|
|
||||||
Operation → منطق، پردازش، گرفتن دیتا، تصمیمگیری
|
|
||||||
BaleBot → ورودی/خروجی، ارتباط با بله، فرمت پیام، دکمهها، مدیریت state کاربر
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
elasticsearch==8.13.2
|
elasticsearch==8.13.2
|
||||||
nltk
|
nltk
|
||||||
pydantic
|
pydantic
|
||||||
fastapi
|
|
||||||
|
|
@ -9,11 +9,6 @@ from dependencies import _get_bale_token, _get_bale_bot
|
||||||
router = APIRouter(tags=["bale-bot"])
|
router = APIRouter(tags=["bale-bot"])
|
||||||
##############
|
##############
|
||||||
|
|
||||||
"""
|
|
||||||
deleteMessage
|
|
||||||
message_id
|
|
||||||
chat_id
|
|
||||||
"""
|
|
||||||
|
|
||||||
# @app.post(f"/webhook/{TOKEN}")
|
# @app.post(f"/webhook/{TOKEN}")
|
||||||
@router.post("/webhook/{token}", description="ربات قانون یار")
|
@router.post("/webhook/{token}", description="ربات قانون یار")
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,92 +0,0 @@
|
||||||
|
|
||||||
from router.bale.base_model import *
|
|
||||||
from core.core import *
|
|
||||||
|
|
||||||
def encode_uc(update: BaleUpdate) -> str:
    """Build the ``'<username-or-user-id>:<chat_id>'`` key for an update.

    The sender and chat are taken from ``update.message`` when present,
    otherwise from ``update.callback_query``.  Returns ``"unknown"`` when the
    update carries neither.
    """
    message = update.message
    if message:
        sender, chat = message.from_user, message.chat
    else:
        callback = update.callback_query
        if not callback:
            return "unknown"
        sender, chat = callback.from_user, callback.message.chat

    # Fall back to the numeric user id when the sender has no username.
    name_part = sender.username or sender.id
    return f"{name_part}:{chat.id}"
|
|
||||||
|
|
||||||
|
|
||||||
def decode_uc(uc_id: str) -> tuple:
    """Split a ``'username:chat_id'`` (or ``'user_id:chat_id'``) key.

    Only the first ``:`` separates the two parts.

    Args:
        uc_id: Key in the ``'<username-or-user-id>:<chat_id>'`` format.

    Returns:
        A ``(username, chat_id)`` tuple.  ``chat_id`` is converted to ``int``
        when it is a decimal number (an optional leading ``-`` is accepted so
        negative chat ids round-trip); otherwise it is returned as the
        original string.

    Raises:
        ValueError: If ``uc_id`` contains no ``:`` separator.
    """
    username, separator, chat_id = uc_id.partition(":")
    if not separator:
        raise ValueError(
            f"decode_uc: expected 'username:chat_id', got {uc_id!r}"
        )

    # ``str.isdigit`` alone rejects the sign, so strip it before checking.
    digits = chat_id[1:] if chat_id.startswith("-") else chat_id
    if digits.isdecimal():
        return (username, int(chat_id))
    return (username, chat_id)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_user_info(update: BaleUpdate) -> Dict:
    """Collect the sender/chat fields of an update into a plain dict.

    The sender is read from ``update.message`` when present, otherwise from
    ``update.callback_query``.  For callback queries ``last_name`` is always
    the empty string.  The returned dict also carries the ``uc_id`` built by
    ``encode_uc`` and the update itself.

    Raises:
        ValueError: If the update has neither a message nor a callback query.
    """
    uc_id = encode_uc(update)

    if update.message:
        sender = update.message.from_user
        chat_id = update.message.chat.id
        last_name = sender.last_name or ""
    elif update.callback_query:
        sender = update.callback_query.from_user
        chat_id = update.callback_query.message.chat.id
        last_name = ""
    else:
        raise ValueError("No user info in update")

    return {
        "uc_id": str(uc_id),
        "chat_id": chat_id,
        "user_id": sender.id,
        "username": sender.username,
        "first_name": sender.first_name,
        "last_name": last_name,
        "is_bot": sender.is_bot,
        "update": update,
    }
|
|
||||||
|
|
||||||
|
|
||||||
class UserManager:
    """In-memory registry of ``BaleUser`` objects keyed by their ``uc_id``.

    Two JSON files are loaded with ``load_orjson`` at construction time: the
    VIP username collection and a "temporary data" file.  Their locations
    default to the original deployment paths but can be overridden, so the
    manager also works on other machines and in tests.
    """

    _VIP_USERNAME_PATH = (
        "/home/sabr/back_new/mj_bale_chat_test/mj_bale_chat/vip_username.json"
    )
    _TEMPORARY_DATA_PATH = (
        "/home/sabr/back_new/mj_bale_chat_test/mj_bale_chat/temp.json"
    )

    def __init__(
        self,
        vip_username_path: str = _VIP_USERNAME_PATH,
        temporary_data_path: str = _TEMPORARY_DATA_PATH,
    ):
        """Create an empty registry and load the two data files.

        Args:
            vip_username_path: File passed to ``load_orjson`` for the VIP
                usernames.
            temporary_data_path: File passed to ``load_orjson`` for the
                temporary data.
        """
        self.users: Dict[str, BaleUser] = {}
        self.list_vip_username = load_orjson(vip_username_path)
        self.temporary_data = load_orjson(temporary_data_path)

    def get_or_create(self, update: BaleUpdate) -> BaleUser:
        """Return the user for ``update``, creating it on first sight.

        The user is looked up by the ``uc_id`` from ``extract_user_info``.
        The VIP flag is only applied when the user object is first created.
        The stored user's ``update`` attribute is refreshed on every call.
        """
        user_data = extract_user_info(update)
        uc_id = user_data["uc_id"]

        if user_data["username"] in self.list_vip_username:
            user_data["is_vip"] = True

        if uc_id not in self.users:
            self.users[uc_id] = BaleUser(**user_data)

        user = self.users[uc_id]
        user.update = update
        return user
|
|
||||||
|
|
@ -6,45 +6,6 @@ from typing import Optional, Callable, List, Any
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
class SingleSearchData(BaseModel):
    """One item of a search result list: a score, an id and its content."""

    score:float
    id:str
    content:str
|
|
||||||
|
|
||||||
class BMNewSemanticSearchOutput(BaseModel):
    """Response of a semantic-search request: the query, its hits and timings."""

    query: str
    result : List[SingleSearchData]
    metadata : Dict
    # Per-stage durations; presumably seconds - TODO confirm with the backend.
    embed_model_time : float
    cosine_similarity_time : float
    rerank_time : float
|
|
||||||
|
|
||||||
|
|
||||||
class DbRule(BaseModel):
    """A rule record together with its section and law (qanon) fields.

    NOTE(review): by its name this is the database-side rule that an input
    rule is compared with - confirm against the producer of this payload.
    """

    rule_id: str
    rule_content: str
    rule_type: str
    section_id: str
    section_content: str
    section_full_path :str
    qanon_id: str
    qanon_etebar: str
    qanon_title: str
    state_etebar: str
|
|
||||||
|
|
||||||
class InputRule(BaseModel):
    """A rule record with the id and content of the section it belongs to."""

    rule_id: str
    rule_content: str
    rule_type: str
    section_id: str
    section_content: str
|
|
||||||
|
|
||||||
class SemanticSearchP2P(BaseModel):
    """Pairing of one ``InputRule`` with one ``DbRule`` plus score and metadata."""

    in_rule: InputRule
    db_rule: DbRule
    # Defaults to 0 when the producer supplies no score.
    score: float = 0
    metadata: Dict
|
|
||||||
|
|
||||||
class BaleStartMessageForm(BaseModel):
|
class BaleStartMessageForm(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
is_bot: bool = False
|
is_bot: bool = False
|
||||||
|
|
@ -254,14 +215,13 @@ class StateDetail(BaseModel):
|
||||||
class BaleUser(BaseModel):
|
class BaleUser(BaseModel):
|
||||||
uc_id: str
|
uc_id: str
|
||||||
chat_id: int
|
chat_id: int
|
||||||
user_id: int
|
user_id: str
|
||||||
update: BaleUpdate
|
update: BaleUpdate
|
||||||
username: str = None
|
username: str
|
||||||
is_bot: bool = False
|
is_bot: bool = False
|
||||||
is_vip: bool = False
|
|
||||||
first_name: str = ""
|
first_name: str = ""
|
||||||
last_name: str = ""
|
last_name: str = ""
|
||||||
message_limit:int = 0
|
|
||||||
rule_relation: RuleRelation | None = None
|
rule_relation: RuleRelation | None = None
|
||||||
subject_unities:Dict = {}
|
subject_unities:Dict = {}
|
||||||
|
|
||||||
|
|
@ -274,10 +234,9 @@ class BaleUser(BaseModel):
|
||||||
is_processing_lock : bool = False
|
is_processing_lock : bool = False
|
||||||
is_call_back_query : bool = False
|
is_call_back_query : bool = False
|
||||||
state_detail : StateDetail = None
|
state_detail : StateDetail = None
|
||||||
active_message_id : int = 0
|
last_message_id : int = 0
|
||||||
|
|
||||||
input_query: str = "" # ورودی کاربر
|
input_query: str = "" # ورودی کاربر
|
||||||
last_input_query: str = "" # ورودی کاربر
|
|
||||||
call_back_query: str = "" # ورودی کاربر
|
call_back_query: str = "" # ورودی کاربر
|
||||||
_query_type: str = "" # ورودی کاربر
|
_query_type: str = "" # ورودی کاربر
|
||||||
sub_state: str = "" # برای روندی ها
|
sub_state: str = "" # برای روندی ها
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ from router.bale.base_model import StateDetail
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BUSY_TEXT = "⏳ درخواست قبلی شما در حال پردازش هست، لطفا تا اتمام آن منتظر بمانید ⏳"
|
# Message shown while the user's previous request is still being processed.
# Must be a plain string: a trailing comma inside the parentheses would turn
# it into a 1-tuple and break every place that sends it as message text.
BUSY_TEXT = """⏳ درخواست قبلی شما در حال پردازش هست، لطفا تا اتمام آن منتظر بمانید ⏳"""
|
||||||
|
|
||||||
|
|
||||||
class StateRegistry:
|
class StateRegistry:
|
||||||
|
|
@ -26,13 +26,6 @@ STATE = [
|
||||||
message="""متن حقوقی برای جستجو در قوانین را وارد نمایید""",
|
message="""متن حقوقی برای جستجو در قوانین را وارد نمایید""",
|
||||||
handler="handle_search_in_law",
|
handler="handle_search_in_law",
|
||||||
),
|
),
|
||||||
StateDetail(
|
|
||||||
state="search_in_law_rules",
|
|
||||||
button_text="جستجو در اجزاء 🔎",
|
|
||||||
end_buttons=[],
|
|
||||||
message="""متن حقوقی برای جستجو در اجزاء قانونی را وارد نمایید""",
|
|
||||||
handler="handle_search_in_law_rules",
|
|
||||||
),
|
|
||||||
StateDetail(
|
StateDetail(
|
||||||
state="chat_in_law",
|
state="chat_in_law",
|
||||||
button_text="گفتگو طبق قوانین کشور",
|
button_text="گفتگو طبق قوانین کشور",
|
||||||
|
|
@ -101,11 +94,6 @@ STATE = [
|
||||||
button_text="تماس با ما ☎️",
|
button_text="تماس با ما ☎️",
|
||||||
message="""لطفا برای ارتباط با ما از طریق مرکز فناوری مجلس شورای اسلامی ایران اقدام فرمایید""",
|
message="""لطفا برای ارتباط با ما از طریق مرکز فناوری مجلس شورای اسلامی ایران اقدام فرمایید""",
|
||||||
),
|
),
|
||||||
StateDetail(
|
|
||||||
state="not_yet",
|
|
||||||
button_text="در دست توسعه",
|
|
||||||
message="""این قسمت در دست توسعه قرار دارد.""",
|
|
||||||
),
|
|
||||||
StateDetail(
|
StateDetail(
|
||||||
state="about_us",
|
state="about_us",
|
||||||
button_text="درباره ما ⚡",
|
button_text="درباره ما ⚡",
|
||||||
|
|
@ -146,6 +134,7 @@ def build_buttons_form(button_form):
|
||||||
main_button_form = [
|
main_button_form = [
|
||||||
["chat_in_law"],
|
["chat_in_law"],
|
||||||
["search_in_law"],
|
["search_in_law"],
|
||||||
|
["logical_chat_in_law"],
|
||||||
["rule_making"],
|
["rule_making"],
|
||||||
["qanon_title_repeat"],
|
["qanon_title_repeat"],
|
||||||
["conflict_law_writing_policy"],
|
["conflict_law_writing_policy"],
|
||||||
|
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
[
|
|
||||||
"init_mahdi", "hsafabale", "mmpouya"
|
|
||||||
]
|
|
||||||
Loading…
Reference in New Issue
Block a user