Compare commits
No commits in common. "0975a65347731a12b1ce0f8fe029a7a2f199be16" and "55d0dbb62cfae84d54b5e5ab52a840d7d40d5ac9" have entirely different histories.
0975a65347 ... 55d0dbb62c
@@ -22,7 +22,6 @@ from langchain_openai import ChatOpenAI  # pip install -U langchain_openai
 import requests
 # from FlagEmbedding import FlagReranker  # deldar-reranker-v2
 import aiofiles
-import oss

 # chatbot = FastAPI()
 # origins = ["*"]
@@ -54,7 +53,7 @@ messages = [
     {"role": "developer", "content": determine_refrence},
 ]

-models = ["gpt-4o-mini", "gemini-2.5-flash-lite", "deepseek-chat"]
+models = ["gpt-4o-mini","gemini-2.5-flash-lite", "deepseek-chat"]
 normalizer_obj = PersianVectorAnalyzer()
 pipe = None
 content_list, ids, prefix_list, faiss_index = [], [], [], []
@@ -149,76 +148,6 @@ def llm_base_request2(system_prompt, user_prompt):

     return answer, cost

-async def oss_base_request(sys_prompt, user_prompt):
-    base_messages = []
-    try:
-        if sys_prompt:
-            base_messages.append({
-                "role": "system",
-                "content": sys_prompt
-            })
-
-        base_messages.append({
-            "role": "user",
-            "content": user_prompt
-        })
-        response = await oss.process_item(base_messages, reasoning_effort='low', temperature=0.1, max_tokens=40)
-
-        if response[0]:
-            answer = response[1]
-        else:
-            answer = ''
-        cost = 0
-
-    except Exception as error:
-        # to handle errors, the file write can be done asynchronously (if needed)
-        async with aiofiles.open('./llm-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file:
-            error_message = f'\n\nquery: {user_prompt.strip()}\nerror:{error} \n------------------------------\n'
-            await file.write(error_message)  # async file write
-
-        return '', 0
-
-    return answer, cost
-
-async def oss_request(query):
-
-    if query == '':
-        return 'لطفا متن سوال را وارد نمائید', 0
-
-    try:
-        messages.append({"role": "user", "content": query})
-        print('final prompt request attempt')
-        response = await oss.process_item(messages=messages)  # reasoning_effort='high'
-        print(response)
-        if response[0]:
-            answer = response[1]
-        else:
-            answer = 'متاسفانه پاسخی دریافت نشد'
-        cost_prompt = 0
-        # append the answer to the conversation history as well
-        messages.append({"role": "assistant", "content": answer})
-
-        response_dict = {}
-        response_dict['output'] = str(response)
-        async with aiofiles.open('./llm-answer/messages.json', mode='w', encoding='utf-8') as output:
-            await output.write(json.dumps(response_dict, ensure_ascii=False, indent=2))
-        print('response created')
-        async with aiofiles.open('./llm-answer/chat-objs.txt', mode='a+', encoding='utf-8') as file:
-            response_value = '0'
-            await file.write(response_value)  # estimated_cost
-
-    except Exception as error:
-        print('error-in-llm.txt writing ...')
-        async with aiofiles.open('./llm-answer/error-in-llm.txt', mode='a+', encoding='utf-8') as file:
-            error_message = f'\n\nquery: {query.strip()}\nerror:{error} \n-------------------------------\n'
-            await file.write(error_message)
-
-        return 'با عرض پوزش؛ متاسفانه خطایی رخ داده است. لطفا لحظاتی دیگر دوباره تلاش نمائید', 0
-    print('================')
-    print(f'len messages: {len(messages)}')
-    print('================')
-    return answer, cost_prompt
-
 async def llm_request(query, model):

     if query == '':
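Editor's note: the removed oss_request and the retained llm_request appear interchangeable in run_chatbot because both resolve to an (answer, cost) tuple. A minimal sketch of that shared contract, under that assumption (some_request and call_model are hypothetical names, not functions from this repository):

    # sketch of the request-helper contract assumed by run_chatbot;
    # call_model is a hypothetical stand-in for the actual transport
    async def some_request(prompt: str) -> tuple[str, float]:
        try:
            answer = await call_model(prompt)  # hypothetical model call
            return answer, 0.0                 # cost tracking is stubbed to zero in this diff
        except Exception:
            return '', 0.0                     # empty answer signals failure to the caller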
@@ -560,7 +489,7 @@ async def get_title_user_prompt(query: str):
     """
     get a query and prepare a prompt to generate a title based on it
     """
-    title_prompt = f'برای متن {query} یک عنوان با معنا که بین 3 تا 6 کلمه داشته باشد، در قالب یک رشته متن ایجاد کن. سبک و لحن عنوان، حقوقی و کاملا رسمی باشد. عنوان تولید شده کاملا ساده و بدون هیچ مارک داون یا علائم افزوده ای باشد. غیر از عنوان، به هیچ وجه توضیح اضافه ای در قبل یا بعد آن اضافه نکن.'
+    title_prompt = f'برای متن {query} زیر، یک موضوع با معنا که بین 3 تا 6 کلمه داشته باشد، در قالب یک رشته متن ایجاد کن. سبک ادبی عنوان، حقوقی و کاملا رسمی باشد. عنوان تولید شده کاملا ساده و بدون هیچ مارک داون یا علائم افزوده ای باشد. غیر از عنوان، به هیچ وجه توضیح اضافه ای در قبل یا بعد آن اضافه نکن.'
     return title_prompt

 async def get_title_system_prompt():
@@ -588,11 +517,7 @@ async def run_chatbot(query:str, chat_id:str):
     before_title_time = datetime.datetime.now()
     title_system_prompt = await get_title_system_prompt()
     title_user_prompt = await get_title_user_prompt(query)
-    # title, cost_title = await llm_base_request(title_system_prompt, title_user_prompt)
-    title, cost_title = await oss_base_request(title_system_prompt, title_user_prompt)
-    if not title:
-        title = query
-
+    title, cost_title = await llm_base_request(title_system_prompt, title_user_prompt)
     title_prompt_duration = (datetime.datetime.now() - before_title_time).total_seconds()

     if title == '':
@@ -608,26 +533,26 @@ async def run_chatbot(query:str, chat_id:str):

     prompt = f'برای پرسش "{query}" از میان مواد قانونی "{result_passages_text}" .پاسخ مناسب و دقیق را استخراج کن. درصورتی که مطلبی مرتبط با پرسش در متن پیدا نشد، فقط پاسخ بده: "متاسفانه در منابع، پاسخی پیدا نشد!"'

-    # for model in models:
+    for model in models:
         # before_prompt_credit = credit_refresh()
-    try:
-        # llm_model = model
-        # print(f'using model: {llm_model}')
-        # llm_answer, cost_prompt = await llm_request(prompt, model)
-        llm_answer, cost_prompt = await oss_request(prompt)
-
-    except Exception as error:
-        # after_prompt_credit = credit_refresh()
-        # prompt_cost = int(before_prompt_credit) - int(after_prompt_credit)
-        error = f'model: gpt.oss.120b \n{error}\n\n'
-        print('+++++++++++++++++')
-        print(f'llm-error.txt writing error: {error}')
-        print('+++++++++++++++++')
-        async with aiofiles.open('./llm-answer/llm-error.txt', mode='a+', encoding='utf-8') as file:
-            await file.write(error)
-        prompt_status = False
-        status_text = 'با عرض پوزش، سرویس موقتا در دسترس نیست. لطفا دقایقی دیگر دوباره تلاش نمائید!'
+        try:
+            llm_model = model
+            print(f'using model: {llm_model}')
+            llm_answer, cost_prompt = await llm_request(prompt, model)
+
+            break
+        except Exception as error:
+            # after_prompt_credit = credit_refresh()
+            # prompt_cost = int(before_prompt_credit) - int(after_prompt_credit)
+            error = f'model: {model} \n{error}\n\n'
+            print('+++++++++++++++++')
+            print(f'llm-error.txt writing error: {error}')
+            print('+++++++++++++++++')
+            async with aiofiles.open('./llm-answer/llm-error.txt', mode='a+', encoding='utf-8') as file:
+                await file.write(error)
+            prompt_status = False
+            status_text = 'با عرض پوزش، سرویس موقتا در دسترس نیست. لطفا دقایقی دیگر دوباره تلاش نمائید!'

     # when the prompt status is invalid, an object with the values below is returned
     else:
         chat_obj = {
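Editor's note: the added loop is an ordered-fallback pattern: each entry in models is tried in turn, break exits on the first success, and the except branch logs the failure and lets the loop move on to the next model. A compact sketch of the same pattern (answer_with_fallback and request are hypothetical names for illustration):

    # sketch: ordered fallback over candidate models; `request` stands in
    # for an async helper like llm_request(prompt, model) -> (answer, cost)
    async def answer_with_fallback(prompt, models):
        for model in models:
            try:
                answer, cost = await request(prompt, model)  # hypothetical helper
                return answer, cost                          # first successful model wins
            except Exception as error:
                print(f'model {model} failed: {error}')      # log, then try the next model
        return None, 0                                       # every model failed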
@@ -1,5 +1,4 @@
 FROM docker.tavasi.ir/tavasi/qachat_base:1.0.0
 RUN pip install uvicorn[standard]
 RUN pip install FlagEmbedding
-RUN pip install aiofiles
-RUN pip install openai
+RUN pip install aiofiles
File diff suppressed because one or more lines are too long
@@ -539,40 +539,3 @@ Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessa
 31.68
 -------------------------------

-45.93
--------------------------------
-
-74.96
--------------------------------
-
-100.02
--------------------------------
-
-129.08
--------------------------------
-
-123.44
--------------------------------
-
-236.05
--------------------------------
-
-259.93
--------------------------------
-
-291.77
--------------------------------
-
-319.9
--------------------------------
-
-348.04
--------------------------------
-
-383.19
--------------------------------
-
-235.45
--------------------------------
-
-0000000000000000000000000000
@@ -372,46 +372,3 @@
 44238.73
 44238.73
 44238.73
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-44201.47
-43951.09
-42258.63
-42258.63
-42258.63
-42258.63
-42258.63
-42258.63
-42258.63
File diff suppressed because one or more lines are too long

oss.py (63 lines)
@@ -1,63 +0,0 @@
-from openai import AsyncOpenAI
-
-LLM_URL = "http://172.16.29.102:8001/v1/"
-
-# item structure:
-# item = {
-#     'id': '',
-#     'system_prompt': '',
-#     'user_prompt': '',
-#     'assistant_prompt': '',
-# }
-
-async def process_item(messages, reasoning_effort='medium', temperature=0.4, top_p=0.9, max_tokens=2048):
-    """
-    generates an answer with the gpt-oss-120b model
-
-    **Args:
-        reasoning_effort = 'medium'  # -> low / high / medium
-        temperature = 0.4  # 0-1 -> creativity
-        top_p = 0.9  # 0-1 -> logic
-        max_tokens = 2048  # -> ... 128K
-    **Returns (tuple):
-        returns True, generated answer / False, failure message
-    """
-    try:
-        async with AsyncOpenAI(base_url=LLM_URL, api_key="EMPTY") as client:
-
-            model_name = 'gpt-oss-120b'
-
-            # messages = [
-            #     {"role": "system", "content": prompt_params.get("system_prompt", "")},
-            #     {"role": "user", "content": prompt_params.get("user_prompt", "")},
-            # ]
-            # if prompt_params.get("assistant_prompt"):
-            #     messages.append(
-            #         {"role": "assistant", "content": prompt_params["assistant_prompt"]}
-            #     )
-            # print(f'==== max_token {max_token}')
-
-            response = await client.chat.completions.parse(
-                model=model_name,
-                messages=messages,
-                temperature=temperature,  # 0-1
-                top_p=top_p,  # 0-1
-                reasoning_effort=reasoning_effort,  # low / high / medium
-                # max_tokens=max_tokens,  # ... 128K
-                stop=None,
-            )
-
-            # print('666666666666666666666666666666666')
-            # print(f"response.choices[0].message.parsed: {response.choices[0].message.parsed}")
-            # print('666666666666666666666666666666666')
-
-            if response and response.choices:  # and response.choices[0].message.parsed:
-                response_message = response.choices[0].message.content
-                return True, response_message
-
-    except Exception as e:
-        response_message = 'error in llm response generation!'
-        print('!!!!!!!!!!!!!!!!!!!!!!!!!')
-        print(e)
-        print('!!!!!!!!!!!!!!!!!!!!!!!!!')
-        return False, response_message
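Editor's note: the deleted oss.py exposes a single coroutine, process_item, which reports failure through its return value rather than raising to the caller: True plus the generated text on success, False plus a message on error (and, as written, an implicit None when the response has no choices). A minimal sketch of how the removed callers above consumed it (the message content is illustrative only):

    # sketch: driving oss.process_item the way oss_base_request did
    import asyncio
    import oss

    async def main():
        messages = [{"role": "user", "content": "..."}]  # illustrative content
        response = await oss.process_item(messages, reasoning_effort='low',
                                          temperature=0.1, max_tokens=40)
        if response and response[0]:   # guard: process_item can fall through and return None
            print(response[1])
        else:
            print('request failed')

    asyncio.run(main())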