rag_qavanin_api/_old/oss.py

from openai import AsyncOpenAI

LLM_URL = "http://172.16.29.102:8001/v1/"

# expected input structure (OpenAI chat format):
# messages = [
#     {"role": "system", "content": "<system prompt>"},
#     {"role": "user", "content": "<user prompt>"},
#     {"role": "assistant", "content": "<assistant prompt>"},  # optional
# ]

async def process_item(messages, reasoning_effort='medium', temperature=0.4, top_p=0.9, max_tokens=2048):
    """
    Generates an answer with the gpt-oss-120b model.

    Args:
        messages (list): chat messages in OpenAI format.
        reasoning_effort (str): 'low' / 'medium' / 'high'.
        temperature (float): 0-1, controls creativity of the output.
        top_p (float): 0-1, nucleus sampling threshold.
        max_tokens (int): maximum number of tokens to generate (up to 128K).

    Returns:
        tuple: (True, generated answer) on success, (False, failure message) otherwise.
    """
    try:
        async with AsyncOpenAI(base_url=LLM_URL, api_key="EMPTY") as client:
            model_name = 'gpt-oss-120b'
            response = await client.chat.completions.create(
                model=model_name,
                messages=messages,
                temperature=temperature,            # 0-1
                top_p=top_p,                        # 0-1
                reasoning_effort=reasoning_effort,  # 'low' / 'medium' / 'high'
                max_tokens=max_tokens,
                stop=None,
            )
            if response and response.choices:
                return True, response.choices[0].message.content
            return False, 'empty response from llm!'
    except Exception as e:
        print(f'error in llm response generation: {e}')
        return False, 'error in llm response generation!'
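

# Minimal usage sketch: assumes the OpenAI-compatible server at LLM_URL is
# reachable and serving gpt-oss-120b; the prompts below are placeholders.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Briefly explain what an embedding is."},
        ]
        ok, answer = await process_item(messages, reasoning_effort='low')
        print(ok, answer)

    asyncio.run(_demo())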