64 lines
2.3 KiB
Python
Executable File
64 lines
2.3 KiB
Python
Executable File
from openai import AsyncOpenAI
|
|
|
|
LLM_URL = "http://172.16.29.102:8001/v1/"
|
|
|
|
# item structure:
|
|
# item = {
|
|
# 'id' : '',
|
|
# 'system_prompt' : '',
|
|
# 'user_prompt' : '',
|
|
# 'assistant_prompt' : '',
|
|
# }
|
|
|
|
|
|
async def process_item(messages, reasoning_effort='medium', temperature=0.4, top_p=0.9, max_tokens=2048):
    """Generate an answer with the gpt-oss-120b model served at LLM_URL.

    Args:
        messages: Chat history in OpenAI format, e.g.
            [{"role": "system", "content": ...}, {"role": "user", "content": ...}].
        reasoning_effort: One of 'low' / 'medium' / 'high'.
        temperature: 0-1 sampling temperature (creativity).
        top_p: 0-1 nucleus-sampling cutoff (logic).
        max_tokens: Cap on generated tokens (model context up to 128K).

    Returns:
        tuple: (True, generated answer text) on success,
               (False, failure message) on API error or an empty response.
    """
    failure_message = 'error in llm response generation!'
    try:
        # api_key is a placeholder; the self-hosted endpoint does no auth.
        async with AsyncOpenAI(base_url=LLM_URL, api_key="EMPTY") as client:
            # .create (not .parse): no structured response_format is requested,
            # and we read plain .message.content below.
            response = await client.chat.completions.create(
                model='gpt-oss-120b',
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                reasoning_effort=reasoning_effort,
                max_tokens=max_tokens,  # BUG FIX: was commented out, parameter silently ignored
                stop=None,
            )

            if response and response.choices:
                return True, response.choices[0].message.content

            # BUG FIX: original fell through here and implicitly returned None,
            # breaking the documented (bool, str) contract for callers.
            return False, failure_message

    except Exception as e:
        print('!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(e)
        print('!!!!!!!!!!!!!!!!!!!!!!!!!')
        return False, failure_message