from openai import AsyncOpenAI

LLM_URL = "http://172.16.29.102:8001/v1/"

# item structure:
# item = {
#     'id': '',
#     'system_prompt': '',
#     'user_prompt': '',
#     'assistant_prompt': '',
# }


async def process_item(messages, reasoning_effort='medium', temperature=0.4,
                       top_p=0.9, max_tokens=2048):
    """Generate an answer with the gpt-oss-120b model.

    Args:
        messages: chat messages in OpenAI format (list of role/content dicts).
        reasoning_effort: 'low', 'medium' or 'high'.
        temperature: 0-1, higher values give more creative output.
        top_p: 0-1, nucleus-sampling cutoff.
        max_tokens: upper bound on generated tokens (model context is up to 128K).

    Returns:
        tuple: (True, generated answer) on success,
               (False, failure message) on error.
    """
    # Default failure message, so the final return never hits an unbound name.
    response_message = 'error in llm response generation!'
    try:
        async with AsyncOpenAI(base_url=LLM_URL, api_key="EMPTY") as client:
            model_name = 'gpt-oss-120b'
            # Plain text output is enough here, so use create() and read
            # message.content (parse() is only needed for structured output).
            response = await client.chat.completions.create(
                model=model_name,
                messages=messages,
                temperature=temperature,            # 0-1
                top_p=top_p,                         # 0-1
                reasoning_effort=reasoning_effort,   # low / medium / high
                max_tokens=max_tokens,
                stop=None,
            )
            if response and response.choices:
                response_message = response.choices[0].message.content
                return True, response_message
    except Exception as e:
        print('!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(e)
        print('!!!!!!!!!!!!!!!!!!!!!!!!!')
    return False, response_message
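
# --- usage sketch ---
# A minimal example of building messages from an `item` dict (shaped as
# documented above) and calling process_item. The build_messages helper and
# the __main__ entry point are illustrative additions, not part of the
# original module; the sample item contents are made up.

import asyncio


def build_messages(item):
    """Turn an item dict into OpenAI-style chat messages."""
    messages = [
        {"role": "system", "content": item.get("system_prompt", "")},
        {"role": "user", "content": item.get("user_prompt", "")},
    ]
    if item.get("assistant_prompt"):
        messages.append({"role": "assistant", "content": item["assistant_prompt"]})
    return messages


async def main():
    # Hypothetical item for demonstration only.
    item = {
        'id': 'demo-1',
        'system_prompt': 'You are a helpful assistant.',
        'user_prompt': 'Summarize what the reasoning_effort setting controls.',
        'assistant_prompt': '',
    }
    ok, answer = await process_item(build_messages(item), reasoning_effort='low')
    print('success:', ok)
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())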