# wordbank/main.py
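"""FastAPI service for the Persian word bank.

Exposes a few stub endpoints that return static sample word records, plus a
standalone bulk loader (run as __main__) that reads LLM-generated word checks
from a JSON file and posts them to a local indexing service
(http://localhost:8010) using a multiprocessing pool.
"""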
import asyncio
import time
from multiprocessing import Pool
from typing import Optional

import orjson
import requests
from fastapi import FastAPI, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from base_model import (
    word_class_dict,
    tags_dict,
    lang_dict,
    scope_dict,
    WordCorrector,
    IDRequest,
)
# Create the application
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # or a list of the domains that should be allowed
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
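# Note: per the CORS spec, browsers reject credentialed responses whose
# Access-Control-Allow-Origin is the wildcard "*". If authenticated
# cross-origin calls are needed, list the allowed origins explicitly
# instead of using "*" together with allow_credentials=True.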
def sample_data_with_id(n_out):
    """Return n_out copies of a static sample word record."""
    result = []
    for i in range(n_out):
        sample_data = {
            "id": "11111111111",
            "word": "تست",
            "is_correct": True,
            "nearest_correct_word": "اصلاح شده",
            "origin": "اصلاحی",
            "stem": "اصلاح",
            "is_proper_noun": False,
            "word_class": "نادر",
            "tags": ["عامیانه", "محاوره‌ای"],
            "ner_description": "شسیبشسیبیسشبش",
            "llm_description": "شسیبشسیبسشبش",
            "user_description": "شیسبشسبیشسیب",
            "admin_description": "شسیبشسب",
            "confidence": 0.000005,
            "lang": "فارسی",
            "scope": "سایر",
            "edited_at": i + 100,
        }
        result.append(sample_data)
    return result
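# Example: sample_data_with_id(2) yields two copies of the sample record that
# differ only in "edited_at" (100 and 101).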
# -------------------------
# GET endpoints
# -------------------------
@app.get("/")
async def root():
    return {"message": "API is running successfully!"}
@app.get("/api/words")
async def get_words(request: Request, page_size: int = Query(100)):
    print("Headers:", request.headers)  # request headers
    print("Query Params:", request.query_params)
    # Return a static list of sample words
    result = sample_data_with_id(page_size)
    print(f"Returning {len(result)} words")
    return {"result": result}
@app.get("/api/word/{word_id}")
async def get_word(word_id: str, request: Request):
    print("Headers:", request.headers)  # request headers
    print("Query Params:", request.query_params)
    # Return a single static sample word for the requested id
    result = sample_data_with_id(1)[0]
    result["id"] = word_id
    return {"result": result}
# -------------------------
# POST endpoints
# -------------------------
@app.post("/api/search")
async def search_word(data: IDRequest) -> WordCorrector:
    # Raw-body variant kept for reference:
    # async def search_word(request: Request):
    #     body = await request.json()
    #     print("Request Body:", body)  # show the incoming JSON
    #     return {"status": "ok", "received": body}
    return sample_data_with_id(1)[0]
@app.post("/api/update")
async def update_word(data: IDRequest):
    return {
        "status": "updated",
        "updated_id": data.id,
        "name": data.name or "no name provided",
        "message": "Word updated successfully!",
    }
# Elasticsearch helpers (not implemented yet)
async def get_from_es():
    pass


async def add_from_es():
    pass


async def update_from_es():
    pass
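# A minimal sketch of how get_from_es might be filled in, assuming the local
# indexing service at http://localhost:8010 also exposes a search endpoint at
# /v1/indices/{index}/search. That endpoint name and its request body are
# assumptions for illustration only; they are not confirmed by this file.
def _example_get_from_es(title: str, es_name: str = "aiword") -> dict:
    url = f"http://localhost:8010/v1/indices/{es_name}/search"  # hypothetical endpoint
    headers = {"Content-Type": "application/json"}
    response = requests.post(url, json={"title": title}, headers=headers)  # assumed body shape
    response.raise_for_status()
    return response.json()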
async def insert_from_json_file(data, es_name="aiword"):
    # TODO: this should be rewritten using the Elasticsearch helper.
    # Reference shape of the document sent to the index:
    output_format = {
        "title": "",
        "is_correct": False,
        "nearest_correct_word": "",
        "stem": "",
        "origin": "",
        "word_classes": [],
        "word_tags": [],
        "is_proper_noun": False,
        "ner_description": "",
        "llm_description": "",
        "user_description": "",
        "admin_description": "",
        "confidence": 0.0,
        "language_key": "persian",
        "domain_tags": [],
    }
    # Reference shape of one item in the input JSON file:
    input_format = {
        "word": "آب",
        "result": {
            "word": "آب",
            "is_correct": False,
            "origin": "آب",
            "word_class": "noun",
            "proper_noun": False,
            "tags": [],
            "description": "A common noun meaning 'water' in Persian.",
            "confidence": 1.0,
        },
    }
    # Target URL of the local indexing service
    url = f"http://localhost:8010/v1/indices/{es_name}/insert"
    # Header for sending JSON
    headers = {"Content-Type": "application/json"}
    result = []
    for item in data:
        if item["result"] != "model-failed" and isinstance(item["result"], dict):
            # Map booleans to the Persian labels stored in the index
            if item["result"]["is_correct"] is True:
                is_correct = "صحیح"  # correct
            elif item["result"]["is_correct"] is False:
                is_correct = "غلط"  # incorrect
            else:
                is_correct = "نامشخص"  # unknown
            if item["result"]["proper_noun"] is True:
                is_proper_noun = "خاص"  # proper noun
            elif item["result"]["proper_noun"] is False:
                is_proper_noun = "عام"  # common noun
            else:
                is_proper_noun = "نامشخص"  # unknown
            payload = {
                "document": {
                    "title": item["word"],
                    "is_correct": is_correct,
                    "nearest_correct_word": item["result"]["word"],
                    "stem": "",
                    "origin": item["result"]["origin"],
                    "word_classes": [item["result"]["word_class"]],
                    "word_tags": item["result"]["tags"],
                    "is_proper_noun": is_proper_noun,
                    "ner_description": item["result"]["description"],
                    "llm_description": "",
                    "user_description": "",
                    "admin_description": "",
                    "time_create": int(time.time()),
                    "confidence": item["result"].get("confidence", 0.0),
                    "language_key": "persian",
                    "domain_tags": [],
                    "ref_key": "llm_msl_v1",
                }
            }
        else:
            # Fall back to an "unknown" document when the model result is unusable
            print(f"Unable to process item: {item}")
            payload = {
                "document": {
                    "title": item["word"],
                    "is_correct": "نامشخص",
                    "nearest_correct_word": "",
                    "stem": "",
                    "origin": "",
                    "word_classes": [],
                    "word_tags": [],
                    "is_proper_noun": "نامشخص",
                    "ner_description": "",
                    "llm_description": "",
                    "user_description": "",
                    "admin_description": "",
                    "time_create": int(time.time()),
                    "confidence": 0.0,
                    "language_key": "persian",
                    "domain_tags": [],
                    "ref_key": "llm_msl_v1",
                }
            }
        try:
            print(f"payload {payload}")
            response = requests.post(url, json=payload, headers=headers)
            print(f"Send status for '{item['word']}': {response.status_code}")
            if response.status_code not in (200, 201):
                print("Error in response body:", response.text)
        except requests.exceptions.RequestException as e:
            print(f"Error sending request for '{item['word']}': {e}")
def wrapper(item_index_pair):
    # Runs in a worker process: unpack (index, item) and insert a single item.
    i, item = item_index_pair
    print(f"--- {i}/ item -> {item}")
    asyncio.run(insert_from_json_file([item]))
if __name__ == "__main__":
    json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
    with open(json_file, "rb") as f:
        data = orjson.loads(f.read())
    # data = data[8881:]
    # Skip everything up to (and including) the last word that was already inserted
    for i, j in enumerate(data):
        if j["word"] == "آوانویسیهایش":
            print(f"found it ->{i} ->{j}")
            data = data[i + 1:]
            break
    total = len(data)
    print(
        f"data-> {data[0]}",
        f"total {total}",
        sep="\n",
    )
    # Send the index along with each item
    indexed_data = list(enumerate(data, start=4393))
    with Pool(6) as p:  # 6 worker processes (CPU cores)
        p.map(wrapper, indexed_data)
################################################
# json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
# with open(json_file, "rb") as f:
# data = orjson.loads(f.read())
# for i, j in enumerate(data, start=1):
# if i >= 4393:
# print(f"--- {i}/ {len(data)} --- j-> {j}")
# # break
# asyncio.run(insert_from_json_file([j]))
# import uvicorn
# uvicorn.run("main:app", host="0.0.0.0", port=8020, reload=True)