294 lines
8.7 KiB
Python
294 lines
8.7 KiB
Python
from fastapi import FastAPI
|
|
from pydantic import BaseModel
|
|
from typing import Optional
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
import asyncio
|
|
import requests
|
|
import time
|
|
from base_model import (
|
|
word_class_dict,
|
|
tags_dict,
|
|
lang_dict,
|
|
scope_dict,
|
|
WordCorrector,
|
|
IDRequest,
|
|
)
|
|
from fastapi import Request
|
|
from fastapi import Query, Request
|
|
import orjson
|
|
from multiprocessing import Pool
|
|
import orjson
|
|
import requests
|
|
import asyncio
|
|
import time
|
|
|
|
# Build the FastAPI application.
app = FastAPI()

# Allow cross-origin requests from any domain.
# NOTE(review): browsers reject credentialed requests when allow_origins is
# "*" together with allow_credentials=True — list explicit origins in
# production; confirm against the deployment frontends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # or the list of domains that should be allowed
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
def sample_data_with_id(n_out):
    """Produce ``n_out`` mock word records for the demo endpoints.

    Every record carries the same static fields; only ``edited_at``
    varies (``100``, ``101``, ...) so rows are distinguishable.

    Args:
        n_out: number of records to generate (0 yields an empty list).

    Returns:
        A list of ``n_out`` fresh dicts (no shared mutable state).
    """
    return [
        {
            "id": "11111111111",
            "word": "تست",
            "is_correct": True,
            "nearest_correct_word": "اصلاح شده",
            "origin": "اصلاحی",
            "stem": "اصلاح",
            "is_proper_noun": False,
            "word_class": "نادر",
            "tags": ["عامیانه", "محاورهای"],
            "ner_description": "شسیبشسیبیسشبش",
            "llm_description": "شسیبشسیبسشبش",
            "user_description": "شیسبشسبیشسیب",
            "admin_description": "شسیبشسب",
            "confidence": 0.000005,
            "lang": "فارسی",
            "scope": "سایر",
            "edited_at": offset + 100,
        }
        for offset in range(n_out)
    ]
|
|
|
|
|
|
# -------------------------
|
|
# GET endpoints
|
|
# -------------------------
|
|
|
|
|
|
@app.get("/")
|
|
async def root():
|
|
return {"message": "API is running successfully!"}
|
|
|
|
|
|
@app.get("/api/words")
|
|
async def get_words(request: Request, page_size: int = Query(100)):
|
|
print("Headers:", request.headers) # هدرهای درخواست
|
|
print("Query Params:", request.query_params)
|
|
|
|
# یک لیست استاتیک از کلمات
|
|
result = sample_data_with_id(page_size)
|
|
print(f"Returning {len(result)} words")
|
|
return {"result": result}
|
|
|
|
|
|
@app.get("/api/word/{word_id}")
|
|
async def get_words(request: Request, page_size: int = Query(100)):
|
|
print("Headers:", request.headers) # هدرهای درخواست
|
|
print("Query Params:", request.query_params)
|
|
|
|
# یک لیست استاتیک از کلمات
|
|
result = sample_data_with_id(page_size)
|
|
print(f"Returning {len(result)} words")
|
|
return {"result": result}
|
|
|
|
|
|
# -------------------------
|
|
# POST endpoints
|
|
# -------------------------
|
|
|
|
|
|
@app.post("/api/search")
|
|
async def search_word(data: IDRequest) -> WordCorrector:
|
|
# (request: Request):
|
|
# body = await request.json()
|
|
# print("Request Body:", body) # نمایش JSON ورودی
|
|
# return {"status": "ok", "received": body}
|
|
|
|
return sample_data
|
|
|
|
|
|
@app.post("/api/update")
|
|
async def update_word(data: IDRequest) -> WordCorrector:
|
|
|
|
return {
|
|
"status": "updated",
|
|
"updated_id": data.id,
|
|
"name": data.name or "no name provided",
|
|
"message": "Word updated successfully!",
|
|
}
|
|
|
|
|
|
async def get_from_es():
    """Stub: fetch word documents from Elasticsearch (not implemented)."""
|
|
|
|
|
|
async def add_from_es():
    """Stub: add a word document to Elasticsearch (not implemented)."""
|
|
|
|
|
|
async def update_from_es():
    """Stub: update a word document in Elasticsearch (not implemented)."""
|
|
|
|
|
|
|
|
async def insert_from_json_file(data, es_name="aiword"):
    """Insert LLM word-check results into the ``es_name`` Elasticsearch index.

    Each item in ``data`` is expected to look like::

        {"word": "...", "result": {"word": ..., "is_correct": ...,
         "origin": ..., "word_class": ..., "proper_noun": ...,
         "tags": [...], "description": ..., "confidence": ...}}

    Items whose ``result`` is the string ``"model-failed"`` (or otherwise
    not a dict) are indexed with placeholder fields so the word is still
    recorded.

    Bug fix: the proper-noun mapping used ``if`` / ``if`` / ``else``
    instead of ``if`` / ``elif`` / ``else``, so whenever ``proper_noun``
    was ``True`` the trailing ``else`` immediately overwrote 'خاص' with
    'نامشخص'. Also removed the unused ``output_format`` / ``input_format``
    / ``result`` variables.

    NOTE(review): declared ``async`` but performs blocking ``requests``
    calls; this should eventually use an async HTTP client (per the
    original TODO, the Elastic helper).

    Args:
        data: iterable of word/result items as described above.
        es_name: target index name in the local Elasticsearch proxy.
    """
    url = f"http://localhost:8010/v1/indices/{es_name}/insert"
    headers = {"Content-Type": "application/json"}

    for item in data:
        if item['result'] != "model-failed" and isinstance(item["result"], dict):
            res = item["result"]

            # Map the tri-state correctness flag to its Persian label.
            if res["is_correct"] is True:
                is_correct = 'صحیح'
            elif res["is_correct"] is False:
                is_correct = 'غلط'
            else:
                is_correct = 'نامشخص'

            # Map the proper-noun flag (fixed: was `if`/`if`/`else`, which
            # clobbered the True branch).
            if res["proper_noun"] is True:
                is_proper_noun = 'خاص'
            elif res["proper_noun"] is False:
                is_proper_noun = 'عام'
            else:
                is_proper_noun = 'نامشخص'

            payload = {
                "document": {
                    "title": item["word"],
                    "is_correct": is_correct,
                    "nearest_correct_word": res["word"],
                    "stem": "",
                    "origin": res["origin"],
                    "word_classes": [res["word_class"]],
                    "word_tags": res["tags"],
                    "is_proper_noun": is_proper_noun,
                    "ner_description": res["description"],
                    "llm_description": "",
                    "user_description": "",
                    "admin_description": "",
                    "time_create": int(time.time()),
                    "confidence": res.get("confidence", 0.0),
                    "language_key": "persian",
                    "domain_tags": [],
                    "ref_key": "llm_msl_v1",
                }
            }
        else:
            # Model failed on this word — index a placeholder record so the
            # word itself is not lost.
            print(f'Unable to process item: {item}')
            payload = {
                "document": {
                    "title": item["word"],
                    "is_correct": 'نامشخص',
                    "nearest_correct_word": '',
                    "stem": "",
                    "origin": '',
                    "word_classes": [],
                    "word_tags": [],
                    "is_proper_noun": 'نامشخص',
                    "ner_description": '',
                    "llm_description": "",
                    "user_description": "",
                    "admin_description": "",
                    "time_create": int(time.time()),
                    "confidence": 0.0,
                    "language_key": "persian",
                    "domain_tags": [],
                    "ref_key": "llm_msl_v1",
                }
            }

        # Post the document; log (don't raise) on failure so one bad item
        # doesn't abort the whole batch.
        try:
            print(f'payload {payload}')
            response = requests.post(url, json=payload, headers=headers)
            print(f"وضعیت ارسال برای '{item['word']}': {response.status_code}")
            if response.status_code not in (200, 201):
                print("خطا در بدنه پاسخ:", response.text)
        except requests.exceptions.RequestException as e:
            print(f"خطا در ارسال درخواست برای '{item['word']}': {e}")
|
|
|
|
def wrapper(item_index_pair):
    """Pool worker: unpack an (index, item) pair and insert that one item."""
    position, record = item_index_pair
    print(f"--- {position}/ item -> {record}")
    # Each worker process runs its own event loop for the async inserter.
    asyncio.run(insert_from_json_file([record]))
|
|
|
|
if __name__ == "__main__":
|
|
json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
|
|
with open(json_file, "rb") as f:
|
|
data = orjson.loads(f.read())
|
|
|
|
# data = data[8881:]
|
|
for i, j in enumerate(data):
|
|
if j['word'] == 'آوانویسیهایش':
|
|
print(f'found it ->{i} ->{j}')
|
|
data = data[i+1:]
|
|
|
|
|
|
total = len(data)
|
|
print(
|
|
f'data-> {data[0]}',
|
|
f'total {total}',
|
|
sep='\n'
|
|
)
|
|
|
|
# برای هر آیتم، ایندکس رو هم ارسال میکنیم
|
|
indexed_data = list(enumerate(data, start=4393))
|
|
|
|
with Pool(6) as p: # 6 هسته CPU
|
|
p.map(wrapper, indexed_data)
|
|
################################################
|
|
|
|
# json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
|
|
# with open(json_file, "rb") as f:
|
|
# data = orjson.loads(f.read())
|
|
|
|
# for i, j in enumerate(data, start=1):
|
|
# if i >= 4393:
|
|
# print(f"--- {i}/ {len(data)} --- j-> {j}")
|
|
# # break
|
|
# asyncio.run(insert_from_json_file([j]))
|
|
|
|
|
|
# import uvicorn
|
|
# uvicorn.run("main:app", host="0.0.0.0", port=8020, reload=True)
|