# wordbank/main.py
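"""FastAPI service for the Persian word bank.

Exposes a few stub endpoints that return static sample word records, plus a
standalone bulk loader (run as __main__) that reads LLM-generated word checks
from a JSON file and posts them to a local indexing service
(http://localhost:8010) using a multiprocessing pool.
"""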
import asyncio
import time
from multiprocessing import Pool
from typing import Optional

import orjson
import requests
from fastapi import FastAPI, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from base_model import (
    word_class_dict,
    tags_dict,
    lang_dict,
    scope_dict,
    WordCorrector,
    IDRequest,
)
# Create the application
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # or a list of the domains that should be allowed
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
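# Note: per the CORS spec, browsers reject credentialed responses whose
# Access-Control-Allow-Origin is the wildcard "*". If authenticated
# cross-origin calls are needed, list the allowed origins explicitly
# instead of using "*" together with allow_credentials=True.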
def sample_data_with_id(n_out):
    """Return n_out copies of a static sample word record."""
    result = []
    for i in range(n_out):
        sample_data = {
            "id": "11111111111",
            "word": "تست",
            "is_correct": True,
            "nearest_correct_word": "اصلاح شده",
            "origin": "اصلاحی",
            "stem": "اصلاح",
            "is_proper_noun": False,
            "word_class": "نادر",
            "tags": ["عامیانه", "محاوره‌ای"],
            "ner_description": "شسیبشسیبیسشبش",
            "llm_description": "شسیبشسیبسشبش",
            "user_description": "شیسبشسبیشسیب",
            "admin_description": "شسیبشسب",
            "confidence": 0.000005,
            "lang": "فارسی",
            "scope": "سایر",
            "edited_at": i + 100,
        }
        result.append(sample_data)
    return result
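# Example: sample_data_with_id(2) yields two copies of the sample record that
# differ only in "edited_at" (100 and 101).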
# -------------------------
# GET endpoints
# -------------------------
@app.get("/")
async def root():
    return {"message": "API is running successfully!"}
@app.get("/api/words")
async def get_words(request: Request, page_size: int = Query(100)):
    print("Headers:", request.headers)  # request headers
    print("Query Params:", request.query_params)
    # Return a static list of sample words
    result = sample_data_with_id(page_size)
    print(f"Returning {len(result)} words")
    return {"result": result}
@app.get("/api/word/{word_id}")
async def get_word(word_id: str, request: Request):
    print("Headers:", request.headers)  # request headers
    print("Query Params:", request.query_params)
    # Return a single static sample word for the requested id
    result = sample_data_with_id(1)[0]
    result["id"] = word_id
    return {"result": result}
# -------------------------
# POST endpoints
# -------------------------
@app.post("/api/search")
async def search_word(data: IDRequest) -> WordCorrector:
    # Raw-body variant kept for reference:
    # async def search_word(request: Request):
    #     body = await request.json()
    #     print("Request Body:", body)  # show the incoming JSON
    #     return {"status": "ok", "received": body}
    return sample_data_with_id(1)[0]
@app.post("/api/update")
async def update_word(data: IDRequest):
    return {
        "status": "updated",
        "updated_id": data.id,
        "name": data.name or "no name provided",
        "message": "Word updated successfully!",
    }
# Elasticsearch helpers (not implemented yet)
async def get_from_es():
    pass


async def add_from_es():
    pass


async def update_from_es():
    pass
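# A minimal sketch of how get_from_es might be filled in, assuming the local
# indexing service at http://localhost:8010 also exposes a search endpoint at
# /v1/indices/{index}/search. That endpoint name and its request body are
# assumptions for illustration only; they are not confirmed by this file.
def _example_get_from_es(title: str, es_name: str = "aiword") -> dict:
    url = f"http://localhost:8010/v1/indices/{es_name}/search"  # hypothetical endpoint
    headers = {"Content-Type": "application/json"}
    response = requests.post(url, json={"title": title}, headers=headers)  # assumed body shape
    response.raise_for_status()
    return response.json()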
async def insert_from_json_file(data, es_name="aiword"):
    # TODO: this should be rewritten using the Elasticsearch helper.
    # Reference shape of the document sent to the index:
    output_format = {
        "title": "",
        "is_correct": False,
        "nearest_correct_word": "",
        "stem": "",
        "origin": "",
        "word_classes": [],
        "word_tags": [],
        "is_proper_noun": False,
        "ner_description": "",
        "llm_description": "",
        "user_description": "",
        "admin_description": "",
        "confidence": 0.0,
        "language_key": "persian",
        "domain_tags": [],
    }
    # Reference shape of one item in the input JSON file:
    input_format = {
        "word": "آب",
        "result": {
            "word": "آب",
            "is_correct": False,
            "origin": "آب",
            "word_class": "noun",
            "proper_noun": False,
            "tags": [],
            "description": "A common noun meaning 'water' in Persian.",
            "confidence": 1.0,
        },
    }
    # Target URL of the local indexing service
    url = f"http://localhost:8010/v1/indices/{es_name}/insert"
    # Header for sending JSON
    headers = {"Content-Type": "application/json"}
    result = []
    for item in data:
        if item["result"] != "model-failed" and isinstance(item["result"], dict):
            # Map booleans to the Persian labels stored in the index
            if item["result"]["is_correct"] is True:
                is_correct = "صحیح"  # correct
            elif item["result"]["is_correct"] is False:
                is_correct = "غلط"  # incorrect
            else:
                is_correct = "نامشخص"  # unknown
            if item["result"]["proper_noun"] is True:
                is_proper_noun = "خاص"  # proper noun
            elif item["result"]["proper_noun"] is False:
                is_proper_noun = "عام"  # common noun
            else:
                is_proper_noun = "نامشخص"  # unknown
            payload = {
                "document": {
                    "title": item["word"],
                    "is_correct": is_correct,
                    "nearest_correct_word": item["result"]["word"],
                    "stem": "",
                    "origin": item["result"]["origin"],
                    "word_classes": [item["result"]["word_class"]],
                    "word_tags": item["result"]["tags"],
                    "is_proper_noun": is_proper_noun,
                    "ner_description": item["result"]["description"],
                    "llm_description": "",
                    "user_description": "",
                    "admin_description": "",
                    "time_create": int(time.time()),
                    "confidence": item["result"].get("confidence", 0.0),
                    "language_key": "persian",
                    "domain_tags": [],
                    "ref_key": "llm_msl_v1",
                }
            }
        else:
            # Fall back to an "unknown" document when the model result is unusable
            print(f"Unable to process item: {item}")
            payload = {
                "document": {
                    "title": item["word"],
                    "is_correct": "نامشخص",
                    "nearest_correct_word": "",
                    "stem": "",
                    "origin": "",
                    "word_classes": [],
                    "word_tags": [],
                    "is_proper_noun": "نامشخص",
                    "ner_description": "",
                    "llm_description": "",
                    "user_description": "",
                    "admin_description": "",
                    "time_create": int(time.time()),
                    "confidence": 0.0,
                    "language_key": "persian",
                    "domain_tags": [],
                    "ref_key": "llm_msl_v1",
                }
            }
        try:
            print(f"payload {payload}")
            response = requests.post(url, json=payload, headers=headers)
            print(f"Send status for '{item['word']}': {response.status_code}")
            if response.status_code not in (200, 201):
                print("Error in response body:", response.text)
        except requests.exceptions.RequestException as e:
            print(f"Error sending request for '{item['word']}': {e}")
def wrapper(item_index_pair):
    # Runs in a worker process: unpack (index, item) and insert a single item.
    i, item = item_index_pair
    print(f"--- {i}/ item -> {item}")
    asyncio.run(insert_from_json_file([item]))
if __name__ == "__main__":
    json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
    with open(json_file, "rb") as f:
        data = orjson.loads(f.read())
    # data = data[8881:]
    # Skip everything up to (and including) the last word that was already inserted
    for i, j in enumerate(data):
        if j["word"] == "آوانویسیهایش":
            print(f"found it ->{i} ->{j}")
            data = data[i + 1:]
            break
    total = len(data)
    print(
        f"data-> {data[0]}",
        f"total {total}",
        sep="\n",
    )
    # Send the index along with each item
    indexed_data = list(enumerate(data, start=4393))
    with Pool(6) as p:  # 6 worker processes (CPU cores)
        p.map(wrapper, indexed_data)
################################################
# json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
# with open(json_file, "rb") as f:
# data = orjson.loads(f.read())
# for i, j in enumerate(data, start=1):
# if i >= 4393:
# print(f"--- {i}/ {len(data)} --- j-> {j}")
# # break
# asyncio.run(insert_from_json_file([j]))
# import uvicorn
# uvicorn.run("main:app", host="0.0.0.0", port=8020, reload=True)