"""Word-bank API stub and bulk loader.

The module exposes a small FastAPI application that serves placeholder word
records, and, when executed as a script, pushes LLM-annotated words from a JSON
file to an HTTP insert endpoint using a pool of worker processes.
"""

import asyncio
import time
from multiprocessing import Pool
from typing import Optional

import orjson
import requests
from fastapi import FastAPI, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from base_model import (
    IDRequest,
    WordCorrector,
    lang_dict,
    scope_dict,
    tags_dict,
    word_class_dict,
)

# Insert endpoint of the indexing service; ``es_name`` is the target index.
INSERT_URL_TEMPLATE = "http://localhost:8010/v1/indices/{es_name}/insert"
# Upper bound for a single insert call so a stalled server cannot block a worker forever.
REQUEST_TIMEOUT_SECONDS = 30

# Settings used only by the command-line bulk loader (see ``main``).
JSON_FILE = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
RESUME_AFTER_WORD = 'آوانویسیهایش'  # entries up to and including this word are skipped
PROGRESS_OFFSET = 4393  # first index shown in the progress output
WORKER_COUNT = 6  # number of worker processes

# Build the application.
app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is flagged
# in the FastAPI CORS documentation; replace "*" with the list of domains that
# should be allowed if credentialed browser requests are required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def sample_data_with_id(n_out):
    """Return ``n_out`` placeholder word records.

    All records are identical except for ``edited_at``, which is ``100`` plus
    the record's position in the returned list.
    """
    return [
        {
            "id": "11111111111",
            "word": "تست",
            "is_correct": True,
            "nearest_correct_word": "اصلاح شده",
            "origin": "اصلاحی",
            "stem": "اصلاح",
            "is_proper_noun": False,
            "word_class": "نادر",
            "tags": ["عامیانه", "محاوره‌ای"],
            "ner_description": "شسیبشسیبیسشبش",
            "llm_description": "شسیبشسیبسشبش",
            "user_description": "شیسبشسبیشسیب",
            "admin_description": "شسیبشسب",
            "confidence": 0.000005,
            "lang": "فارسی",
            "scope": "سایر",
            "edited_at": i + 100,
        }
        for i in range(n_out)
    ]


# -------------------------
# GET endpoints
# -------------------------
@app.get("/")
async def root():
    """Health-check endpoint."""
    return {"message": "API is running successfully!"}


@app.get("/api/words")
async def get_words(request: Request, page_size: int = Query(100)):
    """Return ``page_size`` placeholder word records under the ``result`` key."""
    print("Headers:", request.headers)  # request headers
    print("Query Params:", request.query_params)
    # A static list of words.
    result = sample_data_with_id(page_size)
    print(f"Returning {len(result)} words")
    return {"result": result}


@app.get("/api/word/{word_id}")
async def get_word(word_id: str, request: Request, page_size: int = Query(100)):
    """Return placeholder word records for the ``/api/word/{word_id}`` route.

    The handler previously reused the name ``get_words`` (shadowing the list
    handler) and did not declare the ``word_id`` path parameter. The response
    is unchanged: ``word_id`` is accepted but the static sample list is still
    returned.
    """
    print("Headers:", request.headers)  # request headers
    print("Query Params:", request.query_params)
    # A static list of words.
    result = sample_data_with_id(page_size)
    print(f"Returning {len(result)} words")
    return {"result": result}


# -------------------------
# POST endpoints
# -------------------------
@app.post("/api/search")
async def search_word(data: IDRequest) -> WordCorrector:
    """Return one placeholder word record.

    The original body returned the undefined name ``sample_data`` and raised
    ``NameError`` on every call; a single generated sample is returned instead.
    """
    # NOTE(review): confirm the placeholder record validates against
    # WordCorrector, which is declared in base_model and not visible here.
    return sample_data_with_id(1)[0]


@app.post("/api/update")
async def update_word(data: IDRequest) -> WordCorrector:
    """Acknowledge an update request by echoing its ``id`` and ``name``."""
    # NOTE(review): the keys returned here differ from the placeholder word
    # records; confirm they match the WordCorrector return annotation.
    return {
        "status": "updated",
        "updated_id": data.id,
        "name": data.name or "no name provided",
        "message": "Word updated successfully!",
    }


async def get_from_es():
    """Placeholder; not implemented."""


async def add_from_es():
    """Placeholder; not implemented."""


async def update_from_es():
    """Placeholder; not implemented."""


def _tri_state_label(value, true_label, false_label, unknown_label='نامشخص'):
    """Map ``True`` / ``False`` / anything else to one of three labels."""
    if value is True:
        return true_label
    if value is False:
        return false_label
    return unknown_label


def _build_document(item):
    """Build the document to insert for one input item.

    Items whose ``result`` is not a dict (for example the string
    ``"model-failed"``) produce a document that carries only the title and
    neutral defaults.
    """
    document = {
        "title": item["word"],
        "is_correct": 'نامشخص',
        "nearest_correct_word": '',
        "stem": "",
        "origin": '',
        "word_classes": [],
        "word_tags": [],
        "is_proper_noun": 'نامشخص',
        "ner_description": '',
        "llm_description": "",
        "user_description": "",
        "admin_description": "",
        "time_create": int(time.time()),
        "confidence": 0.0,
        "language_key": "persian",
        "domain_tags": [],
        "ref_key": "llm_msl_v1",
    }

    result = item["result"]
    if not isinstance(result, dict):
        print(f'Unable to process item: {item}')
        return document

    # Updating existing keys keeps the original key order of the document.
    document.update(
        {
            "is_correct": _tri_state_label(result["is_correct"], 'صحیح', 'غلط'),
            "nearest_correct_word": result["word"],
            "origin": result["origin"],
            "word_classes": [result["word_class"]],
            "word_tags": result["tags"],
            # The original used ``if ... if ... else``, so a True value was
            # always overwritten with the "unknown" label.
            "is_proper_noun": _tri_state_label(result["proper_noun"], 'خاص', 'عام'),
            "ner_description": result["description"],
            "confidence": result.get("confidence", 0.0),
        }
    )
    return document


async def insert_from_json_file(data, es_name="aiword"):
    """Post one document per item in ``data`` to the insert endpoint.

    Each item is expected to look like::

        {
            "word": "آب",
            "result": {
                "word": "آب",
                "is_correct": False,
                "origin": "آب",
                "word_class": "noun",
                "proper_noun": False,
                "tags": [],
                "description": "A common noun meaning 'water' in Persian.",
                "confidence": 1.0,
            },
        }

    Args:
        data: Iterable of item dicts as shown above. ``result`` may also be a
            non-dict value, in which case a default document is sent.
        es_name: Name of the target index, interpolated into the URL.

    Request failures are printed and do not stop the loop.
    """
    # TODO: rewrite this using the Elasticsearch bulk helper.
    # NOTE: requests.post is a blocking call even though this is a coroutine.
    url = INSERT_URL_TEMPLATE.format(es_name=es_name)
    headers = {"Content-Type": "application/json"}

    for item in data:
        payload = {"document": _build_document(item)}
        try:
            print(f'payload {payload}')
            response = requests.post(
                url,
                json=payload,
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            print(f"وضعیت ارسال برای '{item['word']}': {response.status_code}")
            if response.status_code not in (200, 201):
                print("خطا در بدنه پاسخ:", response.text)
        except requests.exceptions.RequestException as e:
            print(f"خطا در ارسال درخواست برای '{item['word']}': {e}")


def wrapper(item_index_pair):
    """Pool worker: insert a single ``(index, item)`` pair."""
    i, item = item_index_pair
    print(f"--- {i}/ item -> {item}")
    asyncio.run(insert_from_json_file([item]))


def _load_pending_items(json_path, resume_after_word):
    """Load the JSON list and drop everything up to the resume marker.

    Returns the entries after the first entry whose ``word`` equals
    ``resume_after_word``, or the whole list when the marker is absent.
    """
    with open(json_path, "rb") as f:
        data = orjson.loads(f.read())

    for index, entry in enumerate(data):
        if entry['word'] == resume_after_word:
            print(f'found it ->{index} ->{entry}')
            # Stop at the first match; continuing would re-slice with a stale index.
            return data[index + 1:]
    return data


def main():
    """Insert the pending items from ``JSON_FILE`` using a process pool."""
    data = _load_pending_items(JSON_FILE, RESUME_AFTER_WORD)
    total = len(data)
    if not data:
        # Nothing left after the marker; avoid IndexError on data[0].
        print(f'total {total}')
        return

    print(
        f'data-> {data[0]}',
        f'total {total}',
        sep='\n'
    )

    # Send the index along with each item so workers can report progress.
    indexed_data = list(enumerate(data, start=PROGRESS_OFFSET))

    with Pool(WORKER_COUNT) as p:
        p.map(wrapper, indexed_data)


if __name__ == "__main__":
    main()
################################################
# Disabled alternative: sequential insertion without a process pool.
# json_file = r"D:\init_mahdi\project\bank_words_per\make_data_llm\llm_data\temp.json"
# with open(json_file, "rb") as f:
#     data = orjson.loads(f.read())
# for i, j in enumerate(data, start=1):
#     if i >= 4393:
#         print(f"--- {i}/ {len(data)} --- j-> {j}")
#         # break
#         asyncio.run(insert_from_json_file([j]))

# Disabled alternative: serve the API with uvicorn.
# import uvicorn
# uvicorn.run("main:app", host="0.0.0.0", port=8020, reload=True)