elastic_backend_fast/app/routes/voice/voice.py
2025-11-29 15:48:25 +03:30

123 lines
3.6 KiB
Python

from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException, Request
from app.core.map_index_reader import MapIndexReader
from app.core.elastic_query_builder import ElasticQueryBuilder
from app.core.field_processor import FieldProcessor
from app.core.response_helper import ResponseHelper
from app.routes.v1.models import (
SearchRequest,
InsertRequest,
UpdateByQueryRequest,
DeleteByQueryRequest,
)
# from app.routes.voice.models import (
# voiceSearchRequest,
# )
from typing import Any, Dict, List, Optional
import time
from app.routes.v1.elastic import (
search,
)
import uuid
import requests
from app.config.settings import get_settings, Settings
from bs4 import BeautifulSoup
# Router for the endpoints defined in this module; its routes carry the "voice" tag.
router = APIRouter(tags=["voice"])
# Highlight fields in order of preference; the first one present on a hit is used.
_HIGHLIGHT_FIELDS = ("content.ph", "content.fa", "content")


def _voice_times(
    content: str,
    time_words: List[Dict[str, Any]],
    fragments: List[str],
) -> List[Dict[str, Any]]:
    """Map highlight fragments to character offsets and word timing values.

    Each fragment is stripped of markup, located in ``content``, and paired
    with the first ``time_words`` entry whose ``start_offset`` equals the
    fragment's start position and the first entry whose ``end_offset`` equals
    its end position.

    Args:
        content: Full text of the hit the fragments were taken from.
        time_words: Mappings read for the keys ``start_offset``,
            ``end_offset``, ``start`` and ``end``.
            NOTE(review): ``start``/``end`` are presumably audio timestamps
            of the word -- confirm against the indexer.
        fragments: Highlight fragments, possibly containing highlight tags.

    Returns:
        One dict per fragment found in ``content`` with the keys ``start``,
        ``end``, ``start_offset`` and ``end_offset``. ``start``/``end`` are
        ``""`` when no word boundary matches. Fragments whose plain text does
        not occur in ``content`` are skipped.
    """
    if not fragments:
        return []

    # Build the offset lookups once per hit instead of rescanning the word
    # list for every fragment; setdefault keeps the first matching word.
    first_by_start: Dict[Any, Dict[str, Any]] = {}
    first_by_end: Dict[Any, Dict[str, Any]] = {}
    for word in time_words:
        first_by_start.setdefault(word.get("start_offset"), word)
        first_by_end.setdefault(word.get("end_offset"), word)

    spans: List[Dict[str, Any]] = []
    for fragment in fragments:
        # Drop the highlight markup so the text can be matched against content.
        plain = BeautifulSoup(fragment, "html.parser").get_text()
        # NOTE: only the first occurrence of the fragment text is located.
        start_pos = content.find(plain)
        if start_pos == -1:
            continue
        end_pos = start_pos + len(plain)
        spans.append({
            "start": first_by_start.get(start_pos, {}).get("start", ""),
            "end": first_by_end.get(end_pos, {}).get("end", ""),
            "start_offset": start_pos,
            "end_offset": end_pos,
        })
    return spans


@router.post("/{type_name}/search")
async def search_voice(type_name: str, payload: SearchRequest, request: Request):
    """Run a phrase search and attach timing data for each highlighted fragment.

    Forces ``track_total_hits`` and the ``"phrase"`` search type on the
    payload, delegates to ``search``, then reshapes every hit: ``content`` and
    ``time_words`` are removed from ``_source``, the highlight is reduced to a
    single ``content`` list, and ``voice_times`` holds the offsets/timings of
    the highlighted fragments.

    Args:
        type_name: Path parameter forwarded unchanged to ``search``.
        payload: Search request; mutated in place before being forwarded.
        request: Incoming request, forwarded unchanged to ``search``.

    Returns:
        A dict with ``took``, ``status``, ``success``, ``message`` and a
        ``hits`` section containing the reshaped ``hits`` list and ``total``.
    """
    payload.track_total_hits = True
    payload.search_type = "phrase"
    response = await search(type_name, payload, request)

    hits_section = response.get("hits", {})
    results = []
    for item in hits_section.get("hits", []):
        source = item.get("_source", {})
        highlight = item.get("highlight") or {}

        # A hit may carry none of the expected highlight fields; fall back to
        # an empty fragment list instead of failing the whole request.
        h_key = next((field for field in _HIGHLIGHT_FIELDS if field in highlight), None)
        fragments = highlight[h_key] if h_key is not None else []

        voice_times = _voice_times(
            source.get("content") or "",
            source.get("time_words") or [],
            fragments,
        )

        # The raw text and per-word timings are not returned to the client.
        source.pop("content", None)
        source.pop("time_words", None)

        results.append({
            "_id": item.get("_id", 0),
            "_source": source,
            "highlight": {"content": fragments},
            "voice_times": voice_times,
        })

    return {
        "took": response.get("took", 0),
        "status": 0,
        "success": True,
        "message": "",
        "hits": {
            "hits": results,
            "total": hits_section.get("total", {}),
        },
    }