from __future__ import annotations

import time
import uuid
from typing import Any, Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fastapi import APIRouter, Depends, HTTPException, Request

from app.core.map_index_reader import MapIndexReader
from app.core.elastic_query_builder import ElasticQueryBuilder
from app.core.field_processor import FieldProcessor
from app.core.response_helper import ResponseHelper
from app.routes.v1.models import (
    SearchRequest,
    InsertRequest,
    UpdateByQueryRequest,
    DeleteByQueryRequest,
)
# from app.routes.voice.models import (
#     voiceSearchRequest,
# )
from app.routes.v1.elastic import (
    search,
)
from app.config.settings import get_settings, Settings

router = APIRouter(tags=["voice"])

# Highlight fields in priority order; the first one present on a hit is used.
_HIGHLIGHT_FIELDS = ("content.ph", "content.fa", "content")


def _pick_highlight_fragments(highlight: Dict[str, Any]) -> List[str]:
    """Return the fragments of the first known highlight field in *highlight*.

    Returns an empty list when none of ``_HIGHLIGHT_FIELDS`` is present.
    """
    for field in _HIGHLIGHT_FIELDS:
        if field in highlight:
            return highlight[field]
    return []


def _first_word_with(
    words: List[Dict[str, Any]], key: str, offset: int
) -> Dict[str, Any]:
    """Return the first entry of *words* whose *key* equals *offset*, else ``{}``."""
    return next((word for word in words if word[key] == offset), {})


def _build_voice_times(
    content: str,
    time_words: List[Dict[str, Any]],
    fragments: List[str],
) -> List[Dict[str, Any]]:
    """Map each highlight fragment to its character span inside *content*.

    Each fragment is stripped of markup and searched for in *content*
    (first occurrence only). Fragments that cannot be found are skipped.
    For a located span, the ``start`` value is taken from the word whose
    ``start_offset`` equals the span start and the ``end`` value from the
    word whose ``end_offset`` equals the span end; an empty string is used
    when no word lines up exactly.
    """
    voice_times = []
    for fragment in fragments:
        plain = BeautifulSoup(fragment, "html.parser").get_text()
        start_pos = content.find(plain)
        if start_pos == -1:
            # The de-tagged fragment does not occur verbatim in the content.
            continue
        end_pos = start_pos + len(plain)

        start_word = _first_word_with(time_words, "start_offset", start_pos)
        end_word = _first_word_with(time_words, "end_offset", end_pos)
        voice_times.append({
            "start": start_word.get("start", ""),
            "end": end_word.get("end", ""),
            "start_offset": start_pos,
            "end_offset": end_pos,
        })
    return voice_times


@router.post("/{type_name}/search")
async def search_voice(type_name: str, payload: SearchRequest, request: Request):
    """Run a phrase search and attach word-level positions to each highlight.

    The payload is forced to ``search_type = "phrase"`` with
    ``track_total_hits = True`` and delegated to the v1 ``search`` handler.
    For every hit, the highlighted fragments of the first available content
    field are located inside ``_source.content`` and matched against
    ``_source.time_words`` to produce ``voice_times``. The ``content`` and
    ``time_words`` fields are removed from ``_source`` before the hit is
    returned.

    Hits without any content highlight are returned with an empty
    ``highlight.content`` list and empty ``voice_times`` instead of failing.

    Returns:
        A dict with ``took``, ``status``, ``success``, ``message`` and
        ``hits`` (``hits`` list plus the upstream ``total``).
    """
    payload.track_total_hits = True
    payload.search_type = "phrase"

    response = await search(type_name, payload, request)

    took = response.get("took", 0)
    hits_section = response.get("hits", {})
    total = hits_section.get("total", {})

    res = []
    for item in hits_section.get("hits", []):
        source = item.get("_source", {})
        # `or` also covers fields that are present but null.
        content = source.get("content") or ""
        time_words = source.get("time_words") or []
        highlight = item.get("highlight") or {}

        fragments = _pick_highlight_fragments(highlight)
        voice_times = _build_voice_times(content, time_words, fragments)

        # Drop the bulky fields from the returned document.
        source.pop("content", None)
        source.pop("time_words", None)

        res.append({
            "_id": item.get("_id", 0),
            "_source": source,
            # Use the selected fragments directly: indexing `highlight`
            # with an empty key raised when no content field was highlighted.
            "highlight": {"content": fragments},
            "voice_times": voice_times,
        })

    return {
        "took": took,
        "status": 0,
        "success": True,
        "message": "",
        "hits": {
            "hits": res,
            "total": total,
        },
    }