# elastic_backend_fast/app/routes/voice/voice.py

from __future__ import annotations

from fastapi import APIRouter, Depends, HTTPException, Request

from app.core.map_index_reader import MapIndexReader
from app.core.elastic_query_builder import ElasticQueryBuilder
from app.core.field_processor import FieldProcessor
from app.core.response_helper import ResponseHelper
from app.routes.v1.models import (
    SearchRequest,
    InsertRequest,
    UpdateByQueryRequest,
    DeleteByQueryRequest,
)
# from app.routes.voice.models import (
#     voiceSearchRequest,
# )
from typing import Any, Dict, List, Optional
import time
from app.routes.v1.elastic import (
    search,
)
import uuid
import requests
from app.config.settings import get_settings, Settings
from bs4 import BeautifulSoup

# Router that collects the endpoints declared in this module; tagged "voice".
router = APIRouter(tags=["voice"])


# Highlight fields in priority order; the first one present on a hit is used.
_HIGHLIGHT_FIELDS = ("content.ph", "content.fa", "content")


def _select_highlight_fragments(highlight: Any) -> List[str]:
    """Return the fragments of the first known highlight field, else ``[]``."""
    if not isinstance(highlight, dict):
        # A hit without a highlight section has nothing to select.
        return []
    for field in _HIGHLIGHT_FIELDS:
        if field in highlight:
            return highlight[field]
    return []


def _build_voice_times(
    content: str, time_words: Any, fragments: List[str]
) -> List[Dict[str, Any]]:
    """Locate each highlight fragment in ``content`` and pair it with time words.

    For every fragment whose tag-stripped text occurs in ``content``, emit a
    dict with the character offsets of the match plus the ``start`` value of
    the time word whose ``start_offset`` equals the match start and the
    ``end`` value of the time word whose ``end_offset`` equals the match end
    (empty string when no such word exists). Fragments that cannot be found
    in ``content`` are skipped.
    """
    if not fragments:
        return []

    # Index the words once per hit instead of rescanning them per fragment.
    # setdefault keeps the first word seen for a given offset, which matches
    # a first-match linear scan.
    words_by_start: Dict[Any, Dict[str, Any]] = {}
    words_by_end: Dict[Any, Dict[str, Any]] = {}
    for word in time_words:
        if not isinstance(word, dict):
            continue
        words_by_start.setdefault(word.get("start_offset"), word)
        words_by_end.setdefault(word.get("end_offset"), word)

    voice_times = []
    for fragment in fragments:
        # Highlight fragments carry markup around the matched terms; strip it
        # so the text can be searched for in the raw content.
        clean = BeautifulSoup(fragment, "html.parser").get_text()
        # NOTE(review): only the first occurrence is located, so a fragment
        # whose text repeats in the content always maps to the earliest one.
        start_pos = content.find(clean)
        if start_pos == -1:
            continue
        end_pos = start_pos + len(clean)
        voice_times.append({
            "start": words_by_start.get(start_pos, {}).get("start", ""),
            "end": words_by_end.get(end_pos, {}).get("end", ""),
            "start_offset": start_pos,
            "end_offset": end_pos,
        })
    return voice_times


@router.post("/{type_name}/search")
async def search_voice(type_name: str, payload: SearchRequest, request: Request):
    """Run a phrase search and attach time-word positions to each highlight.

    Forces ``track_total_hits = True`` and ``search_type = "phrase"`` on the
    payload, delegates to the v1 ``search`` handler, then reshapes every hit:

    * ``content`` and ``time_words`` are removed from ``_source``;
    * ``highlight`` is reduced to ``{"content": [...]}`` taken from the first
      of ``content.ph``, ``content.fa``, ``content`` that is present
      (an empty list when the hit has no such highlight);
    * ``voice_times`` lists, per located fragment, its offsets in the content
      and the matching time words' ``start`` / ``end`` values.

    Returns a dict with ``took``, ``status``, ``success``, ``message`` and a
    ``hits`` section holding the reshaped hits and the original ``total``.
    """
    payload.track_total_hits = True
    payload.search_type = "phrase"

    response = await search(type_name, payload, request)

    hits_section = response.get("hits", {})

    reshaped_hits = []
    for item in hits_section.get("hits", []):
        source = item.get("_source", {})
        fragments = _select_highlight_fragments(item.get("highlight"))

        # `or` also covers fields stored as explicit null.
        voice_times = _build_voice_times(
            source.get("content") or "",
            source.get("time_words") or [],
            fragments,
        )

        # The bulky raw fields are not returned to the client.
        source.pop("content", None)
        source.pop("time_words", None)

        reshaped_hits.append({
            "_id": item.get("_id", 0),
            "_source": source,
            "highlight": {"content": fragments},
            "voice_times": voice_times,
        })

    return {
        "took": response.get("took", 0),
        "status": 0,
        "success": True,
        "message": "",
        "hits": {
            "hits": reshaped_hits,
            "total": hits_section.get("total", {}),
        },
    }