123 lines
3.6 KiB
Python
123 lines
3.6 KiB
Python
from __future__ import annotations
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
|
|
from app.core.map_index_reader import MapIndexReader
|
|
from app.core.elastic_query_builder import ElasticQueryBuilder
|
|
from app.core.field_processor import FieldProcessor
|
|
from app.core.response_helper import ResponseHelper
|
|
from app.routes.v1.models import (
|
|
SearchRequest,
|
|
InsertRequest,
|
|
UpdateByQueryRequest,
|
|
DeleteByQueryRequest,
|
|
)
|
|
# from app.routes.voice.models import (
|
|
# voiceSearchRequest,
|
|
# )
|
|
from typing import Any, Dict, List, Optional
|
|
import time
|
|
from app.routes.v1.elastic import (
|
|
search,
|
|
)
|
|
import uuid
|
|
import requests
|
|
from app.config.settings import get_settings, Settings
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Router that the voice search endpoint below is registered on; its
# operations are grouped under the "voice" tag.
router = APIRouter(tags=["voice"])
|
|
|
|
|
|
# Highlight fields to look for, in order of preference; the first one present
# in a hit's "highlight" section is the one that gets used.
_HIGHLIGHT_FIELDS = ("content.ph", "content.fa", "content")

# Source fields that are consumed here and stripped from the returned "_source".
_STRIPPED_SOURCE_FIELDS = ("content", "time_words")


def _pick_highlight_fragments(highlight: Any) -> List[str]:
    """Return the fragments of the first preferred highlight field, or [] if none is present."""
    if not isinstance(highlight, dict):
        # A hit without a "highlight" section (or with an unexpected shape)
        # simply has no fragments; it must not fail the whole request.
        return []
    for field in _HIGHLIGHT_FIELDS:
        if field in highlight:
            return highlight[field]
    return []


def _build_voice_times(
    content: str,
    time_words: List[Dict[str, Any]],
    fragments: List[str],
) -> List[Dict[str, Any]]:
    """Locate each highlight fragment inside ``content`` and attach word timing to it.

    For every fragment the markup is stripped, the plain text is searched for
    in ``content`` and, when found, an entry with its character span is
    produced. ``start`` is taken from the first word whose ``start_offset``
    equals the span start, ``end`` from the first word whose ``end_offset``
    equals the span end; either falls back to ``""`` when no word matches.
    Fragments that cannot be found in ``content`` are skipped.
    """
    # Index the words once per hit instead of scanning the list twice per
    # fragment. setdefault keeps the FIRST word for a given offset.
    first_by_start: Dict[Any, Dict[str, Any]] = {}
    first_by_end: Dict[Any, Dict[str, Any]] = {}
    for word in time_words:
        if not isinstance(word, dict):
            continue
        first_by_start.setdefault(word.get("start_offset"), word)
        first_by_end.setdefault(word.get("end_offset"), word)

    spans: List[Dict[str, Any]] = []
    for fragment in fragments:
        plain = BeautifulSoup(fragment, "html.parser").get_text()
        # NOTE: str.find returns the first occurrence only, so a fragment whose
        # text appears several times in content always maps to the first one.
        start_pos = content.find(plain)
        if start_pos == -1:
            continue
        end_pos = start_pos + len(plain)
        spans.append({
            "start": first_by_start.get(start_pos, {}).get("start", ""),
            "end": first_by_end.get(end_pos, {}).get("end", ""),
            "start_offset": start_pos,
            "end_offset": end_pos,
        })
    return spans


@router.post("/{type_name}/search")
async def search_voice(type_name: str, payload: SearchRequest, request: Request):
    """Run a phrase search and return hits enriched with highlight positions.

    The payload is forced to ``track_total_hits = True`` and
    ``search_type = "phrase"`` and then handed to the ``search`` handler
    imported from ``app.routes.v1.elastic``. The response is read as a dict
    with ``took`` and ``hits.hits`` / ``hits.total`` entries (presumably the
    Elasticsearch response shape; confirm against ``search``).

    Args:
        type_name: Path parameter, passed through to ``search`` unchanged.
        payload: Search request body; two of its attributes are overwritten
            as described above.
        request: Incoming request, passed through to ``search``.

    Returns:
        A dict with ``took``, ``status`` (0), ``success`` (True), an empty
        ``message`` and ``hits``. Each hit carries ``_id``, ``_source``
        (without ``content`` and ``time_words``), ``highlight.content`` (the
        fragments of the first available highlight field, ``[]`` when the hit
        has none) and ``voice_times`` (one entry per fragment that could be
        located in the content).
    """
    payload.track_total_hits = True
    payload.search_type = "phrase"

    response = await search(type_name, payload, request)

    hits_section = response.get("hits", {})

    results = []
    for item in hits_section.get("hits", []):
        source = item.get("_source", {})
        fragments = _pick_highlight_fragments(item.get("highlight"))
        voice_times = _build_voice_times(
            # "or" also covers fields that are present but null.
            source.get("content") or "",
            source.get("time_words") or [],
            fragments,
        )
        # Build a filtered copy rather than deleting keys from the hit in place.
        trimmed_source = {
            key: value
            for key, value in source.items()
            if key not in _STRIPPED_SOURCE_FIELDS
        }
        results.append({
            "_id": item.get("_id", 0),
            "_source": trimmed_source,
            # Previously indexed highlight[h_key] even when no field matched,
            # which raised and failed the whole request.
            "highlight": {"content": fragments},
            "voice_times": voice_times,
        })

    return {
        "took": response.get("took", 0),
        "status": 0,
        "success": True,
        "message": "",
        "hits": {
            "hits": results,
            "total": hits_section.get("total", {}),
        },
    }
|
|
|