From 5a984da74c312e52112d6ebc6dd65cd64094401f Mon Sep 17 00:00:00 2001
From: init_mahdi
Date: Sat, 29 Nov 2025 15:48:25 +0330
Subject: [PATCH] for iman

---
 app/__init__.py                               |    2 +
 app/config/__init__.py                        |    2 +
 app/config/settings.py                        |   71 +
 app/core/__init__.py                          |    2 +
 app/core/elastic_client_helper.py             | 1315 +++++++++++++++++
 app/core/elastic_helper.py                    | 1071 ++++++++++++++
 app/core/elastic_query_builder.py             |  925 ++++++++++++
 app/core/elastic_wrapper.py                   |   55 +
 app/core/field_processor.py                   |  418 ++++++
 app/core/map_index_reader.py                  |  705 +++++++++
 app/core/request_helper.py                    |  266 ++++
 app/core/response_helper.py                   |  260 ++++
 app/main.py                                   |  126 ++
 app/permit/__init__.py                        |    0
 app/permit/engine.py                          |   41 +
 app/permit/models.py                          |  189 +++
 app/permit/permit.py                          |  208 +++
 app/routes/__init__.py                        |    2 +
 app/routes/rag/models.py                      |   12 +
 app/routes/rag/rag_chat.py                    |  175 +++
 app/routes/tree/base_models.py                |   39 +
 app/routes/tree/tree_base.py                  |  346 +++++
 app/routes/v1/__init__.py                     |    2 +
 app/routes/v1/elastic.py                      |  223 +++
 app/routes/v1/elastic_admin.py                |   62 +
 app/routes/v1/elastic_map.py                  |  159 ++
 app/routes/v1/models.py                       |  229 +++
 app/routes/voice/models.py                    |   16 +
 app/routes/voice/voice.py                     |  122 ++
 app/schemas/aiword_backend.json               |  127 ++
 app/schemas/mappings/aiword_map.json          |  216 +++
 app/schemas/mappings/mjplan_map.json          |  217 +++
 app/schemas/mappings/mjpsection_map.json      |  243 +++
 app/schemas/mappings/mn_term_map.json         |  224 +++
 app/schemas/mappings/mnvoice_map.json         |  249 ++++
 app/schemas/mappings/qachat_map.json          |  201 +++
 app/schemas/mappings/qaqanon_map.json         |  778 ++++++++++
 app/schemas/mappings/test_map.json            |  774 ++++++++++
 app/schemas/mappings/use_for_test_map.json    |  777 ++++++++++
 app/schemas/mjdomain_backend.json             |  170 +++
 app/schemas/mjplan_backend.json               |  115 ++
 app/schemas/mjpsection_backend.json           |  141 ++
 app/schemas/mn_term_backend.json              |   86 ++
 app/schemas/mnvoice_backend.json              |  154 ++
 app/schemas/monir/mn_meet.json                |  275 ++++
 app/schemas/monir/mn_meet_entity.json         |  130 ++
 app/schemas/monir/mn_sections.json            |  208 +++
 app/schemas/monir/mn_term.json                |   79 +
 app/schemas/monir/mngroup_backend.json        |   49 +
 app/schemas/monir/mnmeet_backend.json         |   49 +
 app/schemas/old1/1_qanon_info_old_schema.json |  207 +++
 app/schemas/old1/2_test_backend_old.json      |  345 +++++
 app/schemas/old1/qaqanon_backend_old1.json    |  479 ++++++
 app/schemas/qachat_backend.json               |  121 ++
 app/schemas/qaplan_backend.json               |  135 ++
 app/schemas/qaqanon.text                      |   17 +
 app/schemas/qaqanon_backend.json              |  879 +++++++++++
 app/schemas/qasection_backend.json            |  963 ++++++++++++
 app/schemas/settings.json                     |  140 ++
 app/schemas/test_backend.json                 |  888 +++++++++++
 app/schemas/use_for_test_backend.json         |  890 +++++++++++
 61 files changed, 17369 insertions(+)
 create mode 100644 app/__init__.py
 create mode 100644 app/config/__init__.py
 create mode 100644 app/config/settings.py
 create mode 100644 app/core/__init__.py
 create mode 100644
app/core/elastic_client_helper.py
 create mode 100644 app/core/elastic_helper.py
 create mode 100644 app/core/elastic_query_builder.py
 create mode 100644 app/core/elastic_wrapper.py
 create mode 100644 app/core/field_processor.py
 create mode 100644 app/core/map_index_reader.py
 create mode 100644 app/core/request_helper.py
 create mode 100644 app/core/response_helper.py
 create mode 100644 app/main.py
 create mode 100644 app/permit/__init__.py
 create mode 100644 app/permit/engine.py
 create mode 100644 app/permit/models.py
 create mode 100644 app/permit/permit.py
 create mode 100644 app/routes/__init__.py
 create mode 100644 app/routes/rag/models.py
 create mode 100644 app/routes/rag/rag_chat.py
 create mode 100644 app/routes/tree/base_models.py
 create mode 100644 app/routes/tree/tree_base.py
 create mode 100644 app/routes/v1/__init__.py
 create mode 100644 app/routes/v1/elastic.py
 create mode 100644 app/routes/v1/elastic_admin.py
 create mode 100644 app/routes/v1/elastic_map.py
 create mode 100644 app/routes/v1/models.py
 create mode 100644 app/routes/voice/models.py
 create mode 100644 app/routes/voice/voice.py
 create mode 100644 app/schemas/aiword_backend.json
 create mode 100644 app/schemas/mappings/aiword_map.json
 create mode 100644 app/schemas/mappings/mjplan_map.json
 create mode 100644 app/schemas/mappings/mjpsection_map.json
 create mode 100644 app/schemas/mappings/mn_term_map.json
 create mode 100644 app/schemas/mappings/mnvoice_map.json
 create mode 100644 app/schemas/mappings/qachat_map.json
 create mode 100644 app/schemas/mappings/qaqanon_map.json
 create mode 100644 app/schemas/mappings/test_map.json
 create mode 100644 app/schemas/mappings/use_for_test_map.json
 create mode 100644 app/schemas/mjdomain_backend.json
 create mode 100644 app/schemas/mjplan_backend.json
 create mode 100644 app/schemas/mjpsection_backend.json
 create mode 100644 app/schemas/mn_term_backend.json
 create mode 100644 app/schemas/mnvoice_backend.json
 create mode 100644 app/schemas/monir/mn_meet.json
 create mode 100644 app/schemas/monir/mn_meet_entity.json
 create mode 100644 app/schemas/monir/mn_sections.json
 create mode 100644 app/schemas/monir/mn_term.json
 create mode 100644 app/schemas/monir/mngroup_backend.json
 create mode 100644 app/schemas/monir/mnmeet_backend.json
 create mode 100644 app/schemas/old1/1_qanon_info_old_schema.json
 create mode 100644 app/schemas/old1/2_test_backend_old.json
 create mode 100644 app/schemas/old1/qaqanon_backend_old1.json
 create mode 100644 app/schemas/qachat_backend.json
 create mode 100644 app/schemas/qaplan_backend.json
 create mode 100644 app/schemas/qaqanon.text
 create mode 100644 app/schemas/qaqanon_backend.json
 create mode 100644 app/schemas/qasection_backend.json
 create mode 100644 app/schemas/settings.json
 create mode 100644 app/schemas/test_backend.json
 create mode 100644 app/schemas/use_for_test_backend.json

diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/app/config/__init__.py b/app/config/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/app/config/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/app/config/settings.py b/app/config/settings.py
new file mode 100644
index 0000000..0920e90
--- /dev/null
+++ b/app/config/settings.py
@@ -0,0 +1,71 @@
+import os
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from pathlib import Path
+from typing import Optional
+
+
+class Settings(BaseSettings):
+    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
+    jwtKey: Optional[str] = ""
+    is_mode_develop: int = 0
+
+    # Elasticsearch connection
+    elasticsearch_url: str = "http://localhost:9200"
+    elasticsearch_username: Optional[str] = None
+    elasticsearch_password: Optional[str] = None
+    elasticsearch_verify_certs: Optional[bool] = False
+    elasticsearch_ca_certs_path: Optional[str] = None
+
+    ai_rag_host: Optional[str] = None
+    ai_rag_host_gpu: Optional[str] = None
+
+    backup_file: Optional[str] = None
+
+    # Config base path where per-type configs live
+    config_base_path: str = "app/configs"
+    enable_back_permition: bool = False
+
+    # Networking
+    request_timeout_seconds: int = 30
+    disable_es_startup: bool = False
+    base_project: Path = Path(__file__).resolve().parent.parent.parent
+
+    current_user_id: Optional[int] = 0
+
+    permit_type: Optional[str] = "DB"
+    permit_db_type: Optional[str] = "mysql"
+    permit_database_user: Optional[str] = None
+    permit_database_password: Optional[str] = None
+    permit_database_host: Optional[str] = None
+    permit_database_port: Optional[int] = 8090
+    permit_database_name: Optional[str] = None
+
+    MYSQL_host: Optional[str] = None
+    MYSQL_username: Optional[str] = None
+    MYSQL_port: Optional[str] = None
+    MYSQL_password: Optional[str] = None
+
+
+@lru_cache
+def get_settings() -> Settings:
+    # pydantic-settings v2 expects the underscore-prefixed init arguments
+    # (_env_file / _env_file_encoding); a plain env_file kwarg would be ignored
+    settings = Settings(_env_file="config.env", _env_file_encoding="utf-8")
+
+    # environment overrides; keep the field defaults when a variable is not set
+    settings.elasticsearch_username = os.environ.get("ELASTIC_username", settings.elasticsearch_username)
+    settings.elasticsearch_password = os.environ.get("ELASTIC_password", settings.elasticsearch_password)
+    settings.elasticsearch_url = os.environ.get("ELASTIC_URL", settings.elasticsearch_url)
+    settings.ai_rag_host = os.environ.get("ai_rag_host", settings.ai_rag_host)
+    settings.ai_rag_host_gpu = os.environ.get("ai_rag_host_gpu", settings.ai_rag_host_gpu)
+    settings.jwtKey = os.environ.get("TAVASI_jwtKey", settings.jwtKey)
+    # parse common truthy spellings; adjust if the deployment uses another convention
+    settings.enable_back_permition = os.environ.get("CONFIG_enable_back_permition", "") in ("1", "true", "True")
+    settings.is_mode_develop = int(os.environ.get("CONFIG_develop", settings.is_mode_develop))
+
+    settings.MYSQL_host = os.environ.get("MYSQL_host", settings.MYSQL_host)
+    settings.MYSQL_username = os.environ.get("MYSQL_username", settings.MYSQL_username)
+    settings.MYSQL_port = os.environ.get("MYSQL_port", settings.MYSQL_port)
+    settings.MYSQL_password = os.environ.get("MYSQL_password", settings.MYSQL_password)
+
+    return settings
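+
+# Usage sketch (hypothetical caller; assumes a config.env reachable from the
+# process working directory, as wired in get_settings above):
+#
+#   from app.config.settings import get_settings
+#
+#   settings = get_settings()           # cached by lru_cache after the first call
+#   print(settings.elasticsearch_url)   # ELASTIC_URL override, else the default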
diff --git a/app/core/__init__.py b/app/core/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/app/core/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/app/core/elastic_client_helper.py b/app/core/elastic_client_helper.py
new file mode 100644
--- /dev/null
+++ b/app/core/elastic_client_helper.py
@@ -0,0 +1,1315 @@
+import asyncio
+import json
+import os
+import pathlib
+import time
+import uuid
+import zipfile
+from typing import Any, AsyncGenerator, Dict, List, Optional, TypeVar
+
+from elasticsearch import AsyncElasticsearch
+from elasticsearch.helpers import async_bulk
+
+T = TypeVar("T")
+
+
+def create_elastic_client(settings) ->
AsyncElasticsearch: + client_args: Dict[str, Any] = { + "hosts": [settings.elasticsearch_url], + "verify_certs": settings.elasticsearch_verify_certs, + "request_timeout": settings.request_timeout_seconds, + } + if settings.elasticsearch_username and settings.elasticsearch_password: + client_args["basic_auth"] = (settings.elasticsearch_username, settings.elasticsearch_password) + if settings.elasticsearch_ca_certs_path: + client_args["ca_certs"] = settings.elasticsearch_ca_certs_path + return AsyncElasticsearch(**client_args) + + +class ElasticClientHelper: + def __init__(self, client: AsyncElasticsearch) -> None: + self.client = client + + async def create_index_if_not_exists(self, index_name: str, body: Dict[str, Any]) -> Dict[str, Any]: + exists = await self.client.indices.exists(index=index_name) + if not exists: + return await self.client.indices.create(index=index_name, **body) + + return {"acknowledged": True, "index": index_name, "already_exists": True} + + async def delete_index_if_exists(self, index_name: str) -> Dict[str, Any]: + exists = await self.client.indices.exists(index=index_name) + if exists: + return await self.client.indices.delete(index=index_name) + return {"acknowledged": True, "index": index_name, "already_exists": False} + + async def index_document(self, index: str, document: Dict[str, Any], id: Optional[str] = None, + refresh: Optional[str] = None) -> Dict[str, Any]: + return await self.client.index(index=index, id=id, document=document, refresh=refresh) + + async def delete_document(self, index: str, id: str = None, refresh: Optional[str] = None) -> Dict[str, Any]: + return await self.client.delete(index=index, id=id, refresh=refresh) + + async def search(self, index: str, body: Dict[str, Any]) -> Dict[str, Any]: + return await self.client.search(index=index, **body) + + async def update_by_query(self, index: str, body: Dict[str, Any], refresh: bool = False, + conflicts: str = "proceed") -> Dict[str, Any]: + return await self.client.update_by_query(index=index, refresh=refresh, conflicts=conflicts, **body) + + async def delete_by_query(self, index: str, body: Dict[str, Any], refresh: bool = False, + conflicts: str = "proceed") -> Dict[str, Any]: + return await self.client.delete_by_query(index=index, refresh=refresh, conflicts=conflicts, **body) + + async def get_by_id(self, index: str, id: str) -> Dict[str, Any]: + # print(f'id {id}') + return await self.client.get(index=index, id=id) + + async def delete_index(self, index: str) -> Dict[str, Any]: + return await self.client.indices.delete(index=index, ignore_unavailable=True) + + async def bulk_update( + self, + index_name: str, + data: List[Dict[str, Any]], + refresh: Optional[str] = None, + raise_on_error: bool = False, + chunk_size: int = 1000, + delay_between_chunks: float = 0.0 +) -> Dict[str, Any]: + """ + Send pre-prepared bulk actions to Elasticsearch. + + Args: + index_name: Target index name (not used if data has _index) + data: List of pre-prepared bulk actions with format: + [ + { + "_op_type": "update", + "_index": "my_index", + "_id": "1", + "doc": {"field": "value"}, + "doc_as_upsert": True + }, + ... 
+            ]
+        refresh: Refresh policy ("true", "false", "wait_for")
+        raise_on_error: Whether to raise exception on errors
+        chunk_size: Number of documents per chunk
+        delay_between_chunks: Delay between chunks in seconds
+
+        Returns:
+            Dict with success count and errors
+        """
+
+        if not data:
+            return {"success": 0, "errors": [], "total": 0}
+
+        total_success = 0
+        total_errors = []
+        total_processed = 0
+
+        async for chunked_data in self.chunk_list_async_generator(
+            data,
+            chunk_size,
+            delay_between_chunks
+        ):
+            try:
+                success, errors = await async_bulk(
+                    self.client,
+                    chunked_data,
+                    refresh=refresh,
+                    raise_on_error=raise_on_error
+                )
+
+                total_success += success
+                total_processed += len(chunked_data)
+
+                if errors:
+                    total_errors.extend(errors)
+
+                # show progress
+                progress = (total_processed / len(data)) * 100
+                print(f"Progress: {progress:.2f}% ({total_processed}/{len(data)})")
+
+            except Exception as e:
+                error_info = {
+                    "error": str(e),
+                    "chunk_size": len(chunked_data),
+                    "chunk_start": total_processed
+                }
+                total_errors.append(error_info)
+
+                if raise_on_error:
+                    raise
+
+        return {
+            "success": total_success,
+            "errors": total_errors,
+            "total": len(data),
+            "failed": len(total_errors)
+        }
+
+    async def update_or_index_document(
+        self,
+        index: str,
+        document_id: str,
+        data: Dict[str, Any],
+        operation_type: str = "auto",  # "auto", "update", "index"
+        refresh: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """
+        Update existing document or index new document.
+
+        Args:
+            index: Target index name
+            document_id: Document ID
+            data: Document data
+            operation_type:
+                "auto" - automatically detect (update if exists, else index)
+                "update" - force update operation
+                "index" - force index operation
+            refresh: Refresh policy
+
+        Returns:
+            Operation result
+        """
+
+        if operation_type == "update":
+            # Force update
+            return await self.client.update(
+                index=index,
+                id=document_id,
+                doc=data,
+                refresh=refresh
+            )
+
+        elif operation_type == "index":
+            # Force index (create or replace)
+            return await self.client.index(
+                index=index,
+                id=document_id,
+                document=data,
+                refresh=refresh
+            )
+
+        else:  # auto mode
+            try:
+                # First try to update
+                return await self.client.update(
+                    index=index,
+                    id=document_id,
+                    doc=data,
+                    refresh=refresh
+                )
+            except Exception:
+                # If update fails (document doesn't exist), index new document
+                return await self.client.index(
+                    index=index,
+                    id=document_id,
+                    document=data,
+                    refresh=refresh
+                )
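+
+    # Usage sketch for update_or_index_document (index and id values are
+    # illustrative, not part of this patch):
+    #
+    #   helper = ElasticClientHelper(create_elastic_client(get_settings()))
+    #   await helper.update_or_index_document(
+    #       index="demo",
+    #       document_id="doc-1",
+    #       data={"title": "updated title"},
+    #       operation_type="auto",   # update when the doc exists, else index
+    #       refresh="wait_for",
+    #   )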
+    async def query_yes_no(self, question: str, default: Optional[str] = "no") -> bool:
+        """
+        Prompt the user for a yes/no question and return boolean result.
+
+        Args:
+            question: The question to ask the user
+            default: Default answer if user just presses enter ("yes", "no", or None)
+
+        Returns:
+            bool: True for yes, False for no
+        """
+        valid_responses = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
+
+        # Validate default parameter
+        if default is not None and default not in ["yes", "no"]:
+            raise ValueError(f"invalid default answer: '{default}'")
+
+        # Set prompt based on default
+        prompt_mapping = {
+            None: " [y/n] ",
+            "yes": " [Y/n] ",
+            "no": " [y/N] "
+        }
+        prompt = prompt_mapping[default]
+
+        while True:
+            print('\n' + '🔔 ' + '=' * 48)
+            print(f"   {question}")
+            print('   ' + '=' * 48)
+
+            try:
+                user_input = input(f" 👉 {question}{prompt}").strip().lower()
+
+                # Handle empty input (use default)
+                if not user_input and default is not None:
+                    result = valid_responses[default]
+                    print(f" ✅ Using default: {'yes' if result else 'no'}")
+                    return result
+
+                # Validate user input
+                if user_input in valid_responses:
+                    return valid_responses[user_input]
+                else:
+                    print(" ❌ Please respond with: 'yes', 'no', 'y', or 'n'")
+
+            except KeyboardInterrupt:
+                print("\n ⚠️ Operation cancelled by user.")
+                return False
+            except Exception as e:
+                print(f" ❌ Error reading input: {e}")
+                return False
+
+    async def bulk_insert(
+        self,
+        index: str,
+        documents: List[Dict[str, Any]],
+        ids: Optional[List[str]] = None,
+        refresh: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Insert multiple documents into Elasticsearch using bulk API.
+
+        Args:
+            index: Target index name
+            documents: List of documents to insert
+            ids: Optional list of document IDs (must match length of documents)
+            refresh: Refresh policy ("true", "false", "wait_for")
+
+        Example:
+            docs = [
+                {"title": "No id doc 1", "year": 2010},
+                {"title": "No id doc 2", "year": 2015},
+            ]
+            ids = ["1", "2"]
+
+            # or let an "id" field inside each document provide the _id:
+            docs = [
+                {"id": 1, "title": "No id doc 1", "year": 2010},
+                {"id": 2, "title": "No id doc 2", "year": 2015},
+            ]
+
+        Returns:
+            Bulk API response
+        """
+        actions = []
+        for i, doc in enumerate(documents):
+            action: Dict[str, Any] = {
+                "_op_type": "index",
+                "_index": index,
+                "_source": doc,
+            }
+            if ids and i < len(ids):
+                action["_id"] = ids[i]
+            elif "id" in doc:
+                action["_id"] = doc["id"]
+            else:
+                action["_id"] = str(uuid.uuid4())
+            actions.append(action)
+
+        success, errors = await async_bulk(self.client, actions, refresh=refresh)
+        return {"success": success, "errors": errors}
+
+    async def exist_doctument(self, index_name, id):
+        res = await self.client.exists(index=index_name, id=id)
+        return res
+
+    async def es_iterate_all_documents_advanced(
+        self,
+        index: str,
+        body: Optional[Dict[str, Any]] = None,
+        pagesize: int = 250,
+        scroll_timeout: str = '25m',
+        show_progress: bool = True,
+        max_documents: Optional[int] = None,
+        **kwargs
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """
+        Advanced async iterator for all documents in an index with more features.
+ + Args: + index: Index name to iterate + body: Optional search body + pagesize: Number of documents per page + scroll_timeout: Scroll timeout + show_progress: Whether to show progress updates + max_documents: Maximum number of documents to yield + **kwargs: Additional search parameters + + Yields: + Dict with document source and id + + Example: + async for doc in helper.es_iterate_all_documents_advanced( + "my_index", + body={"query": {"match_all": {}}}, + max_documents=1000 + ): + print(doc["id"]) + """ + is_first = True + scroll_id = None + total = 0 + counter = 0 + + try: + while True: + # Check if we've reached the maximum documents + if max_documents and counter >= max_documents: + if show_progress: + print(f"Reached maximum documents limit: {max_documents}") + break + + # Scroll next + if is_first: # Initialize scroll + # Prepare search parameters + search_params = { + "index": index, + "scroll": scroll_timeout, + "size": min(pagesize, max_documents) if max_documents else pagesize, + **kwargs + } + + # Add body if provided + if body: + search_params["body"] = body + + # Execute initial search + result = await self.client.search(**search_params) + + total = result["hits"]["total"]["value"] + if show_progress and total > 0: + print(f"Total documents: {total}") + if max_documents: + print(f"Limited to: {max_documents}") + + is_first = False + else: + # Continue scrolling + result = await self.client.scroll( + scroll_id=scroll_id, + scroll=scroll_timeout + ) + + scroll_id = result["_scroll_id"] + hits = result["hits"]["hits"] + + if not hits: + break + + # Yield each entry + for hit in hits: + if max_documents and counter >= max_documents: + break + + yield {"source": hit["_source"], "id": hit["_id"]} + counter += 1 + + # Show progress + if show_progress and total > 0: + actual_total = min(total, max_documents) if max_documents else total + progress_percent = (counter / actual_total) * 100 + display_total = f"{actual_total}" if not max_documents else f"min({total},{max_documents})" + print(f"Progress: {progress_percent:.2f}% ({counter}/{display_total})") + + if max_documents and counter >= max_documents: + break + + except Exception as e: + print(f"Error during document iteration: {e}") + raise + finally: + # Clean up scroll context + if scroll_id: + try: + await self.client.clear_scroll(scroll_id=scroll_id) + except Exception as e: + print(f"Warning: Failed to clear scroll context: {e}") + async def delete_by_ids(self, index_name, body) -> Dict[str,Any]: + try: + + result = await self.client.delete_by_query(index=index_name, body=body) + return { + "deleted": result.get("deleted", 0), + "total": result.get("total", 0), + "failures": result.get("failures", []), + "timed_out": result.get("timed_out", False), + "cancelled": False + } + + except Exception as e: + return { + "deleted": 0, + "error": str(e), + "cancelled": False + } + async def export_to_file( + self, + path_back: str, + index_name: str, + out_name: str = '', + body: Optional[Dict[str, Any]] = None, + fields: Optional[List[str]] = None, + chunk_size: int = 1000, + scroll_timeout: str = '5m', + max_documents: Optional[int] = None, + delay_between_chunks: float = 0.1, + to_zip: bool = False +) -> Dict[str, Any] | None: + """ + Optimized export function using chunk processing with optional zip compression. 
+ """ + info = await self.client.info() + out = out_name if out_name else index_name + base_path = pathlib.Path(__file__).parent.resolve() + # Determine file paths + path_back = os.path.join(base_path,path_back) + if to_zip: + json_file_name = f"{out}.json" + zip_file_path = os.path.join(path_back, f"{out}.zip") + # Create a temporary directory for JSON file before zipping + # temp_dir = tempfile.mkdtemp() + file_path = os.path.join( json_file_name) + else: + file_path = os.path.join(path_back, f"{out}.json") + zip_file_path = None + print(file_path) + # Create backup directory if not exists + os.makedirs(path_back, exist_ok=True) + + print('🚀' * 5, f' Starting export: {index_name}', '🚀' * 5) + if to_zip: + print('🗜️ Export will be compressed to ZIP file') + + total_documents = 0 + exported_count = 0 + scroll_id = None + + try: + with open(file_path, 'w', encoding='utf-8') as fout: + # Initial search with provided body (no modification) + search_params = { + "index": index_name, + "scroll": scroll_timeout, + "size": chunk_size + } + + if body: + search_params["body"] = body + + s_res = await self.client.search(**search_params) + total_documents = s_res['hits']['total']['value'] + scroll_id = s_res['_scroll_id'] + + actual_total = min(total_documents, max_documents) if max_documents else total_documents + + print(f'📊 Index: {index_name}') + print(f'📈 Total documents: {total_documents}') + if max_documents: + print(f'🎯 Export limit: {max_documents}') + + # Process all batches + hits = s_res['hits']['hits'] + + while hits and (not max_documents or exported_count < max_documents): + # Process current batch using chunk generator + async for chunk in self.chunk_list_async_generator(hits, 50, delay_between_chunks): + batch_data = [] + + for item in chunk: + if max_documents and exported_count >= max_documents: + break + + # Process document based on fields filter + if fields: + item_data = {"id": item["_id"]} + for field in fields: + if field in item["_source"]: + item_data[field] = item["_source"][field] + # Only add if we have data + if len(item_data) > 1: # More than just 'id' + batch_data.append(item_data) + exported_count += 1 + else: + item_data = { + "id": item["_id"], + **item["_source"] + } + batch_data.append(item_data) + exported_count += 1 + + # Write chunk to file + if batch_data: + json_line = json.dumps(batch_data, ensure_ascii=False) + fout.write(json_line + '\n') + + # Progress update + if actual_total > 0: + progress = (exported_count / actual_total) * 100 + print(f"📦 Progress: {progress:.2f}% ({exported_count}/{actual_total})") + + # Check limit + if max_documents and exported_count >= max_documents: + break + + # Next scroll + s_res = await self.client.scroll( + scroll_id=scroll_id, + scroll=scroll_timeout + ) + scroll_id = s_res['_scroll_id'] + hits = s_res['hits']['hits'] + + # If to_zip is True, compress the JSON file + final_file_path = file_path + if to_zip: + print("🗜️ Compressing JSON file to ZIP...") + with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + zipf.write(file_path, json_file_name) + + # Get file sizes for both + json_size = os.path.getsize(file_path) + zip_size = os.path.getsize(zip_file_path) + + # Calculate compression ratio + compression_ratio = (1 - (zip_size / json_size)) * 100 if json_size > 0 else 0 + + print(f"💾 Original JSON size: {json_size / 1024 / 1024:.2f} MB") + print(f"🗜️ Compressed ZIP size: {zip_size / 1024 / 1024:.2f} MB") + print(f"📊 Compression ratio: {compression_ratio:.1f}%") + + # Clean up temporary JSON file + 
+                os.remove(file_path)
+
+                final_file_path = zip_file_path
+                file_size = zip_size
+            else:
+                file_size = os.path.getsize(file_path)
+
+            print(f"✅ Export completed: {exported_count} documents")
+            print(f"💾 File: {final_file_path}")
+            print(f"📏 File size: {file_size / 1024 / 1024:.2f} MB")
+
+            return {
+                "success": True,
+                "exported_count": exported_count,
+                "total_documents": total_documents,
+                "file_path": final_file_path,
+                "file_size": file_size,
+                "compressed": to_zip,
+                "file_type": "zip" if to_zip else "json"
+            }
+
+        except Exception as e:
+            print(f"❌ Export failed: {e}")
+            # Clean up the intermediate JSON file in case of error
+            if to_zip:
+                try:
+                    if os.path.exists(file_path):
+                        os.remove(file_path)
+                except OSError:
+                    pass
+            return {
+                "success": False,
+                "error": str(e),
+                "exported_count": exported_count
+            }
+
+        finally:
+            if scroll_id:
+                try:
+                    await self.client.clear_scroll(scroll_id=scroll_id)
+                except Exception:
+                    pass
+
+    async def chunk_list_async_generator(
+        self,
+        lst: List[T],
+        chunk_size: int,
+        delay_between_chunks: float = 0.0
+    ) -> AsyncGenerator[List[T], None]:
+        """
+        Split a list into chunks and yield them asynchronously.
+
+        Args:
+            lst: Input list to split
+            chunk_size: Size of each chunk
+            delay_between_chunks: Delay between yielding chunks in seconds
+
+        Yields:
+            Chunks of the original list
+
+        Example:
+            async for chunk in helper.chunk_list_async_generator(documents, 100):
+                await helper.bulk_insert("index", chunk)
+        """
+        if chunk_size <= 0:
+            raise ValueError("chunk_size must be greater than 0")
+
+        if not lst:
+            return
+
+        for i in range(0, len(lst), chunk_size):
+            chunk = lst[i:i + chunk_size]
+            yield chunk
+
+            if delay_between_chunks > 0 and i + chunk_size < len(lst):
+                await asyncio.sleep(delay_between_chunks)
+
+    async def chunk_list_simple(self, lst: List[T], chunk_size: int) -> List[List[T]]:
+        """
+        Split a list into chunks of specified size (async wrapper).
+
+        Args:
+            lst: Input list to split
+            chunk_size: Size of each chunk
+
+        Returns:
+            List of chunks
+
+        Example:
+            >>> chunks = await helper.chunk_list_simple([1, 2, 3, 4, 5], 2)
+            >>> print(chunks)
+            [[1, 2], [3, 4], [5]]
+        """
+        if chunk_size <= 0:
+            raise ValueError("chunk_size must be greater than 0")
+
+        if not lst:
+            return []
+
+        chunks = [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]
+
+        await asyncio.sleep(0)
+
+        return chunks
+
+    async def restore_file_to_elastic(
+        self,
+        path_back: str,
+        index_name: str,
+        app_key: str = '',
+        query_delete: bool = True,
+        map_name: str = '',
+        body: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Restore data from zip file to Elasticsearch index.
+ + Args: + path_back: Backup directory path + index_name: Target index name + app_key: Application key for mapping + query_delete: Whether to prompt before deleting existing index + map_name: Mapping name + body: Additional body parameters + + Returns: + Operation result + """ + if not os.path.exists(path_back): + return { + "success": False, + "error": f"Path does not exist: {path_back}", + "restored_count": 0 + } + + file_path = os.path.join(path_back, f"{index_name}.zip") + if not os.path.exists(file_path): + return { + "success": False, + "error": f"Backup file not found: {file_path}", + "restored_count": 0 + } + + try: + # Handle existing index + index_exists = await self.client.indices.exists(index=index_name) + + if index_exists and query_delete: + should_delete = await self.query_yes_no( + f"Delete existing index '{index_name}' before restore?" + ) + if should_delete: + await self.delete_index_if_exists(index_name) + else: + return { + "success": False, + "error": "Operation cancelled by user", + "restored_count": 0 + } + elif index_exists: + return { + "success": False, + "error": f"Index already exists: {index_name}", + "restored_count": 0 + } + + # Create index with mapping + mapping_result = await self.create_index(index_name, app_key, map_name) + if not mapping_result.get("success", False): + return mapping_result + + # Import data from zip file + restore_result = await self.zip_file_to_elastic(file_path, index_name) + return restore_result + + except Exception as e: + return { + "success": False, + "error": str(e), + "restored_count": 0 + } + + async def rename_elastic_index( + self, + index_name_i: str, + index_name_o: str, + app_key: str = '', + map_name: str = '', + body: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """ + Rename Elasticsearch index using reindex API. 
+ + Args: + index_name_i: Source index name + index_name_o: Target index name + app_key: Application key for mapping + map_name: Mapping name + body: Additional reindex parameters + + Returns: + Reindex result + """ + try: + # Create target index + create_result = await self.create_index(index_name_o, app_key, map_name) + if not create_result.get("success", False): + return create_result + + # Prepare reindex body + reindex_body = { + "source": {"index": index_name_i}, + "dest": {"index": index_name_o} + } + + if body: + reindex_body.update(body) + + # Start reindex task + reindex_result = await self.client.reindex( + body=reindex_body, + wait_for_completion=False + ) + + task_id = reindex_result.get('task') + if not task_id: + return { + "success": False, + "error": "Failed to start reindex task", + "task_id": None + } + + # Monitor task progress + tasks_client = self.client.tasks + while True: + task_status = await tasks_client.get(task_id=task_id) + + if task_status['completed']: + break + + total = task_status['task']['status']['total'] + created = task_status['task']['status']['created'] + updated = task_status['task']['status'].get('updated', 0) + + print(f'🔄 {index_name_o} - Progress: {created + updated}/{total}') + await asyncio.sleep(1) + + return { + "success": True, + "task_id": task_id, + "source_index": index_name_i, + "target_index": index_name_o, + "status": "completed" + } + + except Exception as e: + return { + "success": False, + "error": str(e), + "task_id": None + } + + + async def move_custom_fields( + self, + index_name_i: str, + index_name_o: str, + fields: List[str], + rename_fields: Optional[Dict[str, str]] = None, + query_body: Optional[Dict[str, Any]] = None, + chunk_size: int = 1000, + delay_between_chunks: float = 0.1 + ) -> Dict[str, Any]: + """ + Move and rename specific fields from source to target index. + + Args: + index_name_i: Source index name + index_name_o: Target index name + fields: List of fields to move + rename_fields: Dictionary mapping source field names to target names + query_body: Query to filter source documents + chunk_size: Number of documents per chunk + delay_between_chunks: Delay between processing chunks + + Returns: + Operation result + """ + try: + rename_fields = rename_fields or {} + total_processed = 0 + total_updated = 0 + errors = [] + + # Iterate through all documents in source index + async for document in self.es_iterate_all_documents_advanced( + index=index_name_i, + body=query_body, + pagesize=chunk_size, + show_progress=True + ): + try: + source_data = document["source"] + doc_id = document["id"] + + # Extract and transform fields + transformed_data = {} + data_filled = False + + for field in fields: + # Handle nested fields (field.subfield) + if '.' 
in field:
+                            value = source_data
+                            for subfield in field.split('.'):
+                                # default to None (not {}) so missing nested keys are skipped
+                                value = value.get(subfield) if isinstance(value, dict) else None
+                                if value is None:
+                                    break
+                        else:
+                            value = source_data.get(field)
+
+                        if value is None:
+                            continue
+
+                        # Determine target field name
+                        target_field = rename_fields.get(field, field)
+                        transformed_data[target_field] = value
+                        data_filled = True
+
+                    if not data_filled:
+                        continue
+
+                    # Update document in target index
+                    await self.update_or_index_document(
+                        index=index_name_o,
+                        document_id=doc_id,
+                        data=transformed_data,
+                        operation_type="auto"
+                    )
+
+                    total_processed += 1
+                    total_updated += 1
+
+                except Exception as e:
+                    errors.append({
+                        "document_id": document.get("id", "unknown"),
+                        "error": str(e)
+                    })
+                    total_processed += 1
+
+                # Small delay to prevent overwhelming the cluster
+                if total_processed % chunk_size == 0:
+                    await asyncio.sleep(delay_between_chunks)
+
+            return {
+                "success": True,
+                "total_processed": total_processed,
+                "total_updated": total_updated,
+                "error_count": len(errors),
+                "errors": errors
+            }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "total_processed": total_processed,
+                "total_updated": total_updated
+            }
+
+    async def iterate_json_file(
+        self,
+        file_path: str,
+        is_zip: bool = True,
+        limit_pack: Optional[int] = None,
+        chunk_size: int = 1000,
+        delay_between_chunks: float = 0.1
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """
+        Iterate through JSON files (regular or zip) and yield documents.
+
+        Args:
+            file_path: Path to JSON or ZIP file
+            is_zip: Whether the file is a ZIP archive
+            limit_pack: Maximum number of packs to process
+            chunk_size: Number of documents per chunk
+            delay_between_chunks: Delay between chunks
+
+        Yields:
+            Documents with source and id
+        """
+        if not os.path.exists(file_path):
+            print(f"❌ File not found: {file_path}")
+            return
+
+        try:
+            if is_zip:
+                pack_no = 0
+                with zipfile.ZipFile(file_path, 'r') as z_object:
+                    file_list = z_object.namelist()
+
+                    if limit_pack:
+                        file_list = file_list[:limit_pack]
+
+                    print(f'📦 Processing ZIP: {len(file_list)} files')
+
+                    for filename in file_list:
+                        pack_no += 1
+                        print(f'📄 Processing: {filename}')
+
+                        with z_object.open(filename) as file:
+                            file_content = file.read().decode('utf-8')
+                            data = json.loads(file_content)
+
+                            # Yield documents in chunks
+                            async for chunk in self.chunk_list_async_generator(
+                                data, chunk_size, delay_between_chunks
+                            ):
+                                for item in chunk:
+                                    yield {
+                                        "source": item.get("_source", {}),
+                                        "id": item.get("_id", item.get("id", ""))
+                                    }
+
+            else:
+                # Regular JSON file
+                with open(file_path, 'r', encoding='utf-8') as file:
+                    data = json.load(file)
+
+                # Yield all documents
+                for item in data:
+                    yield {
+                        "source": item.get("_source", {}),
+                        "id": item.get("_id", item.get("id", ""))
+                    }
+
+        except Exception as e:
+            print(f"❌ Error iterating JSON file: {e}")
+            raise
+
+    async def file_to_elastic(
+        self,
+        file_path: str,
+        index_name: str,
+        limit_pack: Optional[int] = None,
+        fields: Optional[List[str]] = None,
+        chunk_size: int = 1000,
+        delay_between_chunks: float = 0.1
+    ) -> Dict[str, Any]:
+        """
+        Import data from JSON file to Elasticsearch.
+
+        Args:
+            file_path: Path to JSON file
+            index_name: Target index name
+            limit_pack: Maximum number of documents to import
+            fields: Specific fields to import (None for all)
+            chunk_size: Number of documents per chunk
+            delay_between_chunks: Delay between chunks
+
+        Returns:
+            Import result
+        """
+        if not os.path.exists(file_path):
+            return {
+                "success": False,
+                "error": f"File not found: {file_path}",
+                "imported_count": 0
+            }
+
+        imported_count = 0
+        try:
+            # collect raw documents and ids; bulk_insert builds the bulk actions
+            bulk_docs: List[Dict[str, Any]] = []
+            bulk_ids: List[str] = []
+
+            async for document in self.iterate_json_file(
+                file_path=file_path,
+                is_zip=False,
+                limit_pack=limit_pack,
+                chunk_size=chunk_size,
+                delay_between_chunks=delay_between_chunks
+            ):
+                # Filter fields if specified
+                source_data = document["source"]
+                if fields:
+                    filtered_data = {}
+                    for field in fields:
+                        if field in source_data:
+                            filtered_data[field] = source_data[field]
+                    source_data = filtered_data
+
+                bulk_docs.append(source_data)
+                bulk_ids.append(document["id"])
+                imported_count += 1
+
+                # Process in chunks
+                if len(bulk_docs) >= chunk_size:
+                    bulk_result = await self.bulk_insert(
+                        index=index_name,
+                        documents=bulk_docs,
+                        ids=bulk_ids
+                    )
+                    bulk_docs, bulk_ids = [], []
+
+                    if bulk_result.get("errors"):
+                        print(f"⚠️ Bulk errors: {len(bulk_result['errors'])}")
+
+            # Process remaining documents
+            if bulk_docs:
+                bulk_result = await self.bulk_insert(
+                    index=index_name,
+                    documents=bulk_docs,
+                    ids=bulk_ids
+                )
+
+            # Refresh index
+            await self.client.indices.refresh(index=index_name)
+
+            # Get final count
+            count_result = await self.client.count(index=index_name)
+            final_count = count_result['count']
+
+            return {
+                "success": True,
+                "imported_count": imported_count,
+                "final_count": final_count,
+                "file_path": file_path
+            }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "imported_count": imported_count
+            }
+
+    async def zip_file_to_elastic(
+        self,
+        file_path: str,
+        index_name: str,
+        limit_pack: Optional[int] = None,
+        fields: Optional[List[str]] = None
+    ) -> Dict[str, Any]:
+        """
+        Import data from ZIP file to Elasticsearch.
+
+        Args:
+            file_path: Path to ZIP file
+            index_name: Target index name
+            limit_pack: Maximum number of packs to process
+            fields: Specific fields to import
+
+        Returns:
+            Import result
+        """
+        if not os.path.exists(file_path):
+            return {
+                "success": False,
+                "error": f"ZIP file not found: {file_path}",
+                "imported_count": 0
+            }
+
+        total_imported = 0
+        file_count = 0
+        try:
+            with zipfile.ZipFile(file_path, 'r') as z_object:
+                file_list = z_object.namelist()
+
+                if limit_pack:
+                    file_list = file_list[:limit_pack]
+
+                print(f'🗜️ Processing {len(file_list)} files from ZIP')
+
+                for filename in file_list:
+                    file_count += 1
+                    print(f'📦 Importing from: {filename}')
+
+                    with z_object.open(filename) as file:
+                        file_content = file.read().decode('utf-8')
+                        data = json.loads(file_content)
+
+                        # collect raw documents and ids; bulk_insert builds the actions
+                        bulk_docs = []
+                        bulk_ids = []
+                        for item in data:
+                            source_data = item.get("_source", {})
+
+                            # Filter fields if specified
+                            if fields:
+                                filtered_data = {}
+                                for field in fields:
+                                    if field in source_data:
+                                        filtered_data[field] = source_data[field]
+                                source_data = filtered_data
+
+                            bulk_docs.append(source_data)
+                            bulk_ids.append(item.get("_id", item.get("id", "")))
+
+                        # Bulk insert
+                        if bulk_docs:
+                            bulk_result = await self.bulk_insert(
+                                index=index_name,
+                                documents=bulk_docs,
+                                ids=bulk_ids
+                            )
+
+                            if bulk_result.get("success", 0) > 0:
+                                total_imported += bulk_result["success"]
+
+                            if bulk_result.get("errors"):
+                                print(f"⚠️ Errors in {filename}: {len(bulk_result['errors'])}")
+
+            # Refresh index and get count
+            await self.client.indices.refresh(index=index_name)
+            count_result = await self.client.count(index=index_name)
+
+            return {
+                "success": True,
+                "imported_count": total_imported,
+                "file_count": file_count,
+                "final_count": count_result['count'],
+                "file_path": file_path
+            }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "imported_count": total_imported,
+                "file_count": file_count
+            }
+
+    async def mapping_index(
+        self,
+        index_name: str,
+        new_mapping: Dict[str, Any],
+        reindex_body: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Update index mapping by creating new index and reindexing.
+
+        Args:
+            index_name: Index name to update
+            new_mapping: New mapping configuration
+            reindex_body: Additional reindex parameters
+
+        Returns:
+            Operation result
+        """
+        # assign before the try so the except cleanup never hits an unbound name
+        temp_index_name = f"{index_name}_temp_{int(time.time())}"
+        try:
+            # Create temporary index with new mapping
+            create_result = await self.client.indices.create(
+                index=temp_index_name,
+                body=new_mapping
+            )
+
+            if not create_result.get("acknowledged", False):
+                return {
+                    "success": False,
+                    "error": "Failed to create temporary index",
+                    "temp_index": temp_index_name
+                }
+
+            # Reindex data
+            reindex_body_base = {
+                "source": {"index": index_name},
+                "dest": {"index": temp_index_name}
+            }
+
+            if reindex_body:
+                reindex_body_base.update(reindex_body)
+
+            reindex_result = await self.client.reindex(
+                body=reindex_body_base,
+                wait_for_completion=True
+            )
+
+            # Swap indices
+            if reindex_result.get("failures"):
+                # Clean up temporary index on failure
+                await self.delete_index_if_exists(temp_index_name)
+                return {
+                    "success": False,
+                    "error": "Reindex failed with failures",
+                    "failures": reindex_result["failures"],
+                    "temp_index": temp_index_name
+                }
+
+            # Delete original index and create alias
+            await self.delete_index_if_exists(index_name)
+            await self.client.indices.put_alias(
+                index=temp_index_name,
+                name=index_name
+            )
+
+            return {
+                "success": True,
+                "reindexed": reindex_result.get("total", 0),
+                "created": reindex_result.get("created", 0),
+                "temp_index": temp_index_name,
+                "original_index": index_name
+            }
+
+        except Exception as e:
+            # Clean up on error
+            await self.delete_index_if_exists(temp_index_name)
+            return {
+                "success": False,
+                "error": str(e),
+                "temp_index": temp_index_name
+            }
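+
+# Minimal end-to-end sketch for this module (index name and mapping are
+# illustrative; assumes a reachable cluster configured through get_settings):
+#
+#   import asyncio
+#   from app.config.settings import get_settings
+#
+#   async def main():
+#       helper = ElasticClientHelper(create_elastic_client(get_settings()))
+#       await helper.create_index_if_not_exists("demo", {"mappings": {}})
+#       await helper.bulk_insert("demo", [{"id": "1", "title": "hello"}], refresh="true")
+#       res = await helper.search("demo", {"query": {"match_all": {}}})
+#       print(res["hits"]["total"])
+#       await helper.client.close()
+#
+#   asyncio.run(main())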
diff --git a/app/core/elastic_helper.py b/app/core/elastic_helper.py
new file mode 100644
index 0000000..db48107
--- /dev/null
+++ b/app/core/elastic_helper.py
@@ -0,0 +1,1071 @@
+import zipfile
+import sys
+import os
+import json
+from time import sleep
+from elasticsearch import Elasticsearch, helpers
+
+class ElasticHelper():
+    # ----------------------------start---------------------------
+    # ElasticHelper class:
+    #   input: nothing beyond the __init__ arguments
+    #   output: an ElasticHelper instance
+    #   behavior:
+    #     - defines class-level counters used to track scroll progress
+    #     - sets the default path for mapping files
+    # -----------------------------end----------------------------
+
+    counter = 0
+    total = 0
+    id = ""
+    path_mappings = os.getcwd() + '/repo/_other/'
+
+    # ----------------------------start---------------------------
+    # __init__:
+    #   input:
+    #     - es_url: Elasticsearch address (str), default "http://127.0.0.1:6900"
+    #     - es_pass: password (str), default empty
+    #     - es_user: username (str), default "elastic"
+    #     - path_mappings: mappings path (str), default empty
+    #   output: an ElasticHelper instance
+    #   behavior:
+    #     - connects to Elasticsearch (with basic auth when a password is given)
+    #     - retries the connection up to 10 times, waiting 5 seconds per try
+    #     - prints an error message when the connection never succeeds
+    # -----------------------------end----------------------------
+    def __init__(self, es_url="http://127.0.0.1:6900", es_pass="", es_user="elastic", path_mappings=""):
+
+        if path_mappings:
+            self.path_mappings = path_mappings
+
+        if es_pass == '':
+            self.es = Elasticsearch(es_url)
+        else:
+            self.es = Elasticsearch(
+                es_url,
+                http_auth=(es_user, es_pass),
+            )
+
+        self.success_connect = False
+        for a in range(0, 10):
+            try:
+                if not self.es.ping():
+                    print('elastic not ping, sleep 5 s : ', a)
+                    sleep(5)
+                    continue
+                else:
+                    self.success_connect = True
+                    break
+            except Exception:
+                break
+        if not self.success_connect:
+            print('******', 'no access to the elastic service')
+            return
+
+        self.counter = 0
+        self.total = 0
+        self.id = ""
+
+    def search(self, **params):
+        try:
+            res = self.es.search(**params)
+        except Exception:
+            return {'hits': {'hits': []}}
+        return res
+
+    def get_doctument(self, index_name, id):
+        res = self.es.get(index=index_name, id=id)
+        return res
+
+    def exist_doctument(self, index_name, id):
+        res = self.es.exists(index=index_name, id=id)
+        return res
+
+    # ----------------------------start---------------------------
+    # update_index_doc:
+    #   input:
+    #     - is_update_state: choose update vs index (bool)
+    #     - index_name_o: index name (str)
+    #     - eid: document id (str)
+    #     - data: document body (dict)
+    #   output: Elasticsearch response (dict)
+    #   behavior:
+    #     - when is_update_state is True: updates the existing document
+    #     - otherwise: indexes a new document
+    # -----------------------------end----------------------------
+    def update_index_doc(self, is_update_state, index_name_o, eid, data):
+        if is_update_state:
+            resp = self.es.update(index=index_name_o, id=eid, doc=data)
+            # resp = self.es.update(index=index_name_o, id=eid, body={'doc': data})
+        else:
+            resp = self.es.index(index=index_name_o, id=eid, document=data)
+        return resp
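+
+    # Usage sketch (index and id are illustrative):
+    #
+    #   eh = ElasticHelper(es_url="http://127.0.0.1:6900")
+    #   eh.update_index_doc(True, "demo", "doc-1", {"title": "new"})    # partial update
+    #   eh.update_index_doc(False, "demo", "doc-1", {"title": "new"})   # full (re)index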
+
+    # ----------------------------start---------------------------
+    # exportToJsonForAI:
+    #   input:
+    #     - path_back: output directory (str)
+    #     - index_name: index name (str)
+    #     - out_name: output file name (str), default empty
+    #     - body: search body (dict), default empty
+    #     - fields: fields to keep (list), default empty
+    #     - mode: 'normal' (list), 'dict' (keyed by id) or 'id' (ids only)
+    #     - collapse_field: group the output by this field (str), default empty
+    #   output: nothing returned directly
+    #   behavior:
+    #     - fetches the index data with the scroll API and prints progress
+    #     - writes the data to a JSON file
+    #     - when fields is given, only those fields are extracted
+    # -----------------------------end----------------------------
+    def exportToJsonForAI(self, path_back, index_name, out_name='', body={}, fields=[], mode='normal', collapse_field=''):
+        print('*' * 50, ' start backup -->', index_name)
+        self.counter = 0
+        sid = None
+
+        out = out_name
+        if out_name == '':
+            out = index_name
+
+        s_res = self.es.search(
+            index=index_name,
+            scroll='5m',
+            size=1000,
+            body=body
+        )
+        self.total = s_res["hits"]["total"]['value']
+
+        print('start index = %s' % index_name)
+        print('total = %d' % self.total)
+
+        sid = s_res['_scroll_id']
+        scroll_size = len(s_res['hits']['hits'])
+        pack_count = 1
+        out_json = []
+        if mode == 'dict' or collapse_field:
+            out_json = {}
+
+        prev_collapse_value = ''
+        pack_collapse = []
+        while scroll_size > 0:
+            # scrolling...
+            self.counter += scroll_size
+            print("progress -> %.2f %%" % ((self.counter / self.total) * 100))
+
+            #### for test
+            # if pack_count > 2 :
+            #     break
+            pack_count += 1
+            #############################
+            for item in s_res['hits']['hits']:
+                id = item['_id']
+                item2 = None
+                if mode == 'id':  # only the ids are needed
+                    item2 = id
+                elif fields:
+                    item2 = {}
+                    item2['id'] = id
+                    for kf in fields:
+                        if kf in item['_source']:
+                            item2[kf] = item['_source'][kf]
+                        elif '.' in kf:
+                            cols = kf.split('.')
+                            subsource = item['_source']
+                            for sub in cols:
+                                if sub in subsource:
+                                    subsource = subsource[sub]
+                                    continue
+                                else:
+                                    break
+                            key = kf.replace(".", "__")
+                            item2[key] = subsource
+                else:
+                    item2 = {}
+                    item2 = item
+
+                if collapse_field and collapse_field in item['_source']:
+                    collapse_value = item['_source'][collapse_field]
+                    if not prev_collapse_value:
+                        prev_collapse_value = collapse_value
+
+                    if not collapse_value == prev_collapse_value:
+                        out_json[prev_collapse_value] = pack_collapse
+                        pack_collapse = []
+                        prev_collapse_value = collapse_value
+
+                    pack_collapse.append(item2)
+
+                elif mode == 'dict':
+                    out_json[id] = item2
+                else:
+                    out_json.append(item2)
+
+            s_res = self.es.scroll(scroll_id=sid, scroll='2m', request_timeout=100000)
+            sid = s_res['_scroll_id']
+            scroll_size = len(s_res['hits']['hits'])
+
+        sid = None
+
+        if collapse_field and prev_collapse_value and pack_collapse:
+            out_json[prev_collapse_value] = pack_collapse
+
+        with open(path_back + "/" + out, "w", encoding="utf-8") as fout:
+            json.dump(out_json, fout, ensure_ascii=False, indent=4)
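+
+    # Usage sketch (path, index and field names are illustrative):
+    #
+    #   eh.exportToJsonForAI("/tmp/back", "demo", out_name="demo.json",
+    #                        fields=["title", "content"])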
+
+    # ----------------------------start---------------------------
+    # backupIndexToZipfile:
+    #   input:
+    #     - path_back: output directory (str)
+    #     - index_name: index name (str)
+    #     - file_name: output file name (str), default empty
+    #     - body: search body (dict), default {"size": 1000}
+    #     - byzip: write a ZIP instead of a plain JSON file (bool), default True
+    #     - fields: fields to keep (list), default empty
+    #     - noFields: fields to drop (list), default empty
+    #   output: bool (False when the query matches no documents)
+    #   behavior:
+    #     - fetches the index data with the scroll API and prints progress
+    #     - writes the data to a ZIP (one JSON member per scroll page) or JSON file
+    #     - when fields is given, only those fields are extracted
+    #     - when noFields is given, those fields are removed
+    # -----------------------------end----------------------------
+    def backupIndexToZipfile(self, path_back, index_name, file_name='', body={"size": 1000}, byzip=True, fields=[], noFields=[]):
+        print('*' * 50, ' start backup -->', index_name)
+        self.counter = 0
+        sid = None
+
+        out = index_name
+
+        if file_name == '':
+            file_name = index_name
+
+        if body == {}:
+            s_res = self.es.search(
+                index=index_name,
+                scroll='5m',
+                size=1000
+            )
+        else:
+            s_res = self.es.search(
+                index=index_name,
+                scroll='5m',
+                body=body
+            )
+        self.total = s_res["hits"]["total"]['value']
+        if self.total == 0:
+            print('total index_name by query = %d' % self.total)
+            return False
+
+        if byzip:
+            fout = zipfile.ZipFile(path_back + "/" + file_name + '.zip', 'w')
+        else:
+            fout = open(path_back + "/" + file_name + '.json', 'a+', encoding='utf-8')
+
+        print('start index = %s' % index_name)
+        print('total = %d' % self.total)
+
+        sid = s_res['_scroll_id']
+        scroll_size = len(s_res['hits']['hits'])
+        file_count = 1
+        prev_percent = 0
+        while scroll_size > 0:
+            # scrolling...
+            self.counter += scroll_size
+            percent = int((self.counter / self.total) * 100)
+            if percent != prev_percent:
+                print("progress -> %d %%" % percent)
+                prev_percent = percent
+            #############################
+            out_json = []
+            for item in s_res['hits']['hits']:
+                if fields:
+                    item2 = {}
+                    item2['id'] = item['_id']
+                    item2['_source'] = {}
+                    for kf in fields:
+                        if kf in item['_source']:
+                            item2['_source'][kf] = item['_source'][kf]
+                else:
+                    item2 = item
+
+                if noFields:
+                    for kf in noFields:
+                        if kf in item2['_source']:
+                            del item2['_source'][kf]
+
+                out_json.append(item2)
+
+            text = json.dumps(out_json, ensure_ascii=False)
+            out_json = []
+            if byzip:
+                filename = out + str(file_count) + '.json'
+                file_count += 1
+                fout.writestr(filename, text.encode('utf-8'), zipfile.ZIP_DEFLATED)
+            else:
+                fout.write(text)
+
+            ##############################
+            s_res = self.es.scroll(scroll_id=sid, scroll='2m', request_timeout=100000)
+            sid = s_res['_scroll_id']
+            scroll_size = len(s_res['hits']['hits'])
+        sid = None
+        fout.close()
+        return True
+
+    # ----------------------------start---------------------------
+    # restorFileToElastic:
+    #   input:
+    #     - path_back: backup directory (str)
+    #     - index_name: index name (str)
+    #     - app_key: application key used to locate the mapping (str), default empty
+    #     - queryDelete: ask before deleting an existing index (bool), default True
+    #     - map_name: mapping file name (str), default empty
+    #   output: bool
+    #   behavior:
+    #     - checks that the ZIP backup file exists
+    #     - when queryDelete is True, asks the user to confirm the delete
+    #     - creates the index, then restores the documents from the ZIP file
+    # -----------------------------end----------------------------
+    def restorFileToElastic(self, path_back, index_name, app_key='', queryDelete=True, map_name=''):
+        if not os.path.exists(path_back):
+            print(' **** error *** path not exist: ', path_back)
+            return False
+
+        file_path = path_back + '/' + index_name + '.zip'
+        if not os.path.exists(file_path):
+            return False
+
+        if queryDelete:
+            # if the index exists, ask the user before deleting it
+            if self.deleteIndex(index_name):
+                self.createIndex(index_name, app_key, map_name)
+                self.zipFileToElastic(file_path, index_name)
+        else:
+            # if the index already exists, skip the delete and restore on top
+            self.createIndex(index_name, app_key, map_name)
+            self.zipFileToElastic(file_path, index_name)
+
+    def restorFileToElastic2(self, path_file, index_name, app_key='', queryDelete=True, map_name=''):
+        if not os.path.exists(path_file):
+            print(' **** error *** path not exist: ', path_file)
+            return False
+
+        file_path = path_file
+        if queryDelete:
+            # if the index exists, ask the user before deleting it
+            if self.deleteIndex(index_name):
+                self.createIndex(index_name, app_key, map_name)
+                self.zipFileToElastic(file_path, index_name)
+        else:
+            # if the index already exists, skip the delete and restore on top
+            self.createIndex(index_name, app_key, map_name)
+            self.zipFileToElastic(file_path, index_name)
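+
+    # Usage sketch (restores <path_back>/demo.zip into index "demo"; all
+    # values are illustrative):
+    #
+    #   eh.restorFileToElastic("/tmp/back", "demo", app_key="tavasi", queryDelete=True)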
+    # ----------------------------start---------------------------
+    # Method restorFileToElastic:
+    # Inputs:
+    #   - path_back: backup directory (str)
+    #   - index_name: index name (str)
+    #   - app_key: application key used to locate the mapping (str) - default ''
+    #   - queryDelete: delete the index before restoring (bool) - default True
+    #   - map_name: mapping file name (str) - default ''
+    # Returns: bool
+    # Behavior:
+    #   - checks that the ZIP file exists
+    #   - when queryDelete=True, asks the user to confirm the delete
+    #   - creates the index
+    #   - restores the data from the ZIP file into Elasticsearch
+    # -----------------------------end----------------------------
+    def restorFileToElastic(self, path_back, index_name, app_key='', queryDelete=True, map_name='') :
+        if not os.path.exists(path_back) :
+            print(' **** error *** path not exist: ', path_back)
+            return False
+
+        file_path = path_back + '/' + index_name + '.zip'
+        if not os.path.exists(file_path) :
+            return False
+
+        if queryDelete :
+            # if the index exists, ask the user before deleting it
+            if self.deleteIndex(index_name) :
+                self.createIndex(index_name, app_key, map_name)
+                self.zipFileToElastic(file_path, index_name)
+        else : # if the index already exists, skip it and do nothing
+            self.createIndex(index_name, app_key, map_name)
+            self.zipFileToElastic(file_path, index_name)
+
+    def restorFileToElastic2(self, path_file, index_name, app_key='', queryDelete=True, map_name='') :
+        if not os.path.exists(path_file) :
+            print(' **** error *** path not exist: ', path_file)
+            return False
+
+        file_path = path_file
+        if not os.path.exists(file_path) :
+            return False
+
+        if queryDelete :
+            # if the index exists, ask the user before deleting it
+            if self.deleteIndex(index_name) :
+                self.createIndex(index_name, app_key, map_name)
+                self.zipFileToElastic(file_path, index_name)
+        else : # if the index already exists, skip it and do nothing
+            self.createIndex(index_name, app_key, map_name)
+            self.zipFileToElastic(file_path, index_name)
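+    ## sample usage (illustrative; the path and names are hypothetical): restores
+    ## /backups/qaqanon.zip into the 'qaqanon' index, asking for confirmation
+    ## before deleting an existing index:
+    # helper.restorFileToElastic('/backups', 'qaqanon', app_key='tavasi', queryDelete=True)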
+    # ----------------------------start---------------------------
+    # Method renameElasticIndex:
+    # Inputs:
+    #   - index_name_i: source index name (str)
+    #   - index_name_o: destination index name (str)
+    #   - app_key: application key used to locate the mapping (str) - default ''
+    #   - map_name: mapping file name (str) - default ''
+    # Returns: nothing
+    # Behavior:
+    #   - creates the destination index
+    #   - copies the data from source to destination with the reindex API
+    #   - prints progress while the reindex task runs
+    # -----------------------------end----------------------------
+    def renameElasticIndex(self, index_name_i, index_name_o, app_key='', map_name='') :
+
+        if self.createIndex(index_name_o, app_key, map_name) :
+            res = self.es.reindex(
+                body={
+                    "source": {"index": index_name_i},
+                    "dest": {"index": index_name_o}
+                },
+                wait_for_completion=False)
+
+            print(type(res))
+            print(res)
+
+            taskid = res["task"] if res["task"] else ""
+            # tasks = client.TasksClient(self.es)
+            tasks = self.es.tasks
+            while True :
+                res = tasks.get(task_id=taskid)
+                if res["completed"] :
+                    break
+
+                # print(res["task"])
+                print('----', index_name_o, ' imported : ', res["task"]["status"]["total"], ' / ', res["task"]["status"]["created"])
+                sleep(1)
+            print('----', index_name_o, ' completed')
+
+    # ----------------------------start---------------------------
+    # Method deleteIndex:
+    # Inputs:
+    #   - index_name: index name (str)
+    # Returns: bool
+    # Behavior:
+    #   - checks whether the index exists
+    #   - asks the user to confirm the delete
+    #   - deletes the index when confirmed
+    # -----------------------------end----------------------------
+    def deleteIndex(self, index_name) :
+        if not self.es.indices.exists(index=index_name) :
+            print(' ' * 10, " for delete NOT exist index :", index_name)
+            return True
+
+        question = 'Is DELETE elastic index (' + index_name + ') ? '
+        if self.query_yes_no(question) :
+            self.es.indices.delete(index=index_name)
+            print('%' * 10, " Finish DELETE index :", index_name)
+            return True
+        else :
+            return False
+
+    # ----------------------------start---------------------------
+    # Method query_yes_no:
+    # Inputs:
+    #   - question: prompt shown to the user (str)
+    #   - default: default answer (str) - default "no"
+    # Returns: bool
+    # Behavior:
+    #   - shows the question to the user
+    #   - reads and validates the answer
+    #   - returns True for 'yes'/'y' and False for 'no'/'n'
+    # -----------------------------end----------------------------
+    def query_yes_no(self, question, default="no"):
+        valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
+        if default is None:
+            prompt = " [y/n] "
+        elif default == "yes":
+            prompt = " [Y/n] "
+        elif default == "no":
+            prompt = " [y/N] "
+        else:
+            raise ValueError("invalid default answer: '%s'" % default)
+
+        while True:
+            print('%' * 10, ' question ', '%' * 10, '\n')
+            sys.stdout.write(question + prompt)
+            choice = input().lower()
+            if default is not None and choice == "":
+                return valid[default]
+            elif choice in valid:
+                return valid[choice]
+            else:
+                sys.stdout.write("Please answer 'yes' or 'no' (or 'y' or 'n').\n")
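+    ## sample usage (illustrative; index names are hypothetical): copies every
+    ## document of 'qaqanon' into 'qaqanon_v2' through the reindex API:
+    # helper.renameElasticIndex('qaqanon', 'qaqanon_v2')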
+    # ----------------------------start---------------------------
+    # Method createIndexIfNotExist:
+    # Inputs:
+    #   - index_name_o: index name (str)
+    #   - mapping_o: index mapping (str) - default ''
+    # Returns: nothing
+    # Behavior:
+    #   - checks whether the index exists
+    #   - creates it with the given mapping when it does not
+    # -----------------------------end----------------------------
+    def createIndexIfNotExist(self, index_name_o, mapping_o=""):
+        try:
+            if not self.es.indices.exists(index=index_name_o):
+                response = self.es.indices.create(index=index_name_o, body=mapping_o)
+                # print out the response:
+                print("create index response:", response)
+        except Exception:
+            print("....... index exist ! ... not created")
+
+    # ----------------------------start---------------------------
+    # Method createIndex:
+    # Inputs:
+    #   - index_name: index name (str)
+    #   - app_key: application key used to locate the mapping (str) - default ''
+    #   - map_name: mapping file name (str) - default ''
+    # Returns: bool
+    # Behavior:
+    #   - checks whether the index exists
+    #   - resolves the mapping file path
+    #   - reads the mapping file and creates the index
+    #   - prints an error when the mapping file cannot be found
+    # -----------------------------end----------------------------
+    def createIndex(self, index_name, app_key='', map_name=''):
+
+        path_base = self.path_mappings
+        path_mapping1 = path_base + 'general/'
+        if app_key == '' :
+            app_key = 'tavasi'
+        path_mapping2 = path_base + app_key + '/'
+
+        if map_name == '':
+            map_name = index_name
+
+        if self.es.indices.exists(index=index_name) :
+            print("============== exist index :", index_name)
+            return True
+
+        if map_name == 'mj_rg_section' or map_name == 'semantic_search' :
+            map_name = 'mj_qa_section'
+        elif map_name[-3:] == '_ai':
+            # strip the '_ai' suffix and fall back to the base mapping name
+            map_name = map_name[:-3]
+            print(map_name)
+
+        mapping_file_path = path_mapping1 + map_name + '.json'
+        print("mapping_file_path : ", mapping_file_path)
+        if not os.path.isfile(mapping_file_path):
+            mapping_file_path = path_mapping2 + map_name + '.json'
+            print("mapping_file_path : ", mapping_file_path)
+
+        # Create Index With Mapping
+        if os.path.isfile(mapping_file_path):
+            mapping_file = open(mapping_file_path, 'r', encoding='utf-8')
+            mapping_file_read = mapping_file.read()
+            mapping_data = json.loads(mapping_file_read)
+            mapping_file.close()
+            if self.es.indices.exists(index=index_name) :
+                print("============== exist index :", index_name)
+            else :
+                self.es.indices.create(index=index_name, body=mapping_data)
+            return True
+        else:
+            print('*** error: mapping file for elastic not found: *******', mapping_file_path)
+            return False
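+    ## mapping resolution sketch (illustrative; the base path is whatever
+    ## self.path_mappings holds): createIndex('qachat') first tries
+    ## {path_mappings}/general/qachat.json and, if that file is missing,
+    ## falls back to {path_mappings}/tavasi/qachat.json (the default app_key).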
+    # ----------------------------start---------------------------
+    # Method updateBulkList:
+    # Inputs:
+    #   - listData: list of documents (list)
+    #   - index_name: index name (str)
+    # Returns: nothing
+    # Behavior:
+    #   - updates the documents in bulk
+    #   - uses the Elasticsearch helpers.bulk API
+    # -----------------------------end----------------------------
+    def updateBulkList(self, listData, index_name):
+        chunk_size = 100000
+        raise_on_error = False
+        raise_on_exception = False
+        stats_only = True
+        yield_ok = False
+
+        actions = []
+        for item in listData:
+            actions.append({
+                "_op_type": "update",
+                "_index": index_name,
+                "_id": item['_id'],
+                "doc": item['_source']
+            })
+        helpers.bulk(self.es, actions, chunk_size, raise_on_error, raise_on_exception, stats_only, yield_ok)
+
+    # ----------------------------start---------------------------
+    # Method importBulkList:
+    # Inputs:
+    #   - listData: list of documents (list)
+    #   - index_name: index name (str)
+    # Returns: nothing
+    # Behavior:
+    #   - indexes the documents in bulk
+    #   - uses the Elasticsearch helpers.bulk API
+    # -----------------------------end----------------------------
+    def importBulkList(self, listData, index_name):
+        chunk_size = 100000
+        raise_on_error = False
+        raise_on_exception = False
+        stats_only = True
+        yield_ok = False
+
+        # collect all actions first so helpers.bulk is called once,
+        # not once per document
+        actions = []
+        for item in listData:
+            actions.append({
+                "_op_type": "index",
+                "_index": index_name,
+                "_id": item['_id'],
+                "_source": item['_source']
+            })
+        helpers.bulk(self.es, actions, chunk_size, raise_on_error, raise_on_exception, stats_only, yield_ok)
+
+    # ----------------------------start---------------------------
+    # Method importJsonDataToElastic:
+    # Inputs:
+    #   - jsonData: JSON documents (list)
+    #   - index_name: index name (str)
+    #   - fields: fields to keep (list) - default empty
+    # Returns: nothing
+    # Behavior:
+    #   - indexes the documents in bulk
+    #   - uses the Elasticsearch helpers.bulk API
+    # -----------------------------end----------------------------
+    def importJsonDataToElastic(self, jsonData, index_name, fields=[]):
+        chunk_size = 100000
+        raise_on_error = False
+        raise_on_exception = False
+        stats_only = True
+        yield_ok = False
+
+        actions = []
+
+        for item in jsonData:
+            id = item['_id'] if item.get('_id') else item['id']
+            source = item['_source']
+            if fields :
+                source = {}
+                for col in fields :
+                    if col in item['_source'] :
+                        # copy the field value, not the whole _source dict
+                        source[col] = item['_source'][col]
+
+            actions.append({
+                "_op_type": "index",
+                "_index": index_name,
+                "_id": id,
+                "_source": source
+            })
+        helpers.bulk(self.es, actions, chunk_size, raise_on_error, raise_on_exception, stats_only, yield_ok)
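+    ## expected item shape for the three bulk helpers above (illustrative values):
+    # docs = [
+    #     {"_id": "1", "_source": {"title": "...", "qanon_id": "q-1"}},
+    #     {"_id": "2", "_source": {"title": "...", "qanon_id": "q-2"}},
+    # ]
+    # helper.importBulkList(docs, 'qaqanon')   # index new documents
+    # helper.updateBulkList(docs, 'qaqanon')   # partial-update existing ones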
+ """ + is_first = True + while True: + # Scroll next + if is_first: # Initialize scroll + # result = self.es.search(index=index, scroll="2m", **kwargs, body={ + # "size": pagesize + # }) + if body : + result = self.es.search( + index=index, + scroll=scroll_timeout, + **kwargs, + size=pagesize, + body=body + ) + else : + result = self.es.search( + index=index, + scroll=scroll_timeout, + **kwargs, + size=pagesize + ) + + self.total = result["hits"]["total"]["value"] + if self.total > 0: + print("total = %d" % self.total) + is_first = False + else: + # result = es.scroll(body={ + # "scroll_id": scroll_id, + # "scroll": scroll_timeout + # }) + result = self.es.scroll(scroll_id=scroll_id, scroll=scroll_timeout) + + scroll_id = result["_scroll_id"] + hits = result["hits"]["hits"] + self.counter += len(hits) + if self.total > 0 : + print("progress -> %.2f %%" % ((self.counter / self.total) * 100)) + # Stop after no more docs + if not hits: + break + # Yield each entry + yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in hits) + + + def removeCustomFileds(self, index_name_i, fields=[], renameFileds={}, body={}, bulk_update_size=200): + try: + _list = [] + try: + _list = self.es_iterate_all_documents(index_name_i, body) + except Exception as e: + print(e) + + bulk_data = [] + count = 0 + total_count = 0 + for mentry in _list: + count += 1 + + entry = mentry["source"] + id = mentry["id"] + # print(id) + eid = id + + # if (count % 100) == 0 : + # print("%s -> %.2f " % (id , (count / self.total) if self.total > 0 else 0)) + + data_filled = False + data = entry + for col in fields: + if col in data : + del data[col] + + elif '.' in col : + cols = col.split('.') + subsource = entry + for sub in cols : + dCol = subsource.get(sub, None) + if dCol : + subsource = dCol + else : + break + + for col in renameFileds.items(): + if col in data : + dCol = data[col] + data[renameFileds[col]] = dCol + del data[col] + + bulk_data.append({ + '_id': eid, + '_source' : data + }) + + # انتقال دسته جمعی کدها به الاستیک + if len(bulk_data) >= bulk_update_size: + total_count += len(bulk_data) + print('=' * 5, ' update bulk --> ' , 'total='+ str(total_count) , str(count)) + self.importBulkList(bulk_data, index_name_i) + bulk_data = [] + + + if len(bulk_data) >= 0: + total_count += len(bulk_data) + print('=' * 5, ' update bulk --> ' , 'total='+ str(total_count) , str(count)) + self.importBulkList(bulk_data, index_name_i) + bulk_data = [] + + except Exception as e: + # print("1111") + print(e) + + # save_error(id, e) + + + # ----------------------------start--------------------------- + # متد moveCustomFileds: + # نوع ورودی: + # - index_name_i: نام اندیس منبع (str) + # - index_name_o: نام اندیس مقصد (str) + # - fields: لیست فیلدهای مورد انتقال (list) - پیش‌فرض خالی + # - renameFileds: دیکشنری تغییر نام فیلدها (dict) - پیش‌فرض خالی + # نوع خروجی: بدون خروجی مستقیم + # عملیات: + # - تمام اسناد اندیس منبع را دریافت می‌کند + # - فیلدهای مشخص شده را استخراج و به اندیس مقصد انتقال می‌دهد + # - اگر renameFileds وجود داشته باشد، نام فیلدها را تغییر می‌دهد + # -----------------------------end---------------------------- + def moveCustomFileds(self, index_name_i, index_name_o, fields=[], renameFileds={}): + try: + body = {} + list = [] + try: + list = self.es_iterate_all_documents(index_name_i) + except Exception as e: + print(e) + + count = 0 + for mentry in list: + count += 1 + + entry = mentry["source"] + id = mentry["id"] + # print(id) + eid = id + + if (count % 100) == 0 : + print("%s -> %.2f " % (id , (count / 
+    # ----------------------------start---------------------------
+    # Method es_iterate_all_documents:
+    # Inputs:
+    #   - index: index name (str)
+    #   - body: search body (dict) - default ''
+    #   - pagesize: page size (int) - default 250
+    #   - scroll_timeout: scroll keep-alive (str) - default "25m"
+    #   - **kwargs: extra search parameters
+    # Returns: generator
+    # Behavior:
+    #   - fetches every document of the index with the scroll API
+    #   - prints progress
+    #   - yields the documents one by one
+    # -----------------------------end----------------------------
+    def es_iterate_all_documents(self, index, body="", pagesize=250, scroll_timeout="25m", **kwargs):
+        """
+        Helper to iterate ALL values from a single index
+        Yields all the documents.
+        """
+        is_first = True
+        while True:
+            # Scroll next
+            if is_first: # Initialize scroll
+                # result = self.es.search(index=index, scroll="2m", **kwargs, body={
+                #     "size": pagesize
+                # })
+                if body :
+                    result = self.es.search(
+                        index=index,
+                        scroll=scroll_timeout,
+                        **kwargs,
+                        size=pagesize,
+                        body=body
+                    )
+                else :
+                    result = self.es.search(
+                        index=index,
+                        scroll=scroll_timeout,
+                        **kwargs,
+                        size=pagesize
+                    )
+
+                self.total = result["hits"]["total"]["value"]
+                self.counter = 0  # reset so progress starts from zero
+                if self.total > 0:
+                    print("total = %d" % self.total)
+                is_first = False
+            else:
+                # result = es.scroll(body={
+                #     "scroll_id": scroll_id,
+                #     "scroll": scroll_timeout
+                # })
+                result = self.es.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
+
+            scroll_id = result["_scroll_id"]
+            hits = result["hits"]["hits"]
+            self.counter += len(hits)
+            if self.total > 0 :
+                print("progress -> %.2f %%" % ((self.counter / self.total) * 100))
+            # Stop after no more docs
+            if not hits:
+                break
+            # Yield each entry
+            yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in hits)
+
+    def removeCustomFileds(self, index_name_i, fields=[], renameFileds={}, body={}, bulk_update_size=200):
+        try:
+            _list = []
+            try:
+                _list = self.es_iterate_all_documents(index_name_i, body)
+            except Exception as e:
+                print(e)
+
+            bulk_data = []
+            count = 0
+            total_count = 0
+            for mentry in _list:
+                count += 1
+
+                entry = mentry["source"]
+                id = mentry["id"]
+                # print(id)
+                eid = id
+
+                # if (count % 100) == 0 :
+                #     print("%s -> %.2f " % (id, (count / self.total) if self.total > 0 else 0))
+
+                data = entry
+                for col in fields:
+                    if col in data :
+                        del data[col]
+
+                    elif '.' in col :
+                        # walk down to the parent object and drop the leaf key
+                        cols = col.split('.')
+                        subsource = entry
+                        for sub in cols[:-1] :
+                            subsource = subsource.get(sub, None)
+                            if subsource is None :
+                                break
+                        if isinstance(subsource, dict) and cols[-1] in subsource :
+                            del subsource[cols[-1]]
+
+                # iterate the rename map as key/value pairs
+                for col, new_name in renameFileds.items():
+                    if col in data :
+                        data[new_name] = data[col]
+                        del data[col]
+
+                bulk_data.append({
+                    '_id': eid,
+                    '_source': data
+                })
+
+                # push the batch to Elasticsearch
+                if len(bulk_data) >= bulk_update_size:
+                    total_count += len(bulk_data)
+                    print('=' * 5, ' update bulk --> ', 'total=' + str(total_count), str(count))
+                    self.importBulkList(bulk_data, index_name_i)
+                    bulk_data = []
+
+            if len(bulk_data) > 0:
+                total_count += len(bulk_data)
+                print('=' * 5, ' update bulk --> ', 'total=' + str(total_count), str(count))
+                self.importBulkList(bulk_data, index_name_i)
+                bulk_data = []
+
+        except Exception as e:
+            # print("1111")
+            print(e)
+            # save_error(id, e)
+
+    # ----------------------------start---------------------------
+    # Method moveCustomFileds:
+    # Inputs:
+    #   - index_name_i: source index name (str)
+    #   - index_name_o: destination index name (str)
+    #   - fields: fields to move (list) - default empty
+    #   - renameFileds: field rename map (dict) - default empty
+    # Returns: nothing
+    # Behavior:
+    #   - iterates every document of the source index
+    #   - extracts the listed fields and writes them to the destination index
+    #   - renames fields according to renameFileds when given
+    # -----------------------------end----------------------------
+    def moveCustomFileds(self, index_name_i, index_name_o, fields=[], renameFileds={}):
+        try:
+            body = {}
+            _list = []
+            try:
+                _list = self.es_iterate_all_documents(index_name_i)
+            except Exception as e:
+                print(e)
+
+            count = 0
+            for mentry in _list:
+                count += 1
+
+                entry = mentry["source"]
+                id = mentry["id"]
+                # print(id)
+                eid = id
+
+                if (count % 100) == 0 :
+                    print("%s -> %.2f " % (id, (count / self.total) if self.total > 0 else 0))
+
+                data_filled = False
+                data = {}
+                for col in fields:
+
+                    if '.' in col :
+                        cols = col.split('.')
+                        subsource = entry
+                        for sub in cols :
+                            dCol = subsource.get(sub, None)
+                            if dCol :
+                                subsource = dCol
+                            else :
+                                break
+                    else :
+                        dCol = entry.get(col, None)
+
+                    if dCol is None:
+                        continue
+
+                    if col in renameFileds :
+                        data[renameFileds[col]] = dCol
+                    else:
+                        data[col] = dCol
+
+                    data_filled = True
+
+                if not data_filled :
+                    continue
+
+                try:
+                    resp = self.update_index_doc(True, index_name_o, eid, data)
+                except Exception as e:
+                    print(e)
+                    # save_error(id, e)
+
+        except Exception as e:
+            # print("1111")
+            print(e)
+            # save_error(id, e)
+
+    # ----------------------------start---------------------------
+    # Method mappingIndex:
+    # Inputs:
+    #   - index_name_i: index name (str)
+    # Returns: nothing
+    # Behavior:
+    #   - documents that a mapping cannot be changed in place from Python;
+    #     a new index with the desired mapping must be created and reindexed
+    # -----------------------------end----------------------------
+    def mappingIndex(self, index_name_i):
+        # The mapping can only be changed through Kibana.
+        # It cannot be done from Python:
+        # a new index with the desired mapping has to be created and then reindexed.
+        pass
+
+    # ----------------------------start---------------------------
+    # Method updateByQueryIndex:
+    # Inputs:
+    #   - index_name_i: index name (str)
+    #   - body: update body (dict)
+    # Returns: nothing
+    # Behavior:
+    #   - updates documents with the update_by_query API
+    #   - prints the error when one occurs
+    # -----------------------------end----------------------------
+    def updateByQueryIndex(self, index_name_i, body):
+        ## sample
+        # body = {
+        #     "script": {
+        #         "inline": "ctx._source.Device='Test'",
+        #         "lang": "painless"
+        #     },
+        #     "query": {
+        #         "match": {
+        #             "Device": "Boiler"
+        #         }
+        #     }
+        # }
+        try:
+            self.es.update_by_query(body=body, index=index_name_i)
+
+        except Exception as e:
+            print(e)
+            # save_error(id, e)
+
+    # ----------------------------start---------------------------
+    # Method deleteByQueryIndex:
+    # Inputs:
+    #   - index_name_i: index name (str)
+    #   - body: delete body (dict)
+    # Returns: nothing
+    # Behavior:
+    #   - deletes documents with the delete_by_query API
+    #   - prints the error when one occurs
+    # -----------------------------end----------------------------
+    def deleteByQueryIndex(self, index_name_i, body):
+        ## sample
+        # body = {
+        #     "query": {
+        #         "match": {
+        #             "Device": "Boiler"
+        #         }
+        #     }
+        # }
+        try:
+            self.es.delete_by_query(index=index_name_i, body=body)
+
+        except Exception as e:
+            print(e)
+            # save_error(id, e)
+
+    # ----------------------------start---------------------------
+    # Method delete_by_ids:
+    # Inputs:
+    #   - index_name_i: index name (str)
+    #   - ids: list of document ids (list)
+    # Returns: nothing
+    # Behavior:
+    #   - deletes the documents with the given ids
+    #   - prints the error when one occurs
+    # -----------------------------end----------------------------
+    def delete_by_ids(self, index_name_i, ids):
+        try:
+            # ids = ['test1', 'test2', 'test3']
+
+            query = {"query": {"terms": {"_id": ids}}}
+            res = self.es.delete_by_query(index=index_name_i, body=query)
+            print(res)
+
+        except Exception as e:
+            print(e)
+            # save_error(id, e)
diff --git a/app/core/elastic_query_builder.py b/app/core/elastic_query_builder.py
new file mode 100644
index 0000000..920023e
--- /dev/null
+++ b/app/core/elastic_query_builder.py
@@ -0,0 +1,925 @@
+from __future__ import annotations
+
+import re
+from typing import Any, Dict, List, Optional, Union + +from app.core.map_index_reader import MapIndexReader +from app.routes.v1.models import ( + SearchRequest, + InsertRequest, + UpdateByQueryRequest, + DeleteByQueryRequest, +) + + +class ElasticQueryBuilder: + """Builds Elasticsearch queries based on configs and request models. + + This class consolidates functionality from both elastic_query_builder.py and + elastic_query_builder_1.py to provide a comprehensive query building solution. + """ + + def __init__(self, map_index_reader: Optional[MapIndexReader] = None): + """Initialize the query builder with an optional MapIndexReader instance. + + Args: + map_index_reader: Optional MapIndexReader instance for backward compatibility + """ + self.map_index_reader = map_index_reader + self.stopwords_fr = [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "دار", + "داشت", + "داشتن", + "خواه", + "خواست", + "خواستن", + "گوی", + "گفت", + "گفتن", + "گیر", + "گرفت", + "گرفتن", + "آی", + "آمد", + "آمدن", + "توان", + "توانستن", + "یاب", + "یافتن", + "آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno", + "است", + "یعنی", + "یابی", + "بعد", + "اینکه", + "وقتی", + "دارد", + "ولی", + "" + ] + + # ============================================================================ + # CORE ... + # ============================================================================ + + def get_source_fileds(self, reader, request:Dict): + # Source filtering + + if request and request.include_fields: + includes = request.include_fields or reader.get_include_fields() + else: + includes = reader.get_include_fields() + + if request and request.exclude_fields: + excludes = request.exclude_fields or reader.get_exclude_fields() + else: + excludes = reader.get_exclude_fields() + + _source = {} + if len (includes) and len (excludes) : + _source = {"includes": includes, "excludes": excludes} + elif len (includes) : + _source["includes"] = includes + elif len (excludes) : + _source["excludes"] = excludes + else : + return None + + return _source + + + # ============================================================================ + # CORE SEARCH QUERY BUILDING METHODS + # ============================================================================ + + + def build_search_query(self, reader: MapIndexReader, request: SearchRequest) -> Dict[str, Any]: + query: Dict[str, Any] = {"bool": {"must": [], "filter": [], "should": [], "must_not": []}} + # Basic full-text search + normal = [] + if request.query: + normal = reader.get_query_normal(request.query, request.search_type) + query["bool"]["should"] = normal + + # Filters + if request.filters: + for field_key, value in request.filters.items(): + field = reader.get_filter_key(field_key) + print("request.filters", field_key, field, value ) + if not field : + continue + if isinstance(value, dict) and any(k in value for k in ("gte", "lte", "gt", "lt")): + query["bool"]["filter"].append({"range": {field: value}}) + elif isinstance(value, list): + query["bool"]["filter"].append({"terms": {field: value}}) + else: + query["bool"]["filter"].append({"term": {field: value}}) + + body: Dict[str, Any] = { + "query": query, + "from": request.from_, + "size": request.size, + 
"track_total_hits": request.track_total_hits, + } + + # Source filtering + _source = self.get_source_fileds(reader, request) + if _source : + body["_source"] = _source + + + # Sorting + sort: List[Dict[str, Any]] = [] + if request.sort: + for item in request.sort: + if isinstance(item, dict): + sort.append(item) + else: + # Handle string forms: "field:asc", "field:desc, lasttitle, title" + direction = "asc" + field_name = item + if ":" in item: + field_name, direction = item.split(":", 1) + if reader.is_sortable(field_name) or field_name == "_score": + sort.append({field_name: {"order": direction}}) + else: + # eg : "lasttitle, title" + _sort = reader.get_sort_key("item") + if _sort : + sort.append(_sort) + + else: + sort = reader.default_sort() + + if sort: + body["sort"] = sort + + # Highlight + highlight = {} + if normal : # اگر کوئری وجود داشته باشد، هایلایت لازم هست + highlight = request.highlight if request.highlight is not None else reader.default_highlight() + if highlight: + highlight_tmp = highlight + if not 'require_field_match' in highlight_tmp : + highlight_tmp['require_field_match'] = False + if not 'fragment_size' in highlight_tmp : + highlight_tmp['fragment_size'] = 200 + if not 'number_of_fragments' in highlight_tmp : + highlight_tmp['number_of_fragments'] = 3 + if not 'boundary_scanner' in highlight_tmp : + highlight_tmp['boundary_scanner'] = 'sentence' + + body["highlight"] = highlight_tmp + + + # suggest + # ...... + + + # Add aggregations + aggregation_fields = request.aggregation_fields if request.aggregation_fields is not None else reader.default_aggregation_fields() + aggs = {} + aggregation_fields_for_itreate = aggregation_fields.items() if isinstance(aggregation_fields,dict) else aggregation_fields + for field, value in aggregation_fields_for_itreate: + if field in reader.list_fields(): + nead_keyword, field_type, field_meta = reader.needs_keyword_suffix(field) + + name = field + size = 10 + if isinstance(value, int) : + size = value + elif isinstance(value, dict) : + if 'size' in value : + size = value['size'] + if 'name' in value : + name = value['name'] + + if nead_keyword: + aggs[f"{name}"] = { + "terms": { + "field": field+'.keyword', + "size": size + } + } + elif field_type in ["integer", "long", "float", "double", "date"]: + aggs[f"{name}"] = { + "terms": { + "field": field, + "size": size + } + } + + # elif field_type in ["integer", "long", "float", "double"]: + # aggs[f"{field}_stats"] = { + # "stats": {"field": field}, + # "size": value + # } + # elif field_type == "date": + # aggs[f"{field}_histogram"] = { + # "date_histogram": { + # "field": field, + # "size": value, + # "calendar_interval": "1y" + # } + # } + + if aggs: + body["aggs"] = aggs + + # Collapse + collapse_field = request.collapse_field + + collapse_field_info = reader.get_collapse_field(collapse_field) + if collapse_field_info: + body["collapse"] = { + "field": collapse_field, + "inner_hits" :{ + "name": "by_collapse", + "size" : 5 + } + } + + if highlight : + body["collapse"]["inner_hits"]["highlight"] = highlight + if collapse_field_info and collapse_field_info.sort : + body["collapse"]["inner_hits"]["sort"] = collapse_field_info.sort + if collapse_field_info and collapse_field_info.size : + body["collapse"]["inner_hits"]["size"] = collapse_field_info.size + if collapse_field_info and collapse_field_info.name : + body["collapse"]["inner_hits"]["name"] = collapse_field_info.name + if not "aggs" in body : + body["aggs"] = {} + + # Add collapse-specific aggregations + # 
body["aggs"]['total'] = { + # 'cardinality': { + # 'field': collapse_field + # } + # } + + body["aggs"]['total_collapse'] = { + 'cardinality': { + 'field': collapse_field, + 'precision_threshold': 200000 # For accuracy in calculations + } + } + + # Search after + if request.search_after: + body["search_after"] = request.search_after + + + return body + + # Enhanced query building methods utilizing new field properties + def build_enhanced_search_query(self, reader: MapIndexReader, request: SearchRequest) -> Dict[str, Any]: + """Build an enhanced search query with advanced field handling. + + Args: + reader: MapIndexReader instance for index configuration + request: SearchRequest containing search parameters + + Returns: + Enhanced Elasticsearch query body + """ + # Build basic query + body = self.build_search_query(reader, request) + + + # Source filtering + _source = self.get_source_fileds(reader, request) + if _source : + body["_source"] = _source + + + # Enhanced sorting with field validation + if body.get("sort"): + validated_sort = [] + for sort_item in body["sort"]: + if isinstance(sort_item, dict): + for field, config in sort_item.items(): + if field == "_score" or reader.is_sortable(field): + validated_sort.append(sort_item) + break + else: + validated_sort.append(sort_item) + body["sort"] = validated_sort + + return body + + def build_aggregation_query(self, reader: MapIndexReader, request: SearchRequest, + aggregation_fields: List[str]) -> Dict[str, Any]: + """Build a query with aggregations for specified fields. + + Args: + reader: MapIndexReader instance for index configuration + request: SearchRequest containing search parameters + aggregation_fields: List of fields to aggregate on + + Returns: + Query body with aggregations + """ + # Build base query + body = self.build_field_aware_query(reader, request) + + # Add aggregations + aggs = {} + for field in aggregation_fields: + if field in reader.list_fields(): + field_type = reader.get_field_type(field) + field_meta = reader.get_field_meta(field) + + if field_type == "keyword" or field_meta.get("needs_keyword", False): + aggs[f"{field}_counts"] = { + "terms": { + "field": field, + "size": 100 + } + } + elif field_type in ["integer", "long", "float", "double"]: + aggs[f"{field}_stats"] = { + "stats": {"field": field} + } + elif field_type == "date": + aggs[f"{field}_histogram"] = { + "date_histogram": { + "field": field, + "calendar_interval": "1y" + } + } + + if aggs: + body["aggs"] = aggs + + return body + + def build_export_query(self, reader: MapIndexReader, request: SearchRequest) -> Dict[str, Any]: + """Build a query optimized for data export. + + Args: + reader: MapIndexReader instance for index configuration + request: SearchRequest containing search parameters + + Returns: + Export-optimized query body + """ + # Build base query + body = self.build_field_aware_query(reader, request) + + # Source filtering + _source = self.get_source_fileds(reader, request) + if _source : + body["_source"] = _source + + + # Remove highlighting for export + if "highlight" in body: + del body["highlight"] + + # Remove aggregations for export + if "aggs" in body: + del body["aggs"] + + return body + + def build_update_by_query(self, reader: MapIndexReader, request: UpdateByQueryRequest) -> Dict[str, Any]: + """Build update by query body. 
+    def build_update_by_query(self, reader: MapIndexReader, request: UpdateByQueryRequest) -> Dict[str, Any]:
+        """Build update by query body.
+
+        Args:
+            reader: MapIndexReader instance for index configuration
+            request: UpdateByQueryRequest containing update parameters
+
+        Returns:
+            Update by query body
+        """
+        # Build query portion reused from search builder
+        search_body = self.build_search_query(
+            reader,
+            SearchRequest(
+                query=None,
+                phrase=False,
+                filters=request.filters,
+                sort=None,
+                include_fields=None,
+                exclude_fields=None,
+                from_=0,
+                size=0,
+                collapse_field=None,
+                highlight=None,
+                track_total_hits=False,
+                search_after=None,
+                search_fields=None,
+                default_search_field="_all",
+            ),
+        )
+
+        # Keep only the fields applicable to update_by_query
+        update_query = {"query": search_body["query"]}
+
+        script = request.script
+        if not script and request.set_fields:
+            # Build painless script to set fields
+            assigns = []
+            params: Dict[str, Any] = {}
+            for k, v in request.set_fields.items():
+                assigns.append(f"ctx._source.{k} = params.{k};")
+                params[k] = v
+            script = {"source": " ".join(assigns), "params": params}
+
+        body: Dict[str, Any] = {"query": update_query["query"]}
+        if script:
+            body["script"] = script
+        return body
+
+    def build_delete_by_query(self, reader: MapIndexReader, request: DeleteByQueryRequest) -> Dict[str, Any]:
+        """Build delete by query body.
+
+        Args:
+            reader: MapIndexReader instance for index configuration
+            request: DeleteByQueryRequest containing delete parameters
+
+        Returns:
+            Delete by query body
+        """
+        # Build query portion reused from search builder
+        search_body = self.build_search_query(
+            reader,
+            SearchRequest(
+                query=None,
+                phrase=False,
+                filters=request.filters,
+                sort=None,
+                include_fields=None,
+                exclude_fields=None,
+                from_=0,
+                size=0,
+                collapse_field=None,
+                highlight=None,
+                track_total_hits=False,
+                search_after=None,
+                search_fields=None,
+                default_search_field="_all",
+            ),
+        )
+        return {"query": search_body["query"]}
+
+    # ============================================================================
+    # ADVANCED SEARCH AND FILTERING METHODS
+    # ============================================================================
+
+    def create_filter_term(self, value: str, field: str) -> Dict[str, Any]:
+        """Create filter term for Elasticsearch query.
+
+        Args:
+            value: Value to filter by (supports $ separated multiple values)
+            field: Field name to filter on
+
+        Returns:
+            Filter term dictionary
+        """
+        if not value:
+            return {'term': {field: ''}}
+
+        value_items = value.split('$') if value else []
+
+        if len(value_items) < 2:
+            return {'term': {field: value}}
+        else:
+            # Multiple values - create bool should query
+            should_clauses = []
+            for item in value_items:
+                should_clauses.append({'term': {field: item}})
+            return {'bool': {'should': should_clauses}}
+
+    def parse_advance_search(self, query_normal: str) -> tuple[str, List[Dict[str, str]]]:
+        """Parse advanced search queries with hashtags.
+
+        Args:
+            query_normal: The input query string
+
+        Returns:
+            Tuple of (cleaned_query, parsed_tags)
+        """
+        # the named group is required: the code below reads match.group('tag')
+        pattern = r'#(?P<tag>[^\b]+?) '
+        matches = list(re.finditer(pattern, query_normal))
+
+        q_normal = ''
+        index = 0
+        res = []
+        key = ''
+        key_prev = ''
+
+        for match in matches:
+            key_prev = key
+            key = match.group('tag')
+            m_index = match.start()
+
+            if index < m_index:
+                sub = query_normal[index:m_index - 1]
+                if index == 0:
+                    q_normal += sub
+                else:
+                    res.append({'key': key_prev, 'value': sub})
+
+                index += len(sub)
+
+            index = m_index + len(key) + 1  # +1 for the #
+
+        if index < len(query_normal):
+            sub = query_normal[index:]
+            if index == 0:
+                q_normal += sub
+            else:
+                res.append({'key': key, 'value': sub})
+
+        return q_normal, res
+
+    def get_suggest(self, field: str, query: str) -> List[Dict[str, Any]]:
+        """Generate a suggestion query for Elasticsearch.
+
+        Args:
+            field: The field to search in
+            query: The search query text
+
+        Returns:
+            Suggestion query structure or empty list if query is empty
+        """
+        if not query:
+            return []
+
+        return [{
+            'result': {
+                'text': query,
+                'term': {
+                    'field': field
+                }
+            }
+        }]
+
+    def get_query_advance(self, index_key: str, query_normal: str,
+                          iscollapse: int, filter_list: List[Dict[str, Any]],
+                          query_type: str = 'match', normal: List[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Build advanced search query with hashtag parsing.
+
+        Args:
+            index_key: Index key identifier
+            query_normal: Query string with hashtags
+            iscollapse: Collapse parameter
+            filter_list: List of filters
+            query_type: Type of query (match, match_phrase, and)
+            normal: Normal query clauses
+
+        Returns:
+            Dictionary with normal and parent query clauses
+        """
+        if normal is None:
+            normal = []
+
+        parent_tags = []
+        # NOTE: the incoming filter_list is reset here; only the range terms
+        # collected below end up in it
+        filter_list = []
+        cleaned_query, items = self.parse_advance_search(query_normal)
+
+        res = []
+        # Get tags from reader if available
+        reader = getattr(self, 'reader', None) or self.map_index_reader
+        if reader and hasattr(reader, 'get_search_advance_tags'):
+            tags = reader.get_search_advance_tags(index_key, True)
+        else:
+            tags = {"tags": {}}
+
+        if not tags.get('tags'):
+            return {"normal": normal, "parent": parent_tags}
+
+        tags = tags['tags']
+        iscollapse = -1
+
+        for item in items:
+            key = item['key']
+
+            if key not in tags:
+                continue
+
+            value = item['value'].strip()
+            tag_info = tags[key]
+
+            if iscollapse == -1:
+                iscollapse = 1  # first tag
+            iscollapse = iscollapse * tag_info.get('iscollapse', 1)
+
+            boost = 30
+            if 'boost_offset' in tag_info:
+                boost += int(tag_info['boost_offset'])
+
+            tag_keys = tag_info['key'].split(',')
+            for tag_key in tag_keys:
+                # Check for range search like date range
+                term = self.parse_range_advance(value, tag_key)
+
+                if term:  # Range term found
+                    # For proper date range search, it needs to be added to filter
+                    filter_list.append(term)
+                    term = {}
+                    continue
+                elif not term and value != '':  # Normal search
+                    if query_type == 'and':
+                        words = value.split(' ')
+                        term = self.get_and_temp(tag_key, words, boost)
+                    elif query_type in ['match', 'match_phrase']:
+                        term = {
+                            query_type: {
+                                tag_key: {
+                                    'query': value,
+                                    'boost': boost
+                                }
+                            }
+                        }
+
+                if term:  # Valid term found
+                    # This filter should first search in another index and use results in current index
+                    if 'index_parent' in tag_info:
+                        parent_tags.append({"query": term, "tag": tag_info})
+                    else:
+                        res.append(term)
+
+        if res:
+            res = res + normal
+        else:
+            res = normal
+
+        return {"normal": res, "parent": parent_tags}
+
+    def get_query_bookmarks_flag(self, ref_ids: List[str], user_id: str,
+                                 bookmark_index_name: str = "gn_entity") -> Dict[str, Any]:
+        """Build query for bookmarks with user
filtering. + + Args: + ref_ids: List of reference IDs + user_id: User ID for filtering + bookmark_index_name: Index name for entity data + + Returns: + Query parameters for bookmarks + """ + filter_list = [] + + filter_list.append({"terms": {'ref_id': ref_ids}}) + filter_list.append(self.create_filter_term('bookmark', 'data_type')) + filter_list.append(self.create_filter_term(user_id, 'user_id')) + + params = { + 'index': bookmark_index_name, + 'body': { + '_source': ['ref_id', 'time_edit'], + 'query': self.get_base_query([], filter_list), + 'size': len(ref_ids) + 100 + } + } + + return params + + # ============================================================================ + # QUERY BUILDING UTILITY METHODS + # ============================================================================ + + @staticmethod + def get_synonym_normal_query(synonyms: Dict[str, Union[str, List[str]]], + fields: List[str]) -> List[Dict[str, Any]]: + """Convert synonyms dictionary to Elasticsearch query clauses. + + Args: + synonyms: Dictionary mapping words to their synonyms + Format: {"w1": ["syn1"], "w2": ["syn1", "syn2", "syn3"], "w3": []} + fields: List of fields to search in Elasticsearch + + Returns: + List of Elasticsearch query clauses + """ + normal_queries = [] + + for key, value in synonyms.items(): + # Convert value to list if it's a string + values = [] + if isinstance(value, str): + values = value.split(',') if value else [] + elif isinstance(value, list): + values = value + + # Build the query string + q = key + if values: + # Escape quotes in synonyms and join with OR clauses + escaped_synonyms = [f'\\"{syn}\\"' for syn in values] + q += ' OR (\"' + ") OR (\"".join(escaped_synonyms) + '\")' + + # Create the query clause + normal_queries.append({ + 'query_string': { + 'fields': fields, + 'query': q, + 'default_operator': 'AND' + } + }) + + return normal_queries + + def get_base_query(self, normal: List[Dict[str, Any]], + filter_list: Optional[List[Dict[str, Any]]] = None, + not_list: Optional[List[Dict[str, Any]]] = None, + permit_filter: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]: + """Build base bool query structure. 
+ + Args: + normal: Normal query clauses (should) + filter_list: Filter clauses (must) + not_list: Must not clauses + permit_filter: Permitted filter clauses (should) + + Returns: + Bool query structure + """ + # Initialize empty lists if parameters are None + if filter_list is None: + filter_list = [] + if not_list is None: + not_list = [] + if permit_filter is None: + permit_filter = [] + + query = { + 'bool': { + 'must': [ + { + 'bool': { + 'should': normal, + 'must_not': not_list + } + }, + { + 'bool': { + 'filter': { + 'bool': { + 'must': filter_list, + 'should': permit_filter + } + } + } + } + ] + } + } + + # Clean up empty query parts + if not normal: + query['bool']['must'][0]['bool'].pop('should', None) + elif not not_list: + query['bool']['must'][0]['bool'].pop('must_not', None) + + if not filter_list: + query['bool']['must'][1]['bool']['filter']['bool'].pop('must', None) + elif not permit_filter: + query['bool']['must'][1]['bool']['filter']['bool'].pop('should', None) + + return query + + # ============================================================================ + # HELPER METHODS (to be implemented based on specific needs) + # ============================================================================ + + def parse_range_advance(self, value: str, field: str, type_: str = "time") -> Dict[str, Any]: + value = value.replace(" تا ", "--") + + if "--" not in value: + return {} + + values = (value or "").split("--") + if len(values) != 2: + return {} + + value1 = values[0].strip() + value2 = values[1].strip() + + term: Dict[str, Any] = {} + + if field == "sort_date_timestamp" or type_ == "time": + value1 = value1.replace("/", "-").replace("\\", "-") + value2 = value2.replace("/", "-").replace("\\", "-") + + time1 = self.jalali_to_tsmp(value1, "-") if value1 else "" + time2 = self.jalali_to_tsmp(value2, "-") if value2 else "" + + self.debug_data.append({"value1": value1}) + self.debug_data.append({"value2": value2}) + self.debug_data.append({"time1": time1}) + self.debug_data.append({"time2": time2}) + + term = { + "range": { + field: {} + } + } + if time1: + term["range"][field]["gte"] = f"{time1}000" # ثانیه به میلی‌ثانیه + if time2: + term["range"][field]["lte"] = f"{time2}000" + + elif type_ == "date": + value1 = value1.replace("/", "-").replace("\\", "-") + value2 = value2.replace("/", "-").replace("\\", "-") + + term = { + "range": { + field: {} + } + } + if value1: + term["range"][field]["gte"] = value1 + if value2: + term["range"][field]["lte"] = value2 + + return term + + def get_and_temp(self, field: str, words: List[str], boost: float, del_stop_words: bool = True) -> Dict[str, Any]: + must = [] + count = len(words) + if count == 0: + return {"bool": {"must": []}} + + w_boost = boost / count + + for i, w1 in enumerate(words): + if not w1: # skip empty strings + continue + if del_stop_words and w1 in self.stopwords_fr: + continue + + myboost = w_boost + count - i + temp1 = { + "match": { + field: { + "query": w1, + "boost": myboost + } + } + } + must.append(temp1) + + res = { + "bool": { + "must": must + } + } + return res + + diff --git a/app/core/elastic_wrapper.py b/app/core/elastic_wrapper.py new file mode 100644 index 0000000..07dbbab --- /dev/null +++ b/app/core/elastic_wrapper.py @@ -0,0 +1,55 @@ +import inspect +import asyncio + +class AsyncSafeWrapper: + def __init__(self, obj): + object.__setattr__(self, '_obj', obj) + + def __getattribute__(self, name): + _obj = object.__getattribute__(self, '_obj') + + try: + attr = getattr(_obj, name) + except 
AttributeError: + raise AttributeError(f"'{type(_obj).__name__}' object has no attribute '{name}'") + + if callable(attr) and not name.startswith('_'): + is_async = inspect.iscoroutinefunction(attr) + + try: + sig = inspect.signature(attr) + params = list(sig.parameters.items()) + for item in params: + if 'index' in item: + index = 'index' + elif 'index_name' in item: + index = 'index_name' + print(f"📋 Method '{name}' parameters: {params}") + except (ValueError, TypeError): + print(f"📋 Method '{name}' - cannot inspect signature") + + if is_async: + async def async_wrapper(*args, **kwargs): + kwargs[index]='use_for_test' + print(f"🔄 Calling async method: {name}") + result = await attr(*args, **kwargs) + print(f"✅ Async method '{name}' completed") + return result + return async_wrapper + else: + def sync_wrapper(*args, **kwargs): + kwargs[index]='use_for_test' + print(f"⚡ Calling sync method: {name}") + result = attr(*args, **kwargs) + print(f"✅ Sync method '{name}' completed") + return result + return sync_wrapper + + return attr + + def __setattr__(self, name, value): + if name == '_obj': + object.__setattr__(self, name, value) + else: + setattr(object.__getattribute__(self, '_obj'), name, value) + \ No newline at end of file diff --git a/app/core/field_processor.py b/app/core/field_processor.py new file mode 100644 index 0000000..3ca6a19 --- /dev/null +++ b/app/core/field_processor.py @@ -0,0 +1,418 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Any, Dict, List, Optional + +from app.core.map_index_reader import MapIndexReader + + +class FieldProcessor: + """ + Handles enhanced field processing, validation, and transformation based on backend.json configuration. + + This class provides utilities for: + - Field validation and type checking + - Data transformation based on join processes + - Field metadata extraction and processing + - Export and import field handling + """ + + def __init__(self, map_index_reader: MapIndexReader): + self.reader = map_index_reader + + # def set_valid_is_array_field(self, reader, document, field, prefix_field = ''): + # if reader.is_array( prefix_field + field ) and field in document and not isinstance(document[field], list): + # document[field] = [document[field]] + # return document + + # def set_valid_document(self, reader, document, field = '', prefix_field = ''): + + # if field and not field in document : + # return document + + # document_temp = document + # if field : + # document_temp = document[field] + + # for doc_field in document_temp.items(): + # if reader.is_object(prefix_field + doc_field) : + # prefix_field_tmp = doc_field + '.' + # document_temp[doc_field] = self.set_valid_document(reader, document_temp[doc_field], doc_field, prefix_field_tmp) + # else: + # document_temp[doc_field] = self.set_valid_is_array_field(reader, document_temp, doc_field, prefix_field) + + # if field : + # document[field] = document_temp + # document[field] = self.set_valid_is_array_field(reader, document, field, prefix_field) + # else : + # document = document_temp + + # return document + + def validate_field_value(self, field_name: str, value: Any, prefix_field: Optional[str] = '', + properties: Optional[Dict] = None) -> Dict[str, Any]: + """ + Validate a field value against its configuration (recursive for arrays/objects). 
+ + Args: + field_name: Name of the field to validate + value: Value to validate + + Returns: + Validation result with status and any errors/warnings + """ + try: + field_meta = self.reader.get_field_prefix_meta(field_name, prefix_field, properties) + validation = { + "valid": True, + "value": value, + "errors": [], + "warnings": [], + "field_name": field_name, + "field_type": field_meta.get("type") + } + + # # Required check + # if self.reader.is_required(prefix_field+field_name) and value is None: + # validation["valid"] = False + # validation["errors"].append("Field is required but value is None") + # return validation + + field_type = field_meta.get("type") + + # Type-specific validation + if field_type == "date": + if not self._is_valid_date(value): + validation["valid"] = False + validation["errors"].append("Invalid date format") + + elif field_type in ["integer", "long"]: + if not self._is_valid_integer(value): + validation["valid"] = False + validation["errors"].append("Value must be an integer") + + elif field_type in ["float", "double"]: + if not self._is_valid_float(value): + validation["valid"] = False + validation["errors"].append("Value must be a number") + + elif field_type == "boolean": + if not isinstance(value, bool): + validation["warnings"].append("Value should be boolean") + + # Array validation (recursive on elements) + if self.reader.is_array(prefix_field + field_name): + if not isinstance(value, list): + validation["warnings"].append("Field is configured as array but value is not a list") + value = [value] + + validated_items = [] + prefix_field = prefix_field + field_name + '.' + for i, item in enumerate(value): + item_validation = self.validate_field_value(field_name, item, prefix_field, field_meta) + validated_items.append(item_validation["value"]) + if not item_validation["valid"]: + validation["valid"] = False + validation["errors"].append( + f"Array item {i}: {', '.join(item_validation['errors'])}" + ) + validation["warnings"].extend(item_validation["warnings"]) + validation["value"] = validated_items + + # Object validation (recursive on fields) + if field_meta.get("object", False): + if not isinstance(value, dict): + validation["valid"] = False + validation["errors"].append("Field is configured as object but value is not a dict") + else: + validated_obj = {} + prefix_field = prefix_field + field_name + '.' + + for sub_field, sub_value in value.items(): + sub_validation = self.validate_field_value(sub_field, sub_value, prefix_field, field_meta) + validated_obj[sub_field] = sub_validation["value"] + + if not sub_validation["valid"]: + validation["valid"] = False + validation["errors"].append( + f"Object field '{sub_field}': {', '.join(sub_validation['errors'])}" + ) + validation["warnings"].extend(sub_validation["warnings"]) + validation["value"] = validated_obj + + return validation + + except Exception as e: + return { + "valid": False, + "errors": [f"Validation error: {str(e)}"], + "warnings": [], + "field_name": field_name, + "field_type": "unknown" + } + + def process_field_value(self, field_name: str, value: Any) -> Any: + """ + Process a field value based on its configuration and join processes. 
+ + Args: + field_name: Name of the field to process + value: Value to process + + Returns: + Processed value + """ + try: + field_meta = self.reader.get_field_meta(field_name) + + # Handle array fields + if field_meta.get("array", False) and not isinstance(value, list): + value = [value] + + # Apply join processes + join_processes = self.reader.get_join_processes(field_name) + if join_processes: + return self._apply_join_processes(field_name, value, join_processes) + + return value + + except Exception: + return value + + def _apply_join_processes(self, field_name: str, value: Any, processes: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Apply join processes to a field value. + + Args: + field_name: Name of the field + value: Original value + processes: List of join processes to apply + + Returns: + Dictionary with original value and processed properties + """ + result = {field_name: value} + + for process in processes: + process_name = process.get("process") + property_name = process.get("property") + + if process_name == "jalaliTotsmp" and property_name: + result[property_name] = self._jalali_to_timestamp(value) + elif process_name == "getYearOfDate" and property_name: + result[property_name] = self._extract_year(value) + + return result + + def _jalali_to_timestamp(self, jalali_date: str) -> Optional[int]: + """ + Convert Jalali date string to timestamp. + + Args: + jalali_date: Jalali date string (e.g., "1403/05/06") + + Returns: + Unix timestamp or None if conversion fails + """ + try: + # Simple Jalali to Gregorian conversion (this is a basic implementation) + # In production, you might want to use a proper Jalali calendar library + if "/" in jalali_date: + parts = jalali_date.split("/") + if len(parts) == 3: + year, month, day = int(parts[0]), int(parts[1]), int(parts[2]) + # Basic conversion (this is simplified) + gregorian_year = year - 621 + # This is a very basic conversion - in production use proper library + return int(datetime(gregorian_year, month, day).timestamp()) + return None + except Exception: + return None + + def _extract_year(self, date_string: str) -> Optional[str]: + """ + Extract year from date string. + + Args: + date_string: Date string + + Returns: + Year as string or None if extraction fails + """ + try: + if "/" in date_string: + parts = date_string.split("/") + if len(parts) >= 1: + return parts[0] + return None + except Exception: + return None + + def _is_valid_date(self, value: Any) -> bool: + """Check if value is a valid date.""" + if isinstance(value, str): + try: + datetime.fromisoformat(value.replace('Z', '+00:00')) + return True + except ValueError: + return False + elif isinstance(value, (int, float)): + try: + datetime.fromtimestamp(value) + return True + except (OSError, ValueError): + return False + return False + + def _is_valid_integer(self, value: Any) -> bool: + """Check if value is a valid integer.""" + try: + int(value) + return True + except (ValueError, TypeError): + return False + + def _is_valid_float(self, value: Any) -> bool: + """Check if value is a valid float.""" + try: + float(value) + return True + except (ValueError, TypeError): + return False + + # def get_export_fields(self) -> List[str]: + # """ + # Get list of fields that should be included in exports. + + # Returns: + # List of exportable field names + # """ + # return [field for field in self.reader.list_fields() if self.reader.get_export_state(field)] + + def get_searchable_fields(self) -> List[str]: + """ + Get list of fields that are searchable. 
+ + Returns: + List of searchable field names + """ + searchable_types = ["text", "keyword", "search_as_you_type"] + return [field for field in self.reader.list_fields() + if self.reader.get_field_type(field) in searchable_types] + + def get_sortable_fields(self) -> List[str]: + """ + Get list of fields that can be used for sorting. + + Returns: + List of sortable field names + """ + return [field for field in self.reader.list_fields() if self.reader.is_sortable(field)] + + def get_filterable_fields(self) -> List[str]: + """ + Get list of fields that can be used for filtering. + + Returns: + List of filterable field names + """ + return [field for field in self.reader.list_fields() + if self.reader.get_field_type(field) in ["keyword", "integer", "long", "float", "double", "boolean", + "date"]] + + # def get_field_labels(self) -> Dict[str, str]: + # """ + # Get mapping of field names to their Persian labels. + + # Returns: + # Dictionary mapping field names to labels + # """ + # labels = {} + # for field in self.reader.list_fields(): + # label = self.reader.get_field_label(field) + # if label: + # labels[field] = label + # return labels + + # ============================================================================ + # ..... + # ============================================================================ + + + def edition_document(self, document: Dict[str, Any]) -> Dict[str, Any]: + default_value_field = self.reader.get_default_value_field() + + for key, value in default_value_field.items(): + if key not in document.keys(): + document[key] = value + return document + + def validate_document(self, document: Dict[str, Any], id: str, check_requirds:bool=True) -> Dict[str, Any]: + """ + Validate an entire document against field configurations. + + Args: + document: Document to validate + + Returns: + Validation results for all fields + """ + validation_results = { + "valid": True, + "field_validations": {}, + "errors": [], + "warnings": [] + } + + # ----------------------------- + # Required check fields + if check_requirds : + for field_name in self.reader.get_required_fields(): + if not field_name in document: + if field_name == 'id' and id: + document['id'] = id + else: + validation_results["valid"] = False + validation_results["errors"].append("Field is required but value is None :" + field_name) + if not validation_results["valid"]: + return validation_results, document + # ----------------------------- + document_new = {} + for field_name, value in document.items(): + if field_name in self.reader.list_fields(): + field_validation = self.validate_field_value(field_name, value) + validation_results["field_validations"][field_name] = field_validation + + if not field_validation["valid"]: + validation_results["valid"] = False + validation_results["errors"].extend(field_validation["errors"]) + else: + document_new[field_name] = field_validation["value"] + + validation_results["warnings"].extend(field_validation["warnings"]) + + return validation_results, document_new + + def process_joinning_document(self, document: Dict[str, Any]) -> Dict[str, Any]: + """ + Process an entire document based on field configurations. 
+ + Args: + document: Document to process + + Returns: + Processed document + """ + processed_doc = {} + + for field_name, value in document.items(): + if field_name in self.reader.list_fields(): + processed_value = self.process_field_value(field_name, value) + if isinstance(processed_value, dict): + processed_doc.update(processed_value) + else: + processed_doc[field_name] = processed_value + else: + processed_doc[field_name] = value + + return processed_doc diff --git a/app/core/map_index_reader.py b/app/core/map_index_reader.py new file mode 100644 index 0000000..5625916 --- /dev/null +++ b/app/core/map_index_reader.py @@ -0,0 +1,705 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, Optional, List +import os + +from app.config.settings import Settings, get_settings +settings: Settings = get_settings() + +class MapIndexReader: + """ + Loads per-type configuration from two JSON files: + - mapping.json: The Elasticsearch index mapping for creation + - backend.json: Metadata for each field and index-level behavior + + Directory structure: + app/schemas/{type_name}_mapping.json + app/schemas/{type_name}_backend.json + """ + + def __init__(self, type_name: str, properties: Optional[Dict]=None, mapping: Optional[Dict]=None) -> None: + self.type_name = type_name + self.base_path = f"{str(settings.base_project)}/app/schemas/" + self._mapping: Dict[str, Any] | None = mapping + self._backend: Dict[str, Any] | None = properties + self._query: Dict[str, Any] | None = None + if not properties : + self.properties = self.backend.get("properties", {}) + + def _create_map_file(self, type_name: str, sub_folder:Optional[str]='') -> None: + path_file = self.base_path + if sub_folder : + path_file = path_file + sub_folder + '/' + path_file = path_file + f"{type_name}_map.json" + path = Path(path_file) + if not path.exists(): + if not self.backend : + return False + + properties = self.backend.get("properties", {}) + properties_map = {} + for field_name, value in properties.items(): + if "fields" in value: + fields = value["fields"] + if "fa" in fields: + fields["fa"]["analyzer"] = "normal_analyzer_fa" + fields["fa"]["search_analyzer"] = "normal_analyzer_fa" + fields["fa"]["search_quote_analyzer"] = "phrase_analyzer_fa" + if "ph" in fields: + fields["ph"]["analyzer"] = "phrase_analyzer_fa" + fields["ph"]["search_analyzer"] = "phrase_analyzer_fa" + fields["ph"]["search_quote_analyzer"] = "phrase_analyzer_fa" + properties_map[field_name] = value + + settings = {} + path_settings = self.base_path + f"settings.json" + with open(path_settings, "r", encoding="utf-8") as f: + settings = json.load(f) + + json_map = { "mappings": {"date_detection": False, "properties": properties_map}, "settings": settings } + with open(path_file, "w", encoding="utf-8") as file: + json.dump(json_map, file, ensure_ascii=False, indent=2) + + + + def _load_json(self, type_name: str, suffix: str, sub_folder:Optional[str]='') -> Dict[str, Any]: + path_file = self.base_path + if sub_folder : + path_file = path_file + sub_folder + '/' + path_file = path_file + f"{type_name}_{suffix}.json" + path = Path(path_file) + if not path.exists(): + if suffix == "map" : + # print("_load_json", type_name) + if not self._create_map_file(type_name, sub_folder) : + return {} + else : + print(f"Config file not found: {path_file}") + raise FileNotFoundError(f"Config file not found: {path_file}") + + with path.open("r", encoding="utf-8") as f: + return json.load(f) + + @property + def mapping(self) 
-> Dict[str, Any]: + if self._mapping is None: + self._mapping = self._load_json(self.type_name, "map", "mappings") + return self._mapping + + @property + def backend(self) -> Dict[str, Any]: + if self._backend is None: + self._backend = self._load_json(self.type_name, "backend") + return self._backend + + @property + def query(self) -> Dict[str, Any]: + if self._query is None: + self._query = self.backend.get("query", {}) + return self._query + + def get_index_key(self) -> str: + idx = self.backend.get("index", {}) + key = idx.get("index_key") + if not key: + raise ValueError("backend.json must include index_key") + return key + + def get_index_name(self) -> str: + idx = self.backend.get("index", {}) + name = idx.get("name") + if not name: + raise ValueError("backend.json must include index.name") + return name + + def get_index_aliases(self) -> List[str]: + idx = self.backend.get("index", {}) + aliases = idx.get("aliases", []) + if not isinstance(aliases, list): + raise ValueError("backend.index.aliases must be a list") + return aliases + + # def get_field_meta(self, field_name: str) -> Dict[str, Any]: + # if field_name not in self.properties: + # # raise KeyError(f"Field '{field_name}' not defined in backend.json for type '{self.type_name}'") + # return {} + # return self.properties[field_name] + + def get_field_prefix_meta(self, field_name: str, prefix_field: str, properties: Optional[Dict]=None) -> bool: + field_name = prefix_field + field_name + return self.get_field_meta(field_name, properties) + + def get_default_value_field(self) -> Dict[str, Any]: + """Get dict of fields that are default value.""" + try: + dict_default_value = dict(self.backend.get("validation", {}).get("default_value", {})) + return dict_default_value + except Exception: + return {} + + def get_field_meta(self, field_name: str, properties: Optional[Dict]=None) -> bool: + # eg : title + # eg : initial.files + sub_fields = field_name.split('.') + if not properties : + properties = self.properties + + sub_field_end = "" + for field in sub_fields : + if field in properties : + properties = properties[field] + sub_field_end = field + else : + return {} + + if sub_field_end : + return properties + else: + return properties[field_name] + + + + def list_fields(self) -> List[str]: + return list(self.properties.keys()) + + def fields_with_property(self, property_name: str, expected_value: Any = True) -> List[str]: + result: List[str] = [] + for field_name, meta in self.properties.items(): + if meta.get(property_name) == expected_value: + result.append(field_name) + return result + + def is_array(self, field_name: str) -> bool: + list_is_array = list(self.backend.get("is_array", [])) + if not list_is_array : + return False + return bool( field_name in list_is_array) + + def is_required(self, field_name: str) -> bool: + list_is_required = list(self.backend.get("validation", {}).get("required", [])) + if not list_is_required : + return False + return bool( field_name in list_is_required) + + def is_object(self, field_name: str, properties: Dict) -> bool: + properties = self.get_field_meta(field_name, properties) + if not properties: + return False + return bool(properties.get("properties", False)) + + def is_sortable(self, field_name: str) -> bool: + field_meta = self.get_field_meta(field_name) + type = field_meta.get("type", "") + if type == "text": + fields = field_meta.get("fields", {}) + if "keyword" in fields: + return True, "keyword" + result = ( type == "" or type == "object" or type == "dense_vector") + return not 
result, "" + # return bool(self.get_field_meta(field_name).get("sortable", False)) + + def default_sort(self) -> List[Dict[str, Any]]: + return list(self.query.get("default_sort", [])) + + def default_collapse_field(self) -> Optional[str]: + return self.query.get("collapse_field") + + def default_highlight(self) -> Optional[Dict[str, Any]]: + return self.query.get("highlight", {}) + + def default_aggregation_fields(self) -> Optional[Dict[str, Any]]: + return self.query.get("aggregation_fields", {}) + + # Enhanced functionality from map_index_reader_1.py + + def is_field_valid(self, field_name: str) -> bool: + """Check if a field exists and is valid in the index configuration.""" + try: + return field_name in self.properties + except Exception: + return False + + def get_field_joined(self, field_name: str) -> List[str]: + """Get information about fields that are joined to this field.""" + try: + field_meta = self.get_field_meta(field_name) + return field_meta.get("join_to", []) + except (KeyError, Exception): + return [] + + def needs_keyword_suffix(self, field_name: str) -> bool: + """Check if a field needs the .keyword suffix for exact matching.""" + try: + field_meta = self.get_field_meta(field_name) + field_type = field_meta.get("type", "") + if field_type == "text" and "keyword" in field_meta.get("fields", {}): + return True, field_type, field_meta + return False, field_type, field_meta + # return bool(field_meta.get("needs_keyword", False)) + except (KeyError, Exception): + return False, "", {} + + def in_include_fields(self, field_name: str) -> bool: + try: + include_fields = self.backend.get("include_fields", []) + return field_name in include_fields + except Exception: + return False + + def get_include_fields(self) -> List[str]: + """Get list of field names that should be included in search output.""" + try: + include_fields = self.backend.get("include_fields", []) + return include_fields + except Exception: + return [] + + def in_exclude_fields(self, field_name: str) -> bool: + try: + exclude_fields = self.backend.get("exclude_fields", []) + return field_name in exclude_fields + except Exception: + return False + + + def get_exclude_fields(self) -> List[str]: + """Get list of field names that should be exclude in search output.""" + try: + exclude_fields = self.backend.get("exclude_fields", []) + return exclude_fields + + except Exception: + return [] + + def get_filter_key(self, field_name: str) -> bool: + try: + filter_keys = self.query.get("filter_keys", {}) + if field_name in filter_keys : + return filter_keys[field_name] + return "" + except Exception: + return "" + + def get_sort_key(self, field_name: str) -> bool: + try: + sort_keys = self.backend.get("sort_keys", {}) + if field_name in sort_keys : + return sort_keys[field_name] + return {} + except Exception: + return {} + + def get_collapse_field(self, field_name: str) -> bool: + try: + if field_name == 'normal' : + return {} + collapse_fields = self.query.get("collapse_fields", {}) + if field_name in collapse_fields : + return collapse_fields[field_name] + return {} + except Exception: + return {} + + + def is_field_no_sort(self, field_name: str) -> bool: + """Check if a field should not be used for sorting.""" + try: + field_meta = self.get_field_meta(field_name) + type = field_meta.get("type", "") + if type == "text": + fields = field_meta.get("fields", {}) + if "keyword" in fields: + return False + result = ( type == "" or type == "object" or type == "dense_vector") + return result + except (KeyError, Exception): + 
return True + + # def get_query_boost_fields(self) -> Dict[str, float]: + # """Get fields with query boost values greater than 0.""" + # boost_fields = {} + # try: + # for field_name, meta in self.properties.items(): + # boost_value = meta.get("query_boost", 0) + # if boost_value > 0: + # boost_fields[field_name] = boost_value + # return boost_fields + # except Exception: + # return {} + + def get_field_type(self, field_name: str) -> Optional[str]: + """Get the Elasticsearch field type for a given field.""" + try: + field_meta = self.get_field_meta(field_name) + return field_meta.get("type") + except (KeyError, Exception): + return None + + def is_field_highlightable(self, field_name: str) -> bool: + """Check if a field should be highlighted in search results.""" + try: + highlight = self.query.get("highlight") + return bool(field_name in highlight) + except (KeyError, Exception): + return False + + def get_required_fields(self) -> List[str]: + """Get list of fields that are required (not nullable).""" + try: + list_is_required = list(self.backend.get("validation", {}).get("required", [])) + return list_is_required + except Exception: + return [] + + def get_searchable_fields(self) -> List[str]: + """Get list of fields that are searchable (text or keyword types).""" + try: + searchable_types = ["text", "keyword", "search_as_you_type"] + return [field_name for field_name, meta in self.properties.items() + if meta.get("type") in searchable_types] + except Exception: + return [] + + def get_query_normal(self, query_value:str, search_type:Optional[str]='normal') -> Optional[list[Dict[str,Any]]]: + try: + normal_fields = self.query.get("normal", {"title":2, "content":1}) + query = [] + for _type, value in normal_fields.items(): + fields = [] + for key, boost in value.items(): + fields.append(f'{key}^{boost}') + + multi_match = { + "multi_match":{ + "query": query_value, + "fields": fields, + } + } + if _type == "phrase" and ( search_type == 'normal' or search_type == 'phrase') : + multi_match["multi_match"]["type"] = "phrase" + query.append(multi_match) + + elif _type == "match" and search_type == 'normal': + query.append(multi_match) + elif _type == "match" and search_type == 'and' : + multi_match["multi_match"]["operator"] = "and" + query.append(multi_match) + + except Exception: + query = [] + + return query + + + def get_search_advance_tags(self, index_key: str = None, active_only: bool = True) -> Dict[str, Any]: + """ + Get advanced search tags configuration from backend.json query.advanced_tags section. 
+ + Args: + index_key: Optional index key filter (for backward compatibility) + active_only: Whether to return only active tags (for backward compatibility) + + Returns: + Dictionary containing advanced search tags configuration with structure: + { + "tags": { + "tag_name": { + "key": "field_path", + "boost_offset": 30, + "collapse_field": "field_name", + "index_parent": "parent_index", + "relation_key": "relation_field" + } + } + } + """ + try: + advanced_tags = self.query.get("advanced_tags", {}) + + # Filter tags if active_only is True (for backward compatibility) + if active_only: + # In the current implementation, all tags are considered active + # You can add additional filtering logic here if needed + pass + + # Return in the expected format + return { + "tags": advanced_tags + } + except Exception as e: + print(f"Error getting search advance tags: {e}") + return {"tags": {}} + + def get_advanced_tag_info(self, tag_name: str) -> Optional[Dict[str, Any]]: + """ + Get information about a specific advanced search tag. + + Args: + tag_name: Name of the tag to get information for + + Returns: + Dictionary containing tag configuration or None if not found + """ + try: + advanced_tags = self.query.get("advanced_tags", {}) + return advanced_tags.get(tag_name) + except Exception: + return None + + def list_advanced_tags(self) -> List[str]: + """ + Get list of all available advanced search tag names. + + Returns: + List of tag names + """ + try: + advanced_tags = self.query.get("advanced_tags", {}) + return list(advanced_tags.keys()) + except Exception: + return [] + + def get_advanced_tags_by_field(self, field_path: str) -> List[str]: + """ + Get list of advanced search tags that use a specific field. + + Args: + field_path: The field path to search for + + Returns: + List of tag names that reference the specified field + """ + try: + advanced_tags = self.query.get("advanced_tags", {}) + matching_tags = [] + + for tag_name, tag_config in advanced_tags.items(): + if tag_config.get("key") == field_path: + matching_tags.append(tag_name) + + return matching_tags + except Exception: + return [] + + def get_collapse_fields(self) -> List[str]: + """ + Get list of all collapse fields used in advanced search tags. 
+ + Returns: + List of unique collapse field names + """ + try: + advanced_tags = self.query.get("advanced_tags", {}) + collapse_fields = set() + + for tag_config in advanced_tags.values(): + collapse_field = tag_config.get("collapse_field") + if collapse_field: + collapse_fields.add(collapse_field) + + return list(collapse_fields) + except Exception: + return [] + + # # New methods for enhanced field properties + # def get_field_label(self, field_name: str) -> Optional[str]: + # """Get the Persian label for a field.""" + # try: + # field_meta = self.get_field_meta(field_name) + # return field_meta.get("label") + # except (KeyError, Exception): + # return None + + # def get_filter_key(self, field_name: str) -> str: + # """Get the filter key for a field.""" + # try: + # field_meta = self.get_field_meta(field_name) + # return field_meta.get("filter_key", "") + # except (KeyError, Exception): + # return "" + + # def can_filter_by_more(self, field_name: str) -> bool: + # """Check if a field can be filtered by multiple values.""" + # try: + # field_meta = self.get_field_meta(field_name) + # return bool(field_meta.get("filter_by_more", False)) + # except (KeyError, Exception): + # return False + + # def get_export_state(self, field_name: str) -> bool: + # """Check if a field should be included in exports.""" + # try: + # field_meta = self.get_field_meta(field_name) + # return bool(field_meta.get("export_state", True)) + # except (KeyError, Exception): + # return True + + def is_auto_id(self, field_name: str) -> bool: + """Check if a field is auto-generated ID.""" + try: + field_meta = self.get_field_meta(field_name) + return bool(field_meta.get("is_autoid", False)) + except (KeyError, Exception): + return False + + def get_un_repeat_keys(self, field_name: str) -> List[str]: + """Get unique constraint keys for array/object fields.""" + try: + field_meta = self.get_field_meta(field_name) + return field_meta.get("un_repeat_keys", []) + except (KeyError, Exception): + return [] + + def get_join_processes(self, field_name: str) -> List[Dict[str, Any]]: + """Get join processes for a field.""" + try: + field_joins = self.backend.get("field_joins", {}) + if field_name in field_joins : + join_to = field_joins[field_name] + return join_to + return [] + except (KeyError, Exception): + return [] + + def get_processed_fields(self, field_name: str) -> List[str]: + """Get list of processed fields generated from join processes.""" + try: + processes = self.get_join_processes(field_name) + return [process.get("property") for process in processes if process.get("property")] + except Exception: + return [] + + def get_fields_by_type(self, field_type: str) -> List[str]: + """Get list of fields with a specific Elasticsearch type.""" + try: + return [field_name for field_name, meta in self.properties.items() + if meta.get("type") == field_type] + except Exception: + return [] + + def get_array_fields(self) -> List[str]: + """Get list of fields that are arrays.""" + try: + return [field_name for field_name, meta in self.properties.items() + if meta.get("array", False)] + except Exception: + return [] + + def get_object_fields(self) -> List[str]: + """Get list of fields that are objects.""" + try: + return [field_name for field_name, meta in self.properties.items() + if meta.get("properties", False)] + except Exception: + return [] + + def get_date_fields(self) -> List[str]: + """Get list of date fields.""" + return self.get_fields_by_type("date") + + def get_numeric_fields(self) -> List[str]: + """Get list of numeric 
fields (integer, long, float, double).""" + try: + numeric_types = ["integer", "long", "float", "double"] + return [field_name for field_name, meta in self.properties.items() + if meta.get("type") in numeric_types] + except Exception: + return [] + + def get_boolean_fields(self) -> List[str]: + """Get list of boolean fields.""" + return self.get_fields_by_type("boolean") + + def get_vector_fields(self) -> List[str]: + """Get list of vector fields (dense_vector).""" + return self.get_fields_by_type("dense_vector") + + def get_field_metadata(self, field_name: str) -> Dict[str, Any]: + """Get complete metadata for a field including all properties.""" + try: + field_meta = self.get_field_meta(field_name) + return { + "name": field_name, + "type": field_meta.get("type"), + "label": field_meta.get("label"), + "return": field_meta.get("return", True), + # "sortable": field_meta.get("sortable", False), + "array": field_meta.get("array", False), + "object": field_meta.get("properties", False), + # "highlight": field_meta.get("highlight", False), + # "query_boost": field_meta.get("query_boost", 0.0), + # "query_normal_boost": field_meta.get("query_normal_boost", 0.0), + # "needs_keyword": field_meta.get("needs_keyword", False), + # "required": field_meta.get("required", False), + # "join_to": field_meta.get("join_to", []), + "filter_key": field_meta.get("filter_key", ""), + "filter_by_more": field_meta.get("filter_by_more", False), + "export_state": field_meta.get("export_state", True), + "is_autoid": field_meta.get("is_autoid", False), + "un_repeat_keys": field_meta.get("un_repeat_keys", []) + } + except (KeyError, Exception): + return {} + + def get_index_info(self) -> Dict[str, Any]: + """Get complete index information.""" + try: + index_config = self.backend.get("index", {}) + return { + "name": index_config.get("name"), + "aliases": index_config.get("aliases", []), + "index_key": index_config.get("index_key"), + # "file_map": index_config.get("file_map") + } + except Exception: + return {} + + def get_query_config(self) -> Dict[str, Any]: + """Get complete query configuration.""" + try: + query_config = self.query + return { + "default_sort": query_config.get("default_sort", []), + "collapse_field": query_config.get("collapse_field"), + "advanced_tags": query_config.get("advanced_tags", {}), + "highlight": query_config.get("highlight", {}) + } + except Exception: + return {} + + def validate_field_configuration(self, field_name: str) -> Dict[str, Any]: + """Validate field configuration and return validation results.""" + try: + field_meta = self.get_field_meta(field_name) + validation = { + "valid": True, + "errors": [], + "warnings": [] + } + + # Check required properties + if not field_meta.get("type"): + validation["valid"] = False + validation["errors"].append("Missing field type") + + # Check for conflicting properties + if field_meta.get("array", False) and field_meta.get("properties", False): + validation["warnings"].append("Field is both array and object - this may cause issues") + + # # Check boost values + # if field_meta.get("query_boost", 0) < 0: + # validation["warnings"].append("Query boost should be non-negative") + + # if field_meta.get("query_normal_boost", 0) < 0: + # validation["warnings"].append("Query normal boost should be non-negative") + + return validation + except Exception as e: + return { + "valid": False, + "errors": [str(e)], + "warnings": [] + } + diff --git a/app/core/request_helper.py b/app/core/request_helper.py new file mode 100644 index 0000000..ed48308 --- 
/dev/null +++ b/app/core/request_helper.py @@ -0,0 +1,266 @@ +from __future__ import annotations + +import json +from typing import Any, Dict, Optional, Tuple, Union +from urllib.parse import parse_qs, urlencode + +import httpx + +from app.config.settings import get_settings + + +class RequestHelper: + """ + Helper for communicating with other backend services (HTTP) and optionally SQL-based services. + For SQL, projects can extend this class; here we provide only HTTP helpers to avoid extra runtime deps. + + Features: + - Async HTTP client with httpx + - JWT authentication support + - URL query parsing and manipulation + - Comprehensive HTTP method support (GET, POST, PUT, DELETE) + - Service-specific helper methods + - Proper error handling and response parsing + """ + + def __init__(self, jwt: Optional[str] = None, timeout_seconds: Optional[int] = None) -> None: + settings = get_settings() + self.jwt = jwt + self.timeout_seconds = timeout_seconds or settings.request_timeout_seconds + self._client: httpx.AsyncClient | None = None + self._sync_client: httpx.Client | None = None + + async def _get_async_client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = httpx.AsyncClient(timeout=self.timeout_seconds) + return self._client + + def _get_sync_client(self) -> httpx.Client: + if self._sync_client is None: + self._sync_client = httpx.Client(timeout=self.timeout_seconds) + return self._sync_client + + def _get_headers(self, custom_headers: Optional[Dict[str, str]] = None) -> Dict[str, str]: + """Get default headers with optional JWT authentication and custom headers.""" + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + if self.jwt: + headers["Authorization"] = self.jwt + + if custom_headers: + headers.update(custom_headers) + + return headers + + # Async HTTP methods + async def http_get(self, url: str, params: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + """Async GET request.""" + client = await self._get_async_client() + resp = await client.get(url, params=params, headers=self._get_headers(headers)) + resp.raise_for_status() + return resp.json() + + async def http_post(self, url: str, json_body: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + """Async POST request.""" + client = await self._get_async_client() + resp = await client.post(url, json=json_body, headers=self._get_headers(headers)) + resp.raise_for_status() + return resp.json() + + async def http_put(self, url: str, json_body: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + """Async PUT request.""" + client = await self._get_async_client() + resp = await client.put(url, json=json_body, headers=self._get_headers(headers)) + resp.raise_for_status() + return resp.json() + + async def http_delete(self, url: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + """Async DELETE request.""" + client = await self._get_async_client() + resp = await client.delete(url, headers=self._get_headers(headers)) + resp.raise_for_status() + return resp.json() + + # Synchronous HTTP methods for backward compatibility + def get(self, url: str, params: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Union[Dict[str, Any], str]: + """Synchronous GET request.""" + return self._request("GET", url, params=params, headers=headers) + + def post(self, url: str, body: Optional[Dict[str, Any]] = None, + 
headers: Optional[Dict[str, str]] = None) -> Union[Dict[str, Any], str]: + """Synchronous POST request.""" + return self._request("POST", url, body=body, headers=headers) + + def put(self, url: str, body: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Union[Dict[str, Any], str]: + """Synchronous PUT request.""" + return self._request("PUT", url, body=body, headers=headers) + + def delete(self, url: str, body: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Union[Dict[str, Any], str]: + """Synchronous DELETE request.""" + return self._request("DELETE", url, body=body, headers=headers) + + def _request(self, method: str, url: str, body: Optional[Dict[str, Any]] = None, + params: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None) -> Union[Dict[str, Any], str]: + """ + Internal method for sending synchronous requests with any HTTP method. + """ + client = self._get_sync_client() + + try: + # Prepare headers + request_headers = self._get_headers(headers) + + # For form data, change content type + if body and not isinstance(body, dict): + request_headers["Content-Type"] = "application/x-www-form-urlencoded" + + response = client.request( + method=method.upper(), + url=url, + headers=request_headers, + data=body if body else None, + params=params if params else None, + ) + response.raise_for_status() + + try: + # Try to parse as JSON + return response.json() + except json.JSONDecodeError: + # If JSON parsing fails, return as text + return response.text + + except httpx.RequestException as e: + print(f"{method.upper()} request failed: {e}") + return "" + except Exception as e: + print(f"Unexpected error in {method.upper()} request: {e}") + return "" + + # URL query parsing utilities + def parse_url_query(self, query_string: str, query_tag: str = "q", + delimiter: str = "&") -> Tuple[str, Dict[str, str]]: + """ + Parse URL query string and extract the main query and other parameters. + + Args: + query_string: The query string to parse + query_tag: The key for the main query parameter + delimiter: The delimiter used to separate query parameters + + Returns: + Tuple of (main_query, other_params_dict) + """ + if not query_string: + return "", {} + + result = {} + main_query = "" + + # Split by delimiter and parse each parameter + items = query_string.split(delimiter) if query_string else [] + + for item in items: + if not item: + continue + + # Split by '=' to get key-value pairs + if "=" in item: + key, value = item.split("=", 1) + if key == query_tag: + main_query = value + else: + result[key] = value + + return main_query, result + + def build_query_string(self, params: Dict[str, Any]) -> str: + """Build a query string from a dictionary of parameters.""" + return urlencode(params) + + def parse_query_params(self, query_string: str) -> Dict[str, str]: + """Parse query string into a dictionary using urllib.parse.""" + if not query_string: + return {} + return dict(parse_qs(query_string, keep_blank_values=True)) + + # Service-specific helper methods + def get_tanghih_child_ids(self, parents: list, list_url: str) -> list: + """ + Get child IDs for tanghih subjects. 
+ + Args: + parents: List of parent IDs + list_url: Base URL for the list service + + Returns: + List of child IDs + """ + url = f"{list_url}/subject/get/childs/tanghih" + data = {"parents": parents} + + response = self.post(url, data) + if response and isinstance(response, dict) and "meta" in response and response["meta"]: + return response["meta"][0].get("child_ids", []) + return [] + + def get_child_ids(self, parents: list, list_url: str, item_state: int = 1) -> list: + """ + Get child IDs for subjects. + + Args: + parents: List of parent IDs + list_url: Base URL for the list service + item_state: State filter for items + + Returns: + List of child IDs + """ + url = f"{list_url}/subject/get/childs" + data = {"parents": parents, "item_state": item_state} + + response = self.post(url, data) + if response and isinstance(response, dict) and "meta" in response and response["meta"]: + return response["meta"][0].get("child_ids", []) + return [] + + # Client lifecycle management + async def close(self) -> None: + """Close the async HTTP client.""" + if self._client is not None: + await self._client.aclose() + self._client = None + + def close_sync(self) -> None: + """Close the synchronous HTTP client.""" + if self._sync_client is not None: + self._sync_client.close() + self._sync_client = None + + def __enter__(self): + """Context manager entry for synchronous usage.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit for synchronous usage.""" + self.close_sync() + + async def __aenter__(self): + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + await self.close() + + diff --git a/app/core/response_helper.py b/app/core/response_helper.py new file mode 100644 index 0000000..f759ee3 --- /dev/null +++ b/app/core/response_helper.py @@ -0,0 +1,260 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Optional +from fastapi.responses import JSONResponse + + +class ResponseHelper: + """Normalizes Elasticsearch responses into a unified structure and provides utility methods.""" + + def __init__(self, elastic_helper=None, reader=None): + self.elastic_helper = elastic_helper + self.reader = reader + self.message = '' + self.status_code = 406 + self.status = 1 + self.position_id = None + self.is_web_service = True + + # Highlight field mappings for repair_result method + # self.hilightField1_ph = 'title_ph' + # self.hilightField1_fa = 'title_fa' + # self.hilightKey1 = 'title' + # self.hilightField2_ph = 'content_ph' + # self.hilightField2_fa = 'content_fa' + # self.hilightKey2 = 'content' + # self.hilightField3_ph = 'description_ph' + # self.hilightField3_fa = 'description_fa' + # self.hilightKey3 = 'description' + # self.current_index_key = None + + async def general(self, response): + return response + + def return_elastic_list(self, data: Any): + pass + + + + def normalize_search_response(self, response: Dict[str, Any], collapse_field: Optional[str] = None, bookmark_id: Optional[bool] = '', mode:Optional[str]='elastic', user_id: Optional[bool] = '') -> Dict[str, Any]: + """Normalize Elasticsearch search response into a consistent format.""" + + took = response.get("took") + shards = response.get("_shards") + hits_section = response.get("hits", {}) + total_count = 0 + if isinstance(hits_section.get("total"), Dict): + total_count = hits_section.get("total", {}).get("value", 0) + elif isinstance(hits_section.get("total"), int): + total_count = 
hits_section.get("total", 0)
+
+        # add bookmark flag
+        # if bookmark_id :
+        #     bookmark_ref_ids = []
+        #     bookmark_ref_id_indexs = {}
+        #     for i, h in hits_section.get("hits", []):
+        #         ref_id = h.get("_id", 0)
+        #         source = h.get("_source", {})
+        #         if bookmark_id == 'normal' or bookmark_id == 'id' or bookmark_id == '_id' :
+        #             bookmark_ref_ids.append(ref_id)
+        #             bookmark_ref_id_indexs[ref_id] = i
+        #         else:
+        #             if bookmark_id in source :
+        #                 ref_id = source[bookmark_id]
+        #                 bookmark_ref_ids.append(source[bookmark_id])
+        #                 bookmark_ref_id_indexs[ref_id] = i
+
+        #     if bookmark_ref_ids :
+        #         self.set_bookmarks_flag(bookmark_ref_ids, bookmark_ref_id_indexs, hits_section, None)  # Note: elastic_query_builder needed here
+
+        # rename_values of aggregations
+        aggregations = response.get("aggregations")
+        # if aggregations and self.reader :
+        #     aggregation_fields = self.reader.default_aggregation_fields()
+        #     user_ids = []
+        #     for agg_name, agg_value in aggregations.items():
+        #         if agg_name in aggregation_fields :
+        #             if not isinstance(aggregation_fields[agg_name], Dict) :
+        #                 continue
+
+        #             if 'is_value_user_id' in aggregation_fields[agg_name] :
+        #                 rename_values = aggregation_fields[agg_name].get('rename_values', {})
+        #                 for item in agg_value.get('buckets', []):
+        #                     value = item['key']
+        #                     if value in rename_values :
+        #                         item['title'] = rename_values[value]
+
+        #             if 'rename_values' in aggregation_fields[agg_name] :
+        #                 rename_values = aggregation_fields[agg_name].get('rename_values', {})
+        #                 for item in agg_value.get('buckets', []):
+        #                     value = item['key']
+        #                     if value in rename_values :
+        #                         item['title'] = rename_values[value]
+
+        result = {}
+        if mode == 'elastic':
+            hits = hits_section.get("hits", [])
+            result = response
+            # result["count"] = len(hits)
+            # result["total_count"] = total_count
+            # result["collapse_field"] = collapse_field
+            # if 'total_collapse' in aggregations :
+            #     result['hits']['total']['value'] = result['aggregations']['total_collapse']['value']
+
+        else:  # mode == 'normal'
+            hits = []
+            for h in hits_section.get("hits", []):
+                source = h.get("_source", {})
+                # highlight = self._normalize_highlight(h.get("highlight")) if h.get("highlight") else None
+                highlight = h.get("highlight")
+                hits.append({
+                    "id": h.get("_id"),
+                    "index": h.get("_index"),
+                    "score": h.get("_score"),
+                    "source": source,
+                    "highlight": highlight,
+                    "sort": h.get("sort"),
+                })
+
+            result = {
+                "success": True,
+                "took": took,
+                "shards": shards,
+                "total": total_count,
+                "total_count": total_count,
+                "count": len(hits),
+                "collapse_field": collapse_field,
+                "hits": hits,
+                "aggregations": response.get("aggregations"),
+            }
+
+        return result
+
+    def normalize_get_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
+        """Normalize Elasticsearch get response into a consistent format."""
+        return {
+            "success": True,
+            "found": response.get("found", False),
+            "id": response.get("_id"),
+            "index": response.get("_index"),
+            "source": response.get("_source"),
+        }
+
+    def error(self, message: str, status_code: int = 400, details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Create a standardized error response."""
+        return {"success": False, "status_code": status_code, "message": message, "details": details}
+
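+    # Illustrative shapes only (the values are placeholders, not real data):
+    #   normalize_get_response(es_resp) ->
+    #       {"success": True, "found": True, "id": "42", "index": "qaqanon", "source": {...}}
+    #   error("bad query", 400) ->
+    #       {"success": False, "status_code": 400, "message": "bad query", "details": None}
+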
+    def run_error(self, msg: str = 'error', response_code: int = 406, status: int = 1, force_exit: bool = False):
+        """Handle error responses with optional force exit."""
+        if not msg:
+            msg = "An error occurred"
+
+        self.message = msg
+        self.status_code = response_code
+        self.status = status
+
+        if force_exit:
+            more_data = {
+                "status": self.status,
+                "message": self.message,
+                "position": self.position_id,
+                "data": [],
+                "meta": None
+            }
+
+            if self.is_web_service:
+                return JSONResponse(content=more_data, status_code=self.status_code)
+
+            return JSONResponse(content={"message": self.message}, status_code=self.status_code)
+
+    async def run_error_async(self, message: str, response_code=406, status=1, force_exit=False):
+        """Async version of run_error."""
+        return self.run_error(msg=message, response_code=response_code, status=status, force_exit=force_exit)
+
+    def set_bookmarks_flag(self, ref_ids: List[str], ref_ids_indexes: Dict[str, int], results: Dict[str, Any], elastic_query_builder):
+        """Set bookmark flags for search results."""
+        if not self.elastic_helper:
+            return
+
+        params = elastic_query_builder.get_query_bookmarks_flag(ref_ids)
+        res2 = self.elastic_helper.search(**params)
+
+        # Default every referenced hit to "not bookmarked", then flag the hits
+        # that actually came back from the bookmarks query.
+        for index in ref_ids_indexes.values():
+            results['hits'][index]['_source']['tbookmark'] = 0
+
+        for res in res2['hits']:
+            ref_id = res['_source']['ref_id']
+            if ref_id in ref_ids_indexes:
+                index = ref_ids_indexes[ref_id]
+                results['hits'][index]['_source']['tbookmark'] = 1
+
+    # def repair_result(self, results: Dict[str, Any], by_bookmarks: bool = False) -> Dict[str, Any]:
+    #     """Repair and enhance search results with highlights and bookmarks."""
+    #     ref_ids = []
+    #     ref_ids_indexes = {}
+
+    #     for i, hit in enumerate(results['hits']['hits']):
+    #         if by_bookmarks:
+    #             refid = hit['_id']
+    #             if self.current_index_key in ('qasection', 'rgsection'):
+    #                 refid = hit['_source']['qanon_id']
+    #             ref_ids.append(refid)
+    #             ref_ids_indexes[refid] = i
+
+    #         # hit['_source']['tbookmark'] = 0
+
+    #         if 'highlight' not in hit:
+    #             continue
+
+    #         highlight = hit['highlight']
+
+    #         # Process highlight fields
+    #         if self.hilightField1_ph in highlight:
+    #             highlight[self.hilightKey1] = highlight[self.hilightField1_ph]
+    #         elif self.hilightField1_fa in highlight:
+    #             highlight[self.hilightKey1] = highlight[self.hilightField1_fa]
+
+    #         if self.hilightField2_ph in highlight:
+    #             highlight[self.hilightKey2] = highlight[self.hilightField2_ph]
+    #         elif self.hilightField2_fa in highlight:
+    #             highlight[self.hilightKey2] = highlight[self.hilightField2_fa]
+
+    #         if self.hilightField3_ph in highlight:
+    #             highlight[self.hilightKey3] = highlight[self.hilightField3_ph]
+    #         elif self.hilightField3_fa in highlight:
+    #             highlight[self.hilightKey3] = highlight[self.hilightField3_fa]
+
+    #     if by_bookmarks and self.elastic_helper:
+    #         self.set_bookmarks_flag(ref_ids, ref_ids_indexes, results, None)  # Note: elastic_query_builder needed here
+
+    #     if 'aggregations' in results and 'total_collapse' in results['aggregations']:
+    #         results['hits']['total']['value'] = results['aggregations']['total_collapse']['value']
+
+    #     return results
+
+    def merge_related(self, result: Dict[str, Any], related_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Merge related data into the result."""
+        if not related_data:
+            return result
+        merged = dict(result)
+        merged["related"] = related_data
+        return merged
+
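+    # A minimal sketch of what _normalize_highlight() below does, using made-up
+    # analyzer subfield names:
+    #     {"title.fa": ["<em>a</em>", "<em>a</em>"], "content.ph": ["<em>b</em>"]}
+    # becomes
+    #     {"title": ["<em>a</em>"], "content": ["<em>b</em>"]}
+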
+    def _normalize_highlight(self, hl: Dict[str, List[str]]) -> Dict[str, List[str]]:
+        """Normalize highlight fields by stripping analyzer subfield suffixes and deduplicating fragments."""
+        normalized: Dict[str, List[str]] = {}
+        for field, frags in hl.items():
+            field_new = field
+            if '.ph' in field:
+                field_new = field.replace('.ph', '')
+            elif '.fa' in field:
+                field_new = field.replace('.fa', '')
+            elif '.ar' in field:
+                field_new = field.replace('.ar', '')
+            normalized[field_new] = list(dict.fromkeys(frags))  # de-duplicate while preserving order
+        return normalized
+
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..20c7eb6
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,126 @@
+from fastapi import FastAPI, Request, Response
+# from app.permit.permit import FactorySelector
+from app.routes.v1.elastic import router as elastic_router
+from app.routes.v1.elastic_map import router as elastic_map_router
+from app.routes.v1.elastic_admin import router as elastic_admin_router
+from app.routes.rag.rag_chat import router as rag_chat
+from app.routes.voice.voice import router as voice_routers
+from app.routes.tree.tree_base import router as tree_base_routers
+from app.config.settings import get_settings, Settings
+from app.core.elastic_client_helper import ElasticClientHelper, create_async_elastic_client
+from starlette.middleware.base import BaseHTTPMiddleware
+from app.core.response_helper import ResponseHelper
+from app.lib.general_functions import extract_jwt_user_id
+
+
+def create_app() -> FastAPI:
+    app = FastAPI(title="Flexible Elasticsearch Backend", version="0.1.0")
+    app.state.settings = get_settings()
+
+    @app.on_event("startup")
+    async def on_startup() -> None:
+        # settings: Settings = app.state.settings
+
+        # create_async_redis_client(settings)
+        app.state.elastic_client = await create_async_elastic_client(app.state.settings)
+        app.state.elastic_helper = ElasticClientHelper(app.state.elastic_client)
+
+        # permit = getattr(app.state, "permit_client", None)
+        # if permit is None:
+        #     app.state.permit_client = await FactorySelector.get_factory("db",
+        #         type=settings.permit_db_type,
+        #         host=settings.permit_database_host,
+        #         port=settings.permit_database_port,
+        #         username=settings.permit_database_user,
+        #         db_name=settings.permit_database_name,
+        #     )
+
+    @app.on_event("shutdown")
+    async def on_shutdown() -> None:
+        client = getattr(app.state, "elastic_client", None)
+        if client is not None:
+            await client.close()
+
+    @app.get("/")
+    async def root():
+        return "Elasticsearch Backend Service"
+
+    @app.get("/ping")
+    async def ping():
+        return "Elasticsearch Backend ping"
+
+    app.include_router(elastic_router, prefix="/v1")
+    app.include_router(rag_chat, prefix="/rag")
+    app.include_router(voice_routers, prefix="/voice")
+    app.include_router(tree_base_routers, prefix="/tree")
+
+    # ---------------------------------------------
+    # For the final release these must be gated behind proper access control
+    if app.state.settings.is_mode_develop:
+        app.include_router(elastic_map_router, prefix="/v1/map")
+        app.include_router(elastic_admin_router, prefix="/v1/admin")
+
+    return app
+
+
+app = create_app()
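+
+# Illustrative only: a downstream route could read the user id that
+# TavasiMiddleware (below) stashes on the settings object in rewriteUrls().
+# The route path and attribute access here are assumptions for the sketch,
+# not part of the service API:
+#
+#     @app.get("/whoami")
+#     async def whoami(request: Request):
+#         return {"user_id": getattr(request.app.state.settings, "current_user_id", None)}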
+
+
+class TavasiMiddleware(BaseHTTPMiddleware):
+    # routes_to_reroute = ["/indices/entity/insert/{entity_type}/{data_type}/{ref_key}"]
+    routes_to_reroute = ["/indices/entity/insert"]
+
+    async def test_rewrite(self, request: Request, call_next):
+        if request.url.path in self.routes_to_reroute:
+            request.scope['path'] = '/welcome'
+            headers = dict(request.scope['headers'])
+            headers[b'custom-header'] = b'my custom header'
+            request.scope['headers'] = [(k, v) for k, v in headers.items()]
+        return await call_next(request)
+
+    async def rewriteUrls(self, request):
+        request.state.app = app
+        user_id = extract_jwt_user_id(request, app.state.settings.jwtKey, "HS256")
+        if user_id:
+            # Temporarily stash the user id on the shared settings/context
+            app.state.settings.current_user_id = user_id
+
+        return request
+
+    async def responseNormal(self, response):
+        responseHelper = ResponseHelper()
+        return await responseHelper.general(response)
+
+    async def dispatch(self, request: Request, call_next):
+        # Before the request is handled
+        # print("before handling the request:", request.url)
+        await self.rewriteUrls(request)
+
+        # Handle the request
+        response = await call_next(request)
+
+        # After the request is handled
+        response = await self.responseNormal(response)
+        # print("after handling the request:", response.status_code)
+
+        return response
+
+# @app.middleware("http")
+# async def check_permit(request: Request, call_next):
+#     # client = app.state.permit_client
+#     # print(await client.ping())
+#     # result = await client.check_perm(
+#     #     user_id=2, action_tag='item_new'
+#     # )
+#     if app.state.settings.enable_back_permition:
+#         return Response(content={"detail": "The server is temporarily limited."}, status_code=503)
+#     else:
+#         response = await call_next(request)
+#         return response
+
+app.add_middleware(TavasiMiddleware)
diff --git a/app/permit/__init__.py b/app/permit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/permit/engine.py b/app/permit/engine.py
new file mode 100644
index 0000000..40cd44a
--- /dev/null
+++ b/app/permit/engine.py
@@ -0,0 +1,41 @@
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
+from sqlalchemy.orm import declarative_base
+
+
+Base = declarative_base()
+
+engine = None
+AsyncSessionLocal = None
+
+
+async def init_database(connection_string):
+    """Initialize the async PostgreSQL engine and session factory."""
+    global engine, AsyncSessionLocal
+
+    engine = create_async_engine(
+        connection_string,
+        echo=True,  # verbose SQL logging, for debugging
+        future=True,
+        pool_pre_ping=True,
+        pool_recycle=300
+    )
+
+    AsyncSessionLocal = async_sessionmaker(
+        engine,
+        class_=AsyncSession,
+        expire_on_commit=False
+    )
+
+    print("✅ PostgreSQL Async database initialized successfully")
+
+
+async def get_session() -> AsyncSession:
+    if not AsyncSessionLocal:
+        raise Exception("Database not initialized. Call init_database() first.")
+
+    return AsyncSessionLocal()
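+
+# A minimal usage sketch, assuming init_database() was awaited at startup with
+# a valid connection string; PMProjects is one of the ORM models defined in
+# app/permit/models.py:
+#
+#     from sqlalchemy import select
+#     from app.permit.models import PMProjects
+#
+#     async def list_project_titles():
+#         session = await get_session()
+#         try:
+#             result = await session.execute(select(PMProjects.title))
+#             return [row[0] for row in result.fetchall()]
+#         finally:
+#             await session.close()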
+
+
+async def close_database():
+    if engine:
+        await engine.dispose()
+        print("✅ Database connections closed")
\ No newline at end of file
diff --git a/app/permit/models.py b/app/permit/models.py
new file mode 100644
index 0000000..9a8afcc
--- /dev/null
+++ b/app/permit/models.py
@@ -0,0 +1,189 @@
+from sqlalchemy import Index, Column, Integer, String, Boolean, Text
+
+from app.permit.engine import Base
+
+
+class PMDataPermit(Base):
+    __tablename__ = 'pm_data_permit'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    user_c = Column(Integer, nullable=True)
+    user_e = Column(Integer, nullable=True)
+    date_c = Column(String(50), nullable=True)  # utf16
+    date_e = Column(String(50), nullable=True)
+    deleted_at = Column(String(50), nullable=True)
+    user_id = Column(Integer, nullable=False)
+    service_id = Column(String(50), nullable=False)
+    entity_type = Column(String(50), nullable=True)
+    entity_id = Column(String(50), nullable=False)
+    create = Column(Boolean, nullable=True)
+    read = Column(Boolean, nullable=True)
+    update = Column(Boolean, nullable=True)
+    delete = Column(Boolean, nullable=True)
+    perm_type = Column(Integer, nullable=True, comment='read : 1, write: 10, admin: 500, owner: 1000')
+
+
+class PMUserSteps(Base):
+    __tablename__ = 'pm_user_steps'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    user_c = Column(Integer, nullable=True)
+    user_e = Column(Integer, nullable=True)
+    date_c = Column(String(50), nullable=True)
+    date_e = Column(String(50), nullable=True)
+    deleted_at = Column(String(50), nullable=True)
+    section_id = Column(Integer, nullable=True)
+    project_id = Column(Integer, nullable=True, index=True)
+    step_id = Column(Integer, nullable=True)
+    user_id = Column(Integer, nullable=True)
+    perm_type = Column(Integer, nullable=True)
+    meta = Column(Text, nullable=True)
+    data_value = Column(String(255), nullable=True, index=True)
+
+
+class PMFilePermit(Base):
+    __tablename__ = 'pm_file_permit'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    user_c = Column(Integer, nullable=True)
+    user_e = Column(Integer, nullable=True)
+    date_c = Column(String(50), nullable=True)
+    date_e = Column(String(50), nullable=True)
+    deleted_at = Column(String(50), nullable=True)
+    user_id = Column(Integer, nullable=False, comment='ID of the user this permission applies to')
+    service_id = Column(String(50), nullable=False, comment='micro_service_id : repo, message,...')
+    entity_id = Column(String(50), nullable=False, comment='file_id')
+    file_type = Column(String(50), nullable=True, comment='jpg, audio, video, owl, ....')
+    file_name = Column(String(50), nullable=False, comment='Name shown to the user; defaults to the original file name')
+    file_path = Column(String(255), nullable=False, comment='Local server path for direct access by the microservices')
+    create = Column(Boolean, nullable=True)
+    read = Column(Boolean, nullable=True)
+    update = Column(Boolean, nullable=True)
+    delete = Column(Boolean, nullable=True)
+    perm_type = Column(Integer, nullable=True, comment='read : 1, write: 10, admin: 500, owner: 1000')
+    meta = Column(Text, nullable=True, comment='Auxiliary data stored as JSON or text')
+
+
+class PMProjects(Base):
+    __tablename__ = 'pm_projects'
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    user_c = Column(Integer, nullable=True)
+    user_e = 
Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + title = Column(String(255), nullable=True) + users = Column(String(255), nullable=True, default='') + organ = Column(String(255), nullable=True) + owner = Column(Integer, nullable=False, default=0) + comment = Column(Text, nullable=True) + deleted_at = Column(String(50), nullable=True) + link = Column(String(255), nullable=True) + admin_users = Column(String(255), nullable=True) + showInDashboard = Column(Boolean, nullable=True, default=True) + tags = Column(String(255), nullable=True, default='') + + __table_args__ = ( + Index('users', 'users'), + ) + + +class PMRoles(Base): + __tablename__ = 'pm_roles' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_c = Column(Integer, nullable=True) + user_e = Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + project_id = Column(Integer, nullable=False, index=True) + title = Column(String(255), nullable=True, index=True) + deleted_at = Column(String(50), nullable=True) + + +class PMSectionRole(Base): + __tablename__ = 'pm_section_role' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_c = Column(Integer, nullable=True) + user_e = Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + section_id = Column(Integer, nullable=False, index=True) + project_id = Column(Integer, nullable=False, index=True) + role_id = Column(Integer, nullable=False, index=True) + state = Column(Integer, nullable=False, default=0) + deleted_at = Column(String(50), nullable=True) + + +class PMSectionSteps(Base): + __tablename__ = 'pm_section_steps' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_c = Column(Integer, nullable=True) + user_e = Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + deleted_at = Column(String(50), nullable=True) + section_id = Column(Integer, nullable=True) + project_id = Column(Integer, nullable=True, index=True) + title = Column(String(50), nullable=True, index=True) + step_key = Column(String(50), nullable=True) + step_order = Column(Integer, nullable=True) + ok_next = Column(Integer, nullable=True) + no_next = Column(Integer, nullable=True) + meta = Column(Text, nullable=True) + schema = Column(Text, nullable=True) + data_type = Column(String(50), nullable=True) + data_options = Column(Text, nullable=True) + + +class PMSectionUser(Base): + __tablename__ = 'pm_section_user' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_c = Column(Integer, nullable=True) + user_e = Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + section_id = Column(Integer, nullable=False, index=True) + project_id = Column(Integer, nullable=False, index=True) + user_id = Column(Integer, nullable=False, index=True) + state = Column(Integer, nullable=False, default=0) + deleted_at = Column(String(50), nullable=True) + role_id = Column(Integer, nullable=True) + + +class PMSections(Base): + __tablename__ = 'pm_sections' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_c = Column(Integer, nullable=True) + user_e = Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + project_id = Column(Integer, nullable=False, 
index=True) + section_title = Column(String(255), nullable=True, index=True) + action_title = Column(String(255), nullable=True, index=True) + section_tag = Column(String(50), nullable=True) + action_tag = Column(String(50), nullable=True) + deleted_at = Column(String(50), nullable=True) + + +class PMUserRole(Base): + __tablename__ = 'pm_user_role' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_c = Column(Integer, nullable=True) + user_e = Column(Integer, nullable=True) + date_c = Column(String(50), nullable=True) + date_e = Column(String(50), nullable=True) + user_id = Column(Integer, nullable=False, index=True) + project_id = Column(Integer, nullable=True, index=True) + role_id = Column(Integer, nullable=False, index=True) + state = Column(Integer, nullable=False, default=0) + deleted_at = Column(String(50), nullable=True) + \ No newline at end of file diff --git a/app/permit/permit.py b/app/permit/permit.py new file mode 100644 index 0000000..7124453 --- /dev/null +++ b/app/permit/permit.py @@ -0,0 +1,208 @@ +from __future__ import annotations +from abc import ABC, ABCMeta, abstractmethod +from sqlalchemy.ext.asyncio import AsyncSession +import aiohttp + +from app.permit.engine import get_session, init_database +from sqlalchemy import select, and_, or_ +from app.permit.models import PMSectionUser, PMSections + + +# ---------------------- SINGLETON META ---------------------- +class SingletonMeta(type): + """Ensure only one instance of each class exists.""" + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + instance = super().__call__(*args, **kwargs) + cls._instances[cls] = instance + return cls._instances[cls] + +# ---------------------- ABC WITH SINGLETON SUPPORT ---------------------- +class ABCSingletonMeta(ABCMeta, SingletonMeta): + """Metaclass that combines ABC and Singleton functionality.""" + pass + +# ---------------------- ABSTRACT CONNECTION ---------------------- +class Connection(ABC, metaclass=ABCSingletonMeta): + @abstractmethod + async def ping(self): + """Check connection status.""" + pass + + +# ---------------------- HTTP CONNECTION ---------------------- +class HTTPConnection(Connection, metaclass=SingletonMeta): + def __init__(self, base_url: str): + self.base_url = base_url + self.session: aiohttp.ClientSession | None = None + + @classmethod + async def create(cls, base_url: str) -> HTTPConnection: + """Async factory method for HTTPConnection.""" + self = cls(base_url) + self.session = aiohttp.ClientSession(base_url=base_url) + print(f"🌐 HTTPConnection created for {base_url}") + return self + + async def ping(self) -> bool: + print(f"🌐 Ping from HTTPConnection: {self.base_url}") + return True + + async def check_perm(self, action: str, user: str) -> bool: + print(f"🌐 Checking permission for user '{user}' with action '{action}'") + return True + + async def close(self): + if self.session: + await self.session.close() + print("🌐 HTTPConnection session closed") + + +# ---------------------- DATABASE CONNECTION ---------------------- +class DBConnection(Connection, metaclass=SingletonMeta): + def __init__(self, db_type: str, host: str, port: int, username: str, password: str, db_name: str): + self.db_type = db_type + self.host = host + self.port = port + self.username = username + self.password = password + self.db_name = db_name + self.session: AsyncSession | None = None + + def get_connection_string(self) -> str: + """Build a valid async connection string.""" + if self.db_type == "mysql": + return 
f"mysql+aiomysql://{self.username}:{self.password}@{self.host}:{self.port}/{self.db_name}" + elif self.db_type in ("psql", "postgresql"): + return f"postgresql+asyncpg://{self.username}:{self.password}@{self.host}:{self.port}/{self.db_name}" + else: + raise ValueError(f"Unsupported DB type: {self.db_type}") + + @classmethod + async def create( + cls, + db_type: str, + host: str, + port: int, + username: str, + password: str, + db_name: str, + ) -> DBConnection: + """Async factory method for DBConnection.""" + self = cls(db_type, host, port, username, password, db_name) + + conn_str = self.get_connection_string() + await init_database(conn_str) + self.session = await get_session() + print(f"🗄️ Database connection initialized: {conn_str}") + return self + + async def ping(self) -> bool: + print(f"🗄️ Ping from DBConnection: {self.host}:{self.port}") + + return True + + async def check_perm(self, user_id: int, action_tag: str, section_tag: str = None) -> bool: + """ + بررسی دسترسی کاربر با استفاده از ORM + """ + try: + if not self.session: + raise Exception("Database session not initialized") + + query = ( + select(PMSections.section_tag, PMSections.action_tag) + .join(PMSectionUser, PMSectionUser.section_id == PMSections.id) + .where( + and_( + PMSectionUser.user_id == user_id, + PMSectionUser.deleted_at.is_(None), + PMSections.deleted_at.is_(None), + PMSectionUser.state == 0 + ) + ) + ) + + result = await self.session.execute(query) + permissions = result.fetchall() + + if not permissions: + return False + + if section_tag: + for perm in permissions: + if perm.section_tag == section_tag and perm.action_tag == action_tag: + return True + else: + for perm in permissions: + if perm.action_tag == action_tag: + return True + + return False + + except Exception as e: + print(f"Error checking permission: {str(e)}") + return False + + async def close(self): + if self.session: + await self.session.close() + print("🗄️ Database session closed") + + +# ---------------------- FACTORIES ---------------------- +class ConnectionFactory(ABC): + @abstractmethod + async def create_connection(self) -> Connection: + pass + + +class FactoryHTTPConnection(ConnectionFactory): + def __init__(self, base_url: str): + self.base_url = base_url + + async def create_connection(self) -> HTTPConnection: + return await HTTPConnection.create(self.base_url) + + +class FactoryDBConnection(ConnectionFactory): + def __init__(self, db_type: str, host: str, port: int, username: str, password: str, db_name: str): + self.db_type = db_type + self.host = host + self.port = port + self.username = username + self.password = password + self.db_name = db_name + + async def create_connection(self) -> DBConnection: + return await DBConnection.create( + self.db_type, + self.host, + self.port, + self.username, + self.password, + self.db_name, + ) + + +# ---------------------- FACTORY SELECTOR ---------------------- +class FactorySelector: + @staticmethod + async def get_factory(connection_type: str, **kwargs) -> ConnectionFactory: + if connection_type == "http": + return await FactoryHTTPConnection(kwargs.get("base_url", "http://localhost")).create_connection() + + elif connection_type == "db": + return await FactoryDBConnection( + db_type=kwargs.get("type", "postgresql"), + host=kwargs.get("host", "localhost"), + port=kwargs.get("port", 5432), + username=kwargs.get("username", "admin"), + password=kwargs.get("password", "password"), + db_name=kwargs.get("db_name", "test_db"), + ).create_connection() + + else: + raise ValueError(f"Unknown 
+
+
+# ---------------------- FACTORY SELECTOR ----------------------
+class FactorySelector:
+    @staticmethod
+    async def get_factory(connection_type: str, **kwargs) -> Connection:
+        if connection_type == "http":
+            return await FactoryHTTPConnection(kwargs.get("base_url", "http://localhost")).create_connection()
+
+        elif connection_type == "db":
+            return await FactoryDBConnection(
+                db_type=kwargs.get("type", "postgresql"),
+                host=kwargs.get("host", "localhost"),
+                port=kwargs.get("port", 5432),
+                username=kwargs.get("username", "admin"),
+                password=kwargs.get("password", "password"),
+                db_name=kwargs.get("db_name", "test_db"),
+            ).create_connection()
+
+        else:
+            raise ValueError(f"Unknown connection type: {connection_type}")
diff --git a/app/routes/__init__.py b/app/routes/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/app/routes/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/app/routes/rag/models.py b/app/routes/rag/models.py
new file mode 100644
index 0000000..f42e7c7
--- /dev/null
+++ b/app/routes/rag/models.py
@@ -0,0 +1,12 @@
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class RagInsertRequest(BaseModel):
+    chat_id: Optional[str] = None
+    title: Optional[str] = None
+    user_query: str
+
diff --git a/app/routes/rag/rag_chat.py b/app/routes/rag/rag_chat.py
new file mode 100644
index 0000000..6b2bd73
--- /dev/null
+++ b/app/routes/rag/rag_chat.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+
+from app.core.map_index_reader import MapIndexReader
+from app.core.elastic_query_builder import ElasticQueryBuilder
+from app.core.field_processor import FieldProcessor
+from app.core.response_helper import ResponseHelper
+from app.routes.v1.models import (
+    SearchRequest,
+    InsertRequest,
+    UpdateByQueryRequest,
+    DeleteByQueryRequest,
+)
+from app.routes.rag.models import (
+    RagInsertRequest,
+)
+from typing import Any, Dict, List, Optional
+import time
+from app.routes.v1.elastic import (
+    insert,
+    search,
+)
+import uuid
+import requests
+from datetime import datetime
+from app.config.settings import get_settings, Settings
+
+
+router = APIRouter(tags=["ragchat"])
+settings = get_settings()
+
+
+def get_elastic_helper(request: Request):
+    helper = getattr(request.app.state, "elastic_helper", None)
+    if helper is None:
+        raise RuntimeError("Elasticsearch helper not initialized")
+    return helper
+
+
+@router.post("/{type_name}/credit_refresh")
+async def credit_refresh(type_name: str, request: Request):
+    try:
+        print("credit_refresh ...->", settings.ai_rag_host)
+        if settings.ai_rag_host:
+            url = settings.ai_rag_host + "/" + "credit_refresh"
+            body = {}
+            headers = {"accept": "application/json", "Content-Type": "application/json"}
+            response = requests.request("POST", url, headers=headers, json=body)
+            return response.text.replace('"', '')
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+@router.post("/{type_name}/insert")
+async def insert_rag(type_name: str, payload: RagInsertRequest, request: Request):
+    # Same id/title conventions as query_rag below; this endpoint only stores
+    # an exchange, so there is no model call and the answer fields stay empty.
+    id = uuid.uuid4().hex[:8]
+    chat_id = payload.chat_id
+    if not chat_id:
+        chat_id = uuid.uuid4().hex[:8]
+    title = payload.title
+    if not title and payload.user_query:
+        title = payload.user_query[0:50]
+    answer: Dict[str, Any] = {}
+
+    time_stamp = int(datetime.now().timestamp())
+    user_id = 0
+    document = {
+        'id': id,
+        'title': title,
+        'chat_id': chat_id,
+        'user_id': user_id,
+        'bale_info': {
+            "user_name": "",
+            "first_name": "",
+            "last_name": ""
+        },
+        'user_query': payload.user_query,
+        'model_key': answer.get("model_key", ""),
+        'retrived_passage': answer.get("retrived_passage", ""),
+        'retrived_ref_ids': answer.get("retrived_ref_ids", []),
+        'retrived_duration': int(answer.get("retrived_duration", 0)),
+        'prompt_type': answer.get("prompt_type", ""),
+        'llm_duration': int(answer.get("llm_duration", 0)),
+        'full_duration': int(answer.get("full_duration", 0)),
+        'time_create': time_stamp,
+        'used_ref_ids': answer.get("used_ref_ids", []),
+        'prompt_answer': answer.get("prompt_answer", ""),
+        'status_text': answer.get("status_text", ""),
+        'status': answer.get("status", False)
+    }
+    insertRequest = InsertRequest(id=id, document=document)
+    response = await insert(type_name, insertRequest, request)
+    response['answer'] = document
+    return response
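+
+# Illustrative call against the insert endpoint above (host, port and the
+# "qachat" index name are placeholders; any type with a backend schema works):
+#
+#     import requests
+#     resp = requests.post(
+#         "http://localhost:8000/rag/qachat/insert",
+#         json={"user_query": "..."},
+#     )
+#     print(resp.json()["answer"]["chat_id"])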
+
+
+@router.post("/{type_name}/query")
+async def query_rag(type_name: str, payload: RagInsertRequest, request: Request):
+    # Only the first 8 chars of a uuid are used as the id, e.g. '9f1c2a7b'.
+    doc_id = uuid.uuid4().hex[:8]
+    print("query_rag start ... ", doc_id)
+    chat_id = payload.chat_id
+    if not chat_id:
+        chat_id = uuid.uuid4().hex[:8]
+    title = payload.title
+    if not title and payload.user_query:
+        title = payload.user_query[0:50]
+
+    # ---------------------------
+    answer = {}
+    is_gpu_service_ready = False
+    headers = {"accept": "application/json", "Content-Type": "application/json"}
+    try:
+        if settings.ai_rag_host_gpu:
+            url = settings.ai_rag_host_gpu + '/ping'
+            response = requests.get(url, headers=headers)
+            is_gpu_service_ready = response.status_code == 200
+    except Exception:  # noqa: BLE001
+        is_gpu_service_ready = False
+
+    print(" settings.ai_rag_host_gpu ", settings.ai_rag_host_gpu, is_gpu_service_ready)
+    try:
+        if payload.user_query:
+            if is_gpu_service_ready:
+                url = settings.ai_rag_host_gpu + "/run_chat"
+            elif settings.ai_rag_host:
+                url = settings.ai_rag_host + "/run_chatbot"
+            else:
+                raise HTTPException(status_code=500, detail="No RAG service host configured (ai_rag_host / ai_rag_host_gpu)")
+
+            print("settings.ai_rag_host ...", url)
+            body = {"query": payload.user_query, "text": ""}
+            response = requests.post(url, headers=headers, json=body)
+            print(response.status_code)
+            data = response.json()
+            answer = data.get("answer", {})
+    except HTTPException:
+        raise
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+    # ---------------------------
+
+    time_stamp = int(datetime.now().timestamp())
+    user_id = 0
+    document = {
+        'id': doc_id,
+        'title': title,
+        'chat_id': chat_id,
+        'user_id': user_id,
+        'user_query': payload.user_query,
+        'model_key': answer.get("model_key", ""),
+        'retrived_passage': answer.get("retrived_passage", ""),
+        'retrived_ref_ids': answer.get("retrived_ref_ids", []),
+        'retrived_duration': int(answer.get("retrived_duration", 0)),
+        'prompt_type': answer.get("prompt_type", ""),
+        'llm_duration': int(answer.get("llm_duration", 0)),
+        'full_duration': int(answer.get("full_duration", 0)),
+        'time_create': time_stamp,
+        'used_ref_ids': answer.get("used_ref_ids", []),
+        'prompt_answer': answer.get("prompt_answer", ""),
+        'status_text': answer.get("status_text", ""),
+        'status': answer.get("status", False)
+    }
+    insertRequest = InsertRequest(id=doc_id, document=document)
+    response = await insert(type_name, insertRequest, request)
+    response['answer'] = document
+    return response
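+
+
+# The query endpoint responds with the insert result plus the stored
+# document echoed back under "answer" (shape only, values illustrative):
+#
+#     {"success": true, "result": {...},
+#      "answer": {"id": "9f1c2a7b", "user_query": "...", "status": true}}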
diff --git a/app/routes/tree/base_models.py b/app/routes/tree/base_models.py
new file mode 100644
index 0000000..307961b
--- /dev/null
+++ b/app/routes/tree/base_models.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+# from app.routes.v1.models import (
+#     SearchRequest,
+#     # InsertRequest,
+#     # UpdateByQueryRequest,
+#     # DeleteByQueryRequest,
+# )
+
+
+class treeSearchRequest(BaseModel):
+    parent_id: str
+    version_key: str = 'D49_oss120'
+    tcode: str = ''
+    nested: bool = False
+    query: Optional[str] = ''
+    all_item: Optional[bool] = False
+
+
+class treeInsertRequest(BaseModel):
+    id: Optional[str] = None
+    version_key: str = 'D49_oss120'
+    tcode: str
+    title: str
+    parent_id: str
+    child_order: Optional[float] = -1
+    full_path: Optional[str] = ''
+
+
+class treeUpdateRequest(BaseModel):
+    version_key: Optional[str] = None
+    tcode: Optional[str] = None
+    title: Optional[str] = None
+    parent_id: Optional[str] = None
+    child_order: Optional[float] = None
+    full_path: Optional[str] = None
+    content: Optional[str] = None
+
diff --git a/app/routes/tree/tree_base.py b/app/routes/tree/tree_base.py
new file mode 100644
index 0000000..295d83b
--- /dev/null
+++ b/app/routes/tree/tree_base.py
@@ -0,0 +1,346 @@
+# ---------------------- modularity
+from __future__ import annotations
+from fastapi import APIRouter, Depends, HTTPException, Request
+from app.core.map_index_reader import MapIndexReader
+from app.core.response_helper import ResponseHelper
+from app.routes.v1.models import (
+    SearchRequest,
+    InsertRequest,
+    UpdateByQueryRequest,
+    DeleteByQueryRequest,
+)
+from app.routes.tree.base_models import (
+    treeSearchRequest,
+    treeInsertRequest,
+    treeUpdateRequest,
+)
+from typing import Any, Dict, List, Optional
+from app.routes.v1.elastic import (
+    search,
+    get_by_id,
+    insert,
+    update,
+    delete,
+)
+import uuid, requests, time, traceback
+
+# ---------------------- global-params
+
+router = APIRouter(tags=["tree"])
+
+
+# ---------------------- base-Func
+def get_elastic_helper(request: Request):
+    helper = getattr(request.app.state, "elastic_helper", None)
+    if helper is None:
+        raise RuntimeError("Elasticsearch helper not initialized")
+    return helper
+# ---------------------- router-Func
+
+
+# ----------- GET
+@router.get("/ping")
+async def helloworld(request: Request):
+    return {"message": " tree base hello-world"}
+
+
+def sort_children(node):
+    node["children"].sort(key=lambda x: x["child_order"])
+    for child in node["children"]:
+        sort_children(child)
+
+
+# Returns a nested structure: each child is embedded in its parent's
+# "children" field.
+@router.post("/{type_name}/get")
+async def search_tree(type_name: str, payload: treeSearchRequest, request: Request):
+    tcode = payload.tcode
+    version_key = payload.version_key
+    parent_id = payload.parent_id
+    nested = payload.nested
+    all_item = payload.all_item
+
+    if parent_id == "0":
+        parent_id = 0
+
+    must = [
+        {"term": {"version_key": version_key}},
+    ]
+
+    if tcode != "":
+        must.append({"term": {"tcode": tcode}})
+
+    if not all_item:
+        must.append({"term": {"parent_id": parent_id}})
+
+    es_query = {
+        "size": 10000,
+        "_source": [
+            "id",
+            "title",
+            "parent_id",
+            "child_order",
+            "time_create",
+            "tcode",
+            "version_key",
+            "full_path",
+            "content",
+        ],
+        "query": {
+            "bool": {
+                "must": must
+            }
+        },
+        "sort": [{"parent_id": "asc"}, {"child_order": "asc"}],
+    }
+
+    reader = MapIndexReader(type_name)  # mj_plan
+    helper = get_elastic_helper(request)
+
+    try:
+        index_name = reader.get_index_name()
+        es_res = await helper.search(index_name, es_query)
+        es_res = dict(es_res)
+
+        items = []
+        for item in es_res["hits"]["hits"]:
+            item_new = {
+                "id": item["_id"],
+                "title": item["_source"]["title"],
+                "version_key": item["_source"]["version_key"],
+                "parent_id": item["_source"]["parent_id"],
+                "child_order": item["_source"]["child_order"],
+                "full_path": item["_source"]["full_path"],
+                "tcode": item["_source"]["tcode"],
+                "children": [],
+            }
+            if "content" in item["_source"]:
+                item_new["content"] = item["_source"]["content"]
+            items.append(item_new)
+
+        if not nested:
+            return items
+
+        # Build a lookup dict keyed by id for fast access.
+        lookup = {item["id"]: item for item in items}
+
+        # Build the tree structure.
+        root_nodes = []
+        for item in items:
+            parent_id = item["parent_id"]
+            if parent_id == 0:
+                # Roots.
+                root_nodes.append(item)
+            else:
+                parent = lookup.get(parent_id)
+                if parent:
+                    parent["children"].append(item)
+
+        for root in root_nodes:
+            sort_children(root)
+
+        return root_nodes
+    except Exception:  # noqa: BLE001
+        raise HTTPException(status_code=404, detail=traceback.format_exc())
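+
+
+# With nested=True the endpoint returns root nodes (parent_id == 0) with
+# their descendants embedded, e.g. (illustrative):
+#
+#     [{"id": "mp_a1", "title": "root", "parent_id": 0, "child_order": 1,
+#       "children": [{"id": "mp_b2", "title": "child", "parent_id": "mp_a1",
+#                     "child_order": 1, "children": []}]}]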
item["_source"]["version_key"], + "parent_id": item["_source"]["parent_id"], + "child_order": item["_source"]["child_order"], + "full_path": item["_source"]["full_path"], + "tcode": item["_source"]["tcode"], + "children": [], + } + if "content" in item["_source"] : + item_new["content"] = item["_source"]["content"] + items.append(item_new) + + + + + if not nested: + return items + + # 2️⃣ ساخت دیکشنری lookup بر اساس id برای دسترسی سریع + lookup = {item["id"]: item for item in items} + + # 3️⃣ ساخت ساختار درختی + root_nodes = [] + p = 0 + c = 0 + for item in items: + parent_id = item["parent_id"] + if parent_id == 0: + # ریشه‌ها + root_nodes.append(item) + # print(f'parent {p}') + p += 1 + else: + # print(f'parent_id {parent_id}') + parent = lookup.get(parent_id) + if parent: + parent["children"].append(item) + # print(f'children {c}') + c += 1 + + for root in root_nodes: + sort_children(root) + + # print(f'es_res {result}', ) + return root_nodes + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=404, detail=str(traceback.print_exc())) + + +@router.get("/{type_name}/get/{id}") +async def get_tree(type_name: str, id: str, request: Request): + doc_id = id + response = await get_by_id(type_name, doc_id, request ) + return response + + +@router.post("/{type_name}/insert") +async def insert_tree(type_name: str, payload:treeInsertRequest, request: Request): + + reader = MapIndexReader(type_name) + helper = get_elastic_helper(request) + index_name = reader.get_index_name() + + if not payload.id or payload.id == None or payload.id == "None" : + payload.id = 'mp_' + uuid.uuid4().hex[:8] + + if not payload.parent_id or payload.parent_id == None: + payload.parent_id = "0" + + if not payload.child_order or payload.child_order == -1 : + payload.child_order = get_max_child_order(helper, index_name, payload ) + else : + update_next_child_order(helper, index_name, payload ) + + #????? 
+
+
+@router.post("/{type_name}/update/{id}")
+async def update_tree(type_name: str, id: str, payload: treeUpdateRequest, request: Request):
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    index_name = reader.get_index_name()
+
+    if payload.child_order == -1:
+        payload.child_order = await get_max_child_order(helper, index_name, payload)
+
+    payload_new = InsertRequest.model_construct()
+    payload_new.id = id
+    payload_new.document = payload.model_dump(exclude_none=True)
+    response = await update(type_name, id, payload_new, request)
+    return response
+
+
+@router.post("/{type_name}/delete/{id}")
+async def delete_tree(type_name: str, id: str, request: Request):
+
+    response = await delete(type_name, id, request)
+    return response
+
+
+@router.post("/{type_name}/move_prev/{to_id}/{id}")
+async def move_prev_tree(type_name: str, id: str, to_id: str, request: Request):
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    index_name = reader.get_index_name()
+
+    # node1 = await helper.get_by_id(index_name, id)
+    node2 = await helper.get_by_id(index_name, to_id)
+
+    # Guard against moving a node relative to itself.
+    if id == to_id:
+        raise HTTPException(status_code=404, detail="cannot move a node relative to itself")
+
+    if not node2:
+        raise HTTPException(status_code=404, detail=f"target node not found: {to_id}")
+
+    payload = node2["_source"]
+    child_order = await get_max_child_order(helper, index_name, payload)
+
+    payload_new = InsertRequest.model_construct()
+    payload_new.id = id
+    payload_new.document = {
+        "parent_id": to_id,
+        "child_order": child_order
+    }
+    response = await update(type_name, id, payload_new, request)
+    return response
+
+
+@router.post("/{type_name}/move_in/{to_id}/{id}")
+async def move_in_tree(type_name: str, id: str, to_id: str, request: Request):
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    index_name = reader.get_index_name()
+
+    # node1 = await helper.get_by_id(index_name, id)
+    node2 = await helper.get_by_id(index_name, to_id)
+
+    # Guard against moving a node onto itself.
+    if id == to_id:
+        raise HTTPException(status_code=404, detail="cannot move a node onto itself")
+
+    if not node2:
+        raise HTTPException(status_code=404, detail=f"target node not found: {to_id}")
+
+    # child_order1 = node1["_source"]["child_order"]
+    child_order2 = node2["_source"]["child_order"]
+
+    # parent_id1 = node1["_source"]["parent_id"]
+    parent_id2 = node2["_source"]["parent_id"]
+
+    payload = node2["_source"]
+    await update_next_child_order(helper, index_name, payload)
+
+    payload_new = InsertRequest.model_construct()
+    payload_new.id = id
+    payload_new.document = {
+        "parent_id": parent_id2,
+        "child_order": child_order2
+    }
+    response = await update(type_name, id, payload_new, request)
+    return response
+
+
+def _payload_field(payload, name):
+    # The order helpers are called both with tree request models and with
+    # raw Elasticsearch _source dicts, so support attribute and key access.
+    if isinstance(payload, dict):
+        return payload.get(name)
+    return getattr(payload, name)
+
+
+async def get_max_child_order(helper, index_name, payload):
+    must = [
+        {"term": {"version_key": _payload_field(payload, "version_key")}},
+        {"term": {"tcode": _payload_field(payload, "tcode")}},
+        {"term": {"parent_id": _payload_field(payload, "parent_id")}}
+    ]
+
+    query = {
+        "size": 1,
+        "_source": ["child_order"],
+        "query": {
+            "bool": {
+                "must": must
+            }
+        },
+        # Sort descending so the single returned hit carries the current maximum.
+        "sort": [{"child_order": "desc"}],
+    }
+    es_res = await helper.search(index_name, query)
+    child_order = 0
+    if len(es_res["hits"]["hits"]) > 0:
+        child_order = es_res["hits"]["hits"][0]["_source"]["child_order"]
+
+    return child_order + 1
+
+
+async def update_next_child_order(helper, index_name, payload):
+    must = [
+        {"term": {"version_key": _payload_field(payload, "version_key")}},
+        {"term": {"tcode": _payload_field(payload, "tcode")}},
+        {"term": {"parent_id": _payload_field(payload, "parent_id")}},
+        {"range": {"child_order": {"gte": _payload_field(payload, "child_order")}}}
+    ]
+
+    query = {
+        "query": {
+            "bool": {
+                "must": must
+            }
+        },
+        "script": {
+            "source": "ctx._source.child_order = ctx._source.child_order + 1; ",
+            "lang": "painless"
+        }
+    }
+    es_res = await helper.update_by_query(index_name, query, True)
+
+    return es_res
+
diff --git a/app/routes/v1/__init__.py b/app/routes/v1/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/app/routes/v1/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/app/routes/v1/elastic.py b/app/routes/v1/elastic.py
new file mode 100644
index 0000000..c788745
--- /dev/null
+++ b/app/routes/v1/elastic.py
@@ -0,0 +1,223 @@
+from __future__ import annotations
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+
+from app.core.elastic_client_helper import ElasticClientHelper
+from app.core.map_index_reader import MapIndexReader
+from app.core.elastic_query_builder import ElasticQueryBuilder
+from app.core.field_processor import FieldProcessor
+from app.core.response_helper import ResponseHelper
+from app.routes.v1.models import (
+    ExportToFileRequest,
+    SearchRequest,
+    InsertRequest,
+    UpdateByQueryRequest,
+    DeleteByQueryRequest,
+)
+from typing import Any, Dict, List, Optional
+import time
+from app.config.settings import get_settings, Settings
+from app.lib.general_functions import is_user_permit_action
+
+
+router = APIRouter(tags=["elasticsearch"])
+settings: Settings = get_settings()
+
+
+def get_elastic_helper(request: Request):
+    helper = getattr(request.app.state, "elastic_helper", None)
+    if helper is None:
+        raise RuntimeError("Elasticsearch helper not initialized")
+    return helper
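+
+
+# Permission checks below follow a "{type_name}_<action>" tag convention,
+# e.g. for type_name="qaqanon" an insert is gated on the qaqanon_update /
+# qaqanon_insert tags (illustrative; the exact semantics live in
+# app.lib.general_functions.is_user_permit_action, which is not shown here).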
+
+
+@router.post("/indices/{type_name}/insert")
+async def insert(type_name: str, payload: InsertRequest, request: Request):
+
+    if not is_user_permit_action(request.state.user_id, f"{type_name}_update", f"{type_name}_insert", request.state.app):
+        message = f"access denied for user {request.state.user_id}: {type_name}_update"
+        print(message)
+        raise HTTPException(status_code=400, detail=message)
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    field_processor = FieldProcessor(reader)
+
+    validate_doc, document = field_processor.validate_document(payload.document, payload.id)
+    document = field_processor.edition_document(payload.document)
+    if not validate_doc["valid"]:
+        raise HTTPException(status_code=400, detail=str(validate_doc))
+
+    doc = field_processor.process_joinning_document(document)
+    try:
+        es_res = await helper.index_document(reader.get_index_name(), doc, id=payload.id, refresh="wait_for")
+        return {"success": True, "result": es_res}
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+@router.post("/indices/{type_name}/update/{id}")
+async def update(type_name: str, id: str, payload: InsertRequest, request: Request):
+    payload.id = id
+
+    if not is_user_permit_action(request.state.user_id, f"{type_name}_update", f"{type_name}_insert", request.state.app):
+        message = f"access denied for user {request.state.user_id}: {type_name}_update"
+        print(message)
+        raise HTTPException(status_code=400, detail=message)
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    field_processor = FieldProcessor(reader)
+
+    validate_doc, document = field_processor.validate_document(payload.document, payload.id, False)
+    document = field_processor.edition_document(payload.document)
+    if not validate_doc["valid"]:
+        raise HTTPException(status_code=400, detail=str(validate_doc))
+
+    doc = field_processor.process_joinning_document(document)
+    try:
+        es_res = await helper.update_or_index_document(index=reader.get_index_name(), data=doc, document_id=payload.id, operation_type="update", refresh="wait_for")
+        return {"success": True, "result": es_res}
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+@router.post("/indices/{type_name}/delete/{id}")
+async def delete(type_name: str, id: str, request: Request):
+
+    if not is_user_permit_action(request.state.user_id, f"{type_name}_update", f"{type_name}_delete", request.state.app):
+        message = f"access denied for user {request.state.user_id}: {type_name}_update"
+        print(message)
+        raise HTTPException(status_code=400, detail=message)
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    try:
+        es_res = await helper.delete_document(index=reader.get_index_name(), id=id, refresh="wait_for")
+        return {"success": True, "result": es_res}
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+@router.get("/indices/{type_name}/{doc_id}")
+async def get_by_id(type_name: str, doc_id: str, request: Request):
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    try:
+        es_res = await helper.get_by_id(reader.get_index_name(), doc_id)
+        return ResponseHelper().normalize_get_response(es_res)
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=404, detail=str(exc))
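+
+
+# Example bulk payload for the multi-insert endpoint defined below
+# (illustrative): a JSON array of InsertRequest objects, id optional.
+#
+#     POST /indices/qaqanon/insert/multi
+#     [{"id": "q1", "document": {"title": "..."}},
+#      {"document": {"title": "..."}}]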
+
+
+#---------------------------------------------------
+#--
+#-- payload --> body = [ {....}, {....},{....} ]
+#--
+@router.post("/indices/{type_name}/insert/multi")
+async def insertMulti(type_name: str, payload: List[InsertRequest], request: Request):
+
+    if not is_user_permit_action(request.state.user_id, f"{type_name}_update", f"{type_name}_insert", request.state.app):
+        message = f"access denied for user {request.state.user_id}: {type_name}_update"
+        print(message)
+        raise HTTPException(status_code=400, detail=message)
+
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    field_processor = FieldProcessor(reader)
+
+    list_new = []
+    list_errors = []
+    for i, item in enumerate(payload):
+        id = item.id if item.id else ''
+        validate_doc, document = field_processor.validate_document(item.document, id)
+        if not validate_doc["valid"]:
+            validate_doc["index"] = i
+            validate_doc["doc"] = item
+            list_errors.append(validate_doc)
+        else:
+            processed_doc = field_processor.process_joinning_document(document)
+            if "id" not in processed_doc and id:
+                processed_doc["id"] = id
+            list_new.append(processed_doc)
+
+    try:
+        if list_new:
+            es_res = await helper.bulk_insert(reader.get_index_name(), list_new, refresh="wait_for")
+            return {"success": True, "validation_errors": list_errors, "result": es_res}
+        # Nothing passed validation: report the errors instead of returning None.
+        return {"success": False, "validation_errors": list_errors, "result": None}
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+#---------------------------------------------------
+#--
+@router.post("/indices/{type_name}/export-to-file")
+async def export_to_file(type_name: str, payload: ExportToFileRequest, request: Request):
+    print("1 -->", time.time())
+    reader = MapIndexReader(type_name)
+    helper: ElasticClientHelper = get_elastic_helper(request)
+    builder = ElasticQueryBuilder()
+    result = await helper.export_to_file(
+        **dict(payload), index_name=reader.get_index_name()
+    )
+    return result
+
+
+@router.post("/indices/{type_name}/search")
+async def search(type_name: str, payload: SearchRequest, request: Request):
+    print("1 -->", time.time())
+    reader = MapIndexReader(type_name)
+    helper = get_elastic_helper(request)
+    builder = ElasticQueryBuilder()
+    print("2 -->", time.time())
+    body = builder.build_search_query(reader, payload)
+    print("body -->", body)
+
+    try:
+        es_res = await helper.search(reader.get_index_name(), body)
+        print("3 -->", time.time())
+        collapse_field = body.get("collapse", {}).get("field") if body.get("collapse") else None
+        bookmark_id = payload.bookmark_id if payload.bookmark_id else ''
+        res = ResponseHelper(helper, reader).normalize_search_response(es_res, collapse_field, bookmark_id, payload.mode_response)
+        print("4 -->", time.time())
+        return res
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+# @router.post("/indices/{type_name}/list")
+# async def search(type_name: str, payload: SearchRequest, request: Request):
+#     reader = MapIndexReader(type_name)
+#     helper = get_elastic_helper(request)
+#     builder = ElasticQueryBuilder()
+
+#     body = builder.build_search_query(reader, payload)
+
+#     try:
+#         es_res = await helper.search(reader.get_index_name(), body)
+#         return ResponseHelper().normalize_search_response(es_res, body.get("collapse", {}).get("field") if body.get("collapse") else None)
+#     except Exception as exc:  # noqa: BLE001
+#         raise HTTPException(status_code=400, detail=str(exc))
+
+
+# @router.post("/indices/{type_name}/insert/{entity_type}/{data_type}/{ref_key}")
+# @router.post("/indices/{type_name}/insert/favorite/{data_type}/{ref_key}")
+# @router.post("/indices/{type_name}/insert/favorite/entity/{ref_key}")
+# @router.post("/indices/{type_name}/insert/history/{data_type}/{ref_key}")
+# 
@router.post("/indices/{type_name}/insert/history/{data_type}/{ref_key}") +# @router.post("/indices/{type_name}/insert/") +# async def insert1(type_name: str, payload: InsertRequest, request: Request): +# payload.document.entity_type = entity_type +# payload.document.data_type = data_type +# payload.document.ref_key = ref_key + diff --git a/app/routes/v1/elastic_admin.py b/app/routes/v1/elastic_admin.py new file mode 100644 index 0000000..de252f2 --- /dev/null +++ b/app/routes/v1/elastic_admin.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException, Request + +from app.core.map_index_reader import MapIndexReader +from app.core.elastic_query_builder import ElasticQueryBuilder +from app.core.field_processor import FieldProcessor +from app.core.response_helper import ResponseHelper +from app.routes.v1.models import ( + SearchRequest, + InsertRequest, + UpdateByQueryRequest, + DeleteByQueryRequest, +) + + +router = APIRouter(tags=["elasticsearch"]) + + +def get_elastic_helper(request: Request): + helper = getattr(request.app.state, "elastic_helper", None) + if helper is None: + raise RuntimeError("Elasticsearch helper not initialized") + return helper + + +@router.post("/indices/{type_name}/create-index") +async def create_index(type_name: str, request: Request): + reader = MapIndexReader(type_name) + helper = get_elastic_helper(request) + try: + res = await helper.create_index_if_not_exists(reader.get_index_name(), reader.mapping) + return ResponseHelper().merge_related({"success": True, "result": res}, None) + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + + +@router.post("/indices/{type_name}/update-by-query") +async def update_by_query(type_name: str, payload: UpdateByQueryRequest, request: Request): + reader = MapIndexReader(type_name) + helper = get_elastic_helper(request) + builder = ElasticQueryBuilder() + body = builder.build_update_by_query(reader, payload) + try: + es_res = await helper.update_by_query(reader.get_index_name(), body, refresh=True) + return {"success": True, "result": es_res} + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + + +@router.post("/indices/{type_name}/delete-by-query") +async def delete_by_query(type_name: str, payload: DeleteByQueryRequest, request: Request): + reader = MapIndexReader(type_name) + helper = get_elastic_helper(request) + builder = ElasticQueryBuilder() + body = builder.build_delete_by_query(reader, payload) + try: + es_res = await helper.delete_by_query(reader.get_index_name(), body, refresh=True) + return {"success": True, "result": es_res} + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + diff --git a/app/routes/v1/elastic_map.py b/app/routes/v1/elastic_map.py new file mode 100644 index 0000000..6e572d8 --- /dev/null +++ b/app/routes/v1/elastic_map.py @@ -0,0 +1,159 @@ +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException, Request + +from app.core.map_index_reader import MapIndexReader +from app.core.elastic_query_builder import ElasticQueryBuilder +from app.core.field_processor import FieldProcessor +from app.core.response_helper import ResponseHelper +from app.routes.v1.models import ( + SearchRequest, + InsertRequest, + UpdateByQueryRequest, + DeleteByQueryRequest, +) + + +router = APIRouter(tags=["elasticsearch"]) + + +def get_elastic_helper(request: Request): + helper = 
getattr(request.app.state, "elastic_helper", None) + if helper is None: + raise RuntimeError("Elasticsearch helper not initialized") + return helper + + +# New enhanced endpoints +@router.get("/indices/{type_name}/schema") +async def get_schema(type_name: str, request: Request): + """Get complete schema information for a type.""" + reader = MapIndexReader(type_name) + try: + return { + "success": True, + "result": { + "index_info": reader.get_index_info(), + "query_config": reader.get_query_config(), + "fields": {field: reader.get_field_metadata(field) for field in reader.list_fields()} + } + } + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + + +@router.get("/indices/{type_name}/fields") +async def get_fields(type_name: str, request: Request): + """Get field information for a type.""" + reader = MapIndexReader(type_name) + try: + return { + "success": True, + "result": { + "all_fields": reader.list_fields(), + "searchable_fields": reader.get_searchable_fields(), + "sortable_fields": reader.get_sortable_fields(), + "exportable_fields": [f for f in reader.list_fields() if reader.get_export_state(f)], + "array_fields": reader.get_array_fields(), + "object_fields": reader.get_object_fields(), + "date_fields": reader.get_date_fields(), + "numeric_fields": reader.get_numeric_fields(), + "boolean_fields": reader.get_boolean_fields(), + "vector_fields": reader.get_vector_fields() + } + } + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + + +@router.get("/indices/{type_name}/fields/{field_name}") +async def get_field_info(type_name: str, field_name: str, request: Request): + """Get detailed information for a specific field.""" + reader = MapIndexReader(type_name) + try: + field_meta = reader.get_field_meta(field_name) + return { + "success": True, + "result": { + "field_name": field_name, + "metadata": reader.get_field_metadata(field_name), + # "label": reader.get_field_label(field_name), + "validation": reader.validate_field_configuration(field_name), + "join_processes": reader.get_join_processes(field_name), + "processed_fields": reader.get_processed_fields(field_name) + } + } + except KeyError: + raise HTTPException(status_code=404, detail=f"Field '{field_name}' not found") + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + + +@router.post("/indices/{type_name}/validate") +async def validate_document(type_name: str, document: dict, request: Request): + """Validate a document against the schema.""" + reader = MapIndexReader(type_name) + field_processor = FieldProcessor(reader) + + try: + validation_result = field_processor.validate_document(document) + return { + "success": True, + "result": validation_result + } + except Exception as exc: # noqa: BLE001 + raise HTTPException(status_code=400, detail=str(exc)) + + +# @router.post("/indices/{type_name}/process") +# async def process_document(type_name: str, document: dict, request: Request): +# """Process a document based on field configurations.""" +# reader = MapIndexReader(type_name) +# field_processor = FieldProcessor(reader) + +# try: +# processed_doc = field_processor.process_joinning_document(document) +# return { +# "success": True, +# "result": processed_doc +# } +# except Exception as exc: # noqa: BLE001 +# raise HTTPException(status_code=400, detail=str(exc)) + + +@router.get("/indices/{type_name}/advanced-tags") +async def get_advanced_tags(type_name: str, request: Request): + """Get 
advanced search tags configuration."""
+    reader = MapIndexReader(type_name)
+    try:
+        return {
+            "success": True,
+            "result": {
+                "tags": reader.get_search_advance_tags(),
+                "tag_names": reader.list_advanced_tags(),
+                "collapse_fields": reader.get_collapse_fields()
+            }
+        }
+    except Exception as exc:  # noqa: BLE001
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+# @router.get("/indices/{type_name}/labels")
+# async def get_field_labels(type_name: str, request: Request):
+#     """Get field labels mapping."""
+#     reader = MapIndexReader(type_name)
+#     try:
+#         labels = {}
+#         for field in reader.list_fields():
+#             label = reader.get_field_label(field)
+#             if label:
+#                 labels[field] = label
+
+#         return {
+#             "success": True,
+#             "result": labels
+#         }
+#     except Exception as exc:  # noqa: BLE001
+#         raise HTTPException(status_code=400, detail=str(exc))
+
diff --git a/app/routes/v1/models.py b/app/routes/v1/models.py
new file mode 100644
index 0000000..9c1babc
--- /dev/null
+++ b/app/routes/v1/models.py
@@ -0,0 +1,229 @@
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field, validator
+
+
+class SearchRequest(BaseModel):
+    track_total_hits: Optional[bool] = True
+    mode_response: str = "elastic"  # "normal", "elastic"
+    query: Optional[str] = ''
+    search_type: Optional[str] = "normal"  # "normal", "phrase", "and"
+    filters: Optional[Dict[str, Any]] = None  # { "f_ud": 2, "f_cd": "435234234"}
+    sort: Optional[List[str]] = None  # time_edit:desc, lastTitle, ...
+    from_: int = Field(0, ge=0, alias="from")
+    size: int = Field(10, ge=0, le=10000)
+    collapse_field: Optional[str] = ''
+    bookmark_id: Optional[str] = ''
+
+    highlight: Optional[Dict[str, Any]] = None
+
+    # Enhanced fields for new backend.json properties
+    export_mode: bool = False
+    validate_fields: bool = True
+    use_field_boosts: bool = True
+    aggregation_fields: Optional[List[str]] = None
+    advanced_search_tags: Optional[List[str]] = None
+    include_metadata: bool = False
+    field_type_filter: Optional[str] = None
+
+    search_after: Optional[List[Any]] = None
+    search_fields: Optional[List[str]] = None
+    default_search_field: str = "_all"
+
+    include_fields: Optional[List[str]] = None
+    exclude_fields: Optional[List[str]] = None
+
+    class Config:
+        populate_by_name = True
+
+
+class InsertRequest(BaseModel):
+    id: Optional[str] = None
+    document: Dict[str, Any]
+
+
+class UpdateByQueryRequest(BaseModel):
+    filters: Optional[Dict[str, Any]] = None
+    set_fields: Optional[Dict[str, Any]] = None
+    script: Optional[Dict[str, Any]] = None
+
+
+class DeleteByQueryRequest(BaseModel):
+    filters: Optional[Dict[str, Any]] = None
+
+
+class ExportToFileRequest(BaseModel):
+    path_back: str = Field(
+        ...,
+        description="Backup directory path",
+        example="/backup/data"
+    )
+    out_name: str = Field(
+        "",
+        description="Output file name (default: index_name)",
+        example="backup_2024",
+        max_length=255
+    )
+    body: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Optional search query body",
+        example={
+            "query": {
+                "match_all": {}
+            }
+        }
+    )
+    fields: Optional[List[str]] = Field(
+        None,
+        description="List of fields to include (if empty, include all)",
+        example=["title", "content", "timestamp"]
+    )
+    chunk_size: int = Field(
+        1000,
+        description="Number of documents per chunk",
+        ge=100,
+        le=10000,
+        example=1000
+    )
+    
scroll_timeout: str = Field( + "5m", + description="Scroll timeout for Elasticsearch", + pattern="^[0-9]+[smh]$", + example="5m" + ) + max_documents: Optional[int] = Field( + None, + description="Maximum number of documents to export", + ge=1, + le=10000000, + example=50000 + ) + delay_between_chunks: float = Field( + 0.1, + description="Delay between processing chunks in seconds", + ge=0.0, + le=10.0, + example=0.1 + ) + to_zip: bool = Field( + False, + description="Whether to compress output to ZIP file", + example=True + ) + + @validator('path_back') + def validate_path_back(cls, v): + """Validate backup directory path""" + if not v: + raise ValueError('path_back cannot be empty') + + # Check if path contains invalid characters + invalid_chars = ['..', '~', '*', '?', '"', '<', '>', '|'] + if any(char in v for char in invalid_chars): + raise ValueError(f'Invalid characters in path: {v}') + + return v + + @validator('out_name') + def validate_out_name(cls, v, values): + """Validate output file name""" + if not v: + # Use index_name as default + return values.get('index_name', '') + + # File name validation + invalid_chars = ['/', '\\', ':', '*', '?', '"', '<', '>', '|'] + if any(char in v for char in invalid_chars): + raise ValueError(f'Invalid characters in output name: {v}') + + return v + + @validator('chunk_size') + def validate_chunk_size(cls, v): + """Validate chunk size""" + if v < 100: + raise ValueError('chunk_size must be at least 100') + if v > 10000: + raise ValueError('chunk_size cannot exceed 10000') + return v + + @validator('scroll_timeout') + def validate_scroll_timeout(cls, v): + """Validate scroll timeout format""" + import re + pattern = re.compile(r'^[0-9]+[smh]$') + if not pattern.match(v): + raise ValueError('scroll_timeout must be in format: [number][s|m|h] (e.g., 5m, 30s, 1h)') + + # Extract number and unit + num = int(v[:-1]) + unit = v[-1] + + if unit == 's' and num > 3600: # 1 hour + raise ValueError('Scroll timeout in seconds cannot exceed 3600 (1 hour)') + elif unit == 'm' and num > 60: # 1 hour + raise ValueError('Scroll timeout in minutes cannot exceed 60 (1 hour)') + elif unit == 'h' and num > 24: # 1 day + raise ValueError('Scroll timeout in hours cannot exceed 24 (1 day)') + + return v + + @validator('max_documents') + def validate_max_documents(cls, v): + """Validate maximum documents""" + if v is not None and v <= 0: + raise ValueError('max_documents must be positive') + return v + + @validator('delay_between_chunks') + def validate_delay_between_chunks(cls, v): + """Validate delay between chunks""" + if v < 0: + raise ValueError('delay_between_chunks cannot be negative') + if v > 10.0: + raise ValueError('delay_between_chunks cannot exceed 10 seconds') + return v + + @validator('fields') + def validate_fields(cls, v): + """Validate fields list""" + if v is not None: + if len(v) == 0: + raise ValueError('fields list cannot be empty') + + # Check for duplicate fields + if len(v) != len(set(v)): + raise ValueError('fields list contains duplicates') + + # Validate each field name + for field in v: + if not field or not field.strip(): + raise ValueError('field name cannot be empty') + if len(field) > 255: + raise ValueError(f'field name too long: {field}') + + return v + + class Config: + schema_extra = { + "example": { + "path_back": "/backup/data", + "out_name": "backup_2024", + "body": { + "query": { + "match_all": {} + } + }, + "fields": ["title", "content", "timestamp"], + "chunk_size": 1000, + "scroll_timeout": "5m", + "max_documents": 50000, + 
"delay_between_chunks": 0.1, + "to_zip": True + } + } diff --git a/app/routes/voice/models.py b/app/routes/voice/models.py new file mode 100644 index 0000000..566749b --- /dev/null +++ b/app/routes/voice/models.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field +from app.routes.v1.models import ( + SearchRequest, +# InsertRequest, +# UpdateByQueryRequest, +# DeleteByQueryRequest, + ) + + +class voiceSearchRequest(SearchRequest): + query: str = '' + diff --git a/app/routes/voice/voice.py b/app/routes/voice/voice.py new file mode 100644 index 0000000..790cc70 --- /dev/null +++ b/app/routes/voice/voice.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException, Request + +from app.core.map_index_reader import MapIndexReader +from app.core.elastic_query_builder import ElasticQueryBuilder +from app.core.field_processor import FieldProcessor +from app.core.response_helper import ResponseHelper +from app.routes.v1.models import ( + SearchRequest, + InsertRequest, + UpdateByQueryRequest, + DeleteByQueryRequest, +) +# from app.routes.voice.models import ( +# voiceSearchRequest, +# ) +from typing import Any, Dict, List, Optional +import time +from app.routes.v1.elastic import ( + search, +) +import uuid +import requests +from app.config.settings import get_settings, Settings +from bs4 import BeautifulSoup + +router = APIRouter(tags=["voice"]) + + +@router.post("/{type_name}/search") +async def search_voice(type_name: str, payload: SearchRequest, request: Request): + + payload.track_total_hits = True + payload.search_type = "phrase" + # payload.size = 2 + + response = await search(type_name, payload, request ) + + # print(response) + took = response.get("took", 0) + aggregations = response.get("aggregations", {}) + hits_section = response.get("hits", {}) + items = hits_section.get("hits", []) + total = hits_section.get("total", {}) + # print(total) + res = [] + highlight_ext = [] + for item in items: + _id = item.get("_id", 0) + source = item.get("_source", {}) + content = source.get("content", "") + time_words = source.get("time_words", "") + highlight = item.get("highlight", []) + highlight_content = [] + h_key = '' + if 'content.ph' in highlight : + highlight_content = highlight["content.ph"] + h_key = 'content.ph' + elif 'content.fa' in highlight : + highlight_content = highlight["content.fa"] + h_key = 'content.fa' + elif 'content' in highlight : + highlight_content = highlight["content"] + h_key = 'content' + + highlight_ext = [] + for i in highlight_content: + soup = BeautifulSoup(i, 'html.parser') + clean = soup.get_text() + start_word = {} + end_word = {} + start_pos = content.find(clean) + if start_pos != -1: + end_pos = start_pos + len(clean) + else : + continue + + for word in time_words: + if word["start_offset"] == start_pos: + start_word = word + break + for word in time_words: + if word["end_offset"] == end_pos: + end_word = word + break + + highlight_ext.append({ + "start": start_word.get("start",""), + "end": end_word.get("end",""), + "start_offset": start_pos, + "end_offset": end_pos + }) + + if "content" in source : + del source["content"] + if "time_words" in source : + del source["time_words"] + + new_item = { + "_id" : _id, + "_source" : source, + "highlight": { 'content' : highlight[h_key] }, + # "highlight" : highlight, + "voice_times" : highlight_ext + } + + res.append(new_item) + + result_end = { + "took" : took, + "status" : 
0, + "success": True, + "message" : "", + "hits" : { + "hits": res, + "total": total + } + } + + return result_end + diff --git a/app/schemas/aiword_backend.json b/app/schemas/aiword_backend.json new file mode 100644 index 0000000..2fe6a9b --- /dev/null +++ b/app/schemas/aiword_backend.json @@ -0,0 +1,127 @@ +{ + "index": { + "name": "ai_nlp_word", + "aliases": [], + "index_key": "aiword" + }, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "is_correct": { + "type": "keyword" + }, + "nearest_correct_word": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "stem": { + "type": "keyword" + }, + "origin": { + "type": "keyword" + }, + "word_classes": { + "type": "keyword" + }, + "word_tags": { + "type": "keyword" + }, + "is_proper_noun": { + "type": "keyword" + }, + "ner_description": { + "type": "text" + }, + "llm_description": { + "type": "text" + }, + "user_description": { + "type": "text" + }, + "admin_description": { + "type": "text" + }, + "confidence": { + "type": "float" + }, + "ref_key": { + "type": "keyword" + }, + + "language_key": { + "type": "keyword" + }, + "domain_tags": { + "type": "keyword" + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + }, + "validation": { + "required": [ + "title" + ], + "default_value":{ + "is_correct" : "نامشخص" + } + }, + "include_fields": [], + "is_array": [ + "word_classes", + "word_tags", + "domain_tags" + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "title": "asc" + }, + { + "time_edit": "desc" + } + ], + "normal": { + "phrase": { + "title": 5, + "nearest_correct_word": 3, + "stem": 2, + "origin": 2 + }, + "match": { + "title": 3, + "nearest_correct_word": 2, + "stem": 1, + "origin": 1 + } + }, + "filter_keys": { + "f_wc": "word_classes", + "f_wt": "word_tags", + "f_lk": "language_key", + "f_dt": "domain_tags", + "f_rk": "ref_key" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/aiword_map.json b/app/schemas/mappings/aiword_map.json new file mode 100644 index 0000000..f58b233 --- /dev/null +++ b/app/schemas/mappings/aiword_map.json @@ -0,0 +1,216 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "is_correct": { + "type": "keyword" + }, + "nearest_correct_word": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "stem": { + "type": "keyword" + }, + "origin": { + "type": "keyword" + }, + "word_classes": { + "type": "keyword" + }, + "word_tags": { + "type": "keyword" + }, + "is_proper_noun": { + "type": "keyword" + }, + "ner_description": { + "type": "text" + }, + "llm_description": { + "type": "text" + }, + "user_description": { + "type": "text" + }, + "admin_description": { + "type": "text" + }, + "confidence": { + "type": "float" + }, + "ref_key": { + "type": "keyword" + }, + "language_key": { + "type": "keyword" + }, + "domain_tags": { + "type": "keyword" + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": 
[] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/mjplan_map.json b/app/schemas/mappings/mjplan_map.json new file mode 100644 index 0000000..baec967 --- /dev/null +++ b/app/schemas/mappings/mjplan_map.json @@ -0,0 +1,217 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text" + }, + "tcode": { + "type": "keyword" + }, + "version_key": { + "type": "keyword" + }, + "parent_id": { + "type": "keyword" + }, + "child_order": { + "type": "float" + }, + "full_path": { + "type": "text" + }, + "level": { + "type": "integer" + }, + "ai_section_ids": { + "type": "keyword" + }, + "section_ids": { + "type": "keyword" + }, + "user_actions": { + "properties": { + "user_id": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "description": { + "type": "text" + }, + "property_key": { + "type": "keyword" + }, + "action_key": { + "type": "keyword" + }, + "action_value": { + "type": "text" + }, + "action": { + "type": "object" + } + } + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + 
"آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/mjpsection_map.json b/app/schemas/mappings/mjpsection_map.json new file mode 100644 index 0000000..2ce2c65 --- /dev/null +++ b/app/schemas/mappings/mjpsection_map.json @@ -0,0 +1,243 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "plan_id": { + "type": "keyword" + }, + "tcode": { + "type": "keyword" + }, + "version_key": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text" + }, + "footnotes": { + "properties": { + "text": { + "type": "text" + }, + "main_type": { + "type": "keyword" + }, + "refrence_ids": { + "type": "keyword" + } + } + }, + "section_info": { + "properties": { + "qanon_title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "full_path": { + "type": "text" + }, + "ts_date": { + "type": "date" + }, + "ts_ref": { + "type": "keyword" + }, + "state_etebar": { + "type": "keyword" + }, + "content": { + "type": "text" + } + } + }, + "user_actions": { + "properties": { + "user_id": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "description": { + "type": "text" + }, + "property_key": { + "type": "keyword" + }, + "action_key": { + "type": "keyword" + }, + "action_value": { + "type": "text" + }, + "action": { + "type": "object" + } + } + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", 
+ "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/mn_term_map.json b/app/schemas/mappings/mn_term_map.json new file mode 100644 index 0000000..bedc96c --- /dev/null +++ b/app/schemas/mappings/mn_term_map.json @@ -0,0 +1,224 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + } + } + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "main_type": { + "type": "keyword" + }, + "begin_date": { + "type": "date" + }, + "end_date": { + "type": "date" + }, + "begin_year": { + "type": "integer" + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "format": { + "type": "keyword" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/mnvoice_map.json b/app/schemas/mappings/mnvoice_map.json new file mode 100644 index 0000000..e7e0939 --- /dev/null +++ b/app/schemas/mappings/mnvoice_map.json @@ -0,0 
+1,249 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "ref_id": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "branch": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "begin_year": { + "type": "integer", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 20 + } + } + }, + "format": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "sound_title": { + "type": "keyword" + }, + "sound_link": { + "type": "keyword" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + } + } + }, + "time_words": { + "properties": { + "start_offset": { + "type": "integer" + }, + "end_offset": { + "type": "integer" + }, + "word": { + "type": "text" + }, + "start": { + "type": "float" + }, + "end": { + "type": "float" + } + } + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/qachat_map.json b/app/schemas/mappings/qachat_map.json new file mode 100644 index 0000000..2e9d997 --- /dev/null +++ b/app/schemas/mappings/qachat_map.json @@ -0,0 +1,201 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + 
"ignore_above": 256 + } + } + }, + "user_id": { + "type": "integer" + }, + "user_query": { + "type": "text" + }, + "model_key": { + "type": "keyword" + }, + "retrived_passage": { + "type": "text" + }, + "retrived_ref_ids": { + "type": "keyword" + }, + "retrived_duration": { + "type": "integer" + }, + "prompt_type": { + "type": "keyword" + }, + "llm_duration": { + "type": "integer" + }, + "full_duration": { + "type": "integer" + }, + "time_create": { + "type": "date" + }, + "used_ref_ids": { + "type": "keyword" + }, + "prompt_answer": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/qaqanon_map.json b/app/schemas/mappings/qaqanon_map.json new file mode 100644 index 0000000..276a5d2 --- /dev/null +++ b/app/schemas/mappings/qaqanon_map.json @@ -0,0 +1,778 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title_popular": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + } + } + }, + 
"embeddings": { + "type": "dense_vector", + "dims": 768, + "index": true, + "similarity": "cosine" + }, + "initial": { + "properties": { + "title": { + "type": "text" + }, + "content": { + "type": "text" + }, + "approuve_type": { + "type": "keyword" + }, + "creators": { + "type": "keyword" + }, + "signers": { + "type": "keyword" + }, + "approuve_number": { + "type": "text" + }, + "approuve_date": { + "type": "date" + }, + "letter_number": { + "type": "text" + }, + "letter_date": { + "type": "date" + }, + "letter_signer": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "receive": { + "properties": { + "term_number": { + "type": "text" + }, + "ref_law": { + "type": "keyword" + }, + "meet_number": { + "type": "text" + }, + "meet_date": { + "type": "date" + }, + "register_number": { + "type": "text" + }, + "publish_number": { + "type": "text" + }, + "handle_type": { + "type": "keyword" + }, + "handle_method": { + "type": "keyword" + } + } + }, + "refer": { + "properties": { + "refer_date": { + "type": "date" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "commission_common": { + "properties": { + "name": { + "type": "keyword" + }, + "member_count": { + "type": "integer" + }, + "member_names": { + "type": "keyword" + } + } + }, + "handle_85": { + "type": "keyword" + } + } + }, + "commission_report": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_first": { + "properties": { + "approve_type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_nexts": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "parl_handle": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "agree_names": { + "type": "keyword" + }, + "against_names": { + "type": "keyword" + }, + "demands": { + "type": "keyword" + }, + "demand85_names": { + "type": "keyword" + }, + "handle_infos": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "before_info": { + "properties": { + "ref_id": { + "type": "keyword" + }, + "plan_type": { + "type": "keyword" + }, + "discuss_type": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "creators": { + "type": "text" + }, + "register_number": { + "type": "text" + }, + "prev_number": { + "type": "text" + }, + "receipt_date": { + "type": "date" + }, + "discuss_date1": { + "type": "date" + }, + "discuss_date2": { + "type": "date" + }, + "description": { + "type": "text" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" 
+ }, + "content": { + "type": "text" + } + } + }, + "letters": { + "properties": { + "type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "order": { + "type": "float" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "description": { + "type": "text" + }, + "content": { + "type": "text" + }, + "delay_time": { + "type": "text" + }, + "expire_date": { + "type": "date" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "counts": { + "properties": { + "key": { + "type": "keyword" + }, + "value": { + "type": "text" + } + } + }, + "davam_type": { + "type": "keyword" + }, + "number_all": { + "type": "text" + }, + "number_row": { + "type": "text" + }, + "archive_info": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + "ts_date_dotic": { + "type": "date" + }, + "ts_date1": { + "type": "date" + }, + "ts_ref1": { + "type": "keyword" + }, + "ts_date2": { + "type": "date" + }, + "ts_ref2": { + "type": "keyword" + }, + "conformity_rrk": { + "type": "keyword" + }, + "conformity_qanonyar": { + "type": "keyword" + }, + "content_ocr": { + "type": "text" + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "date" + }, + "exec_date": { + "type": "date" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "date" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "term": { + "type": "keyword" + }, + "term_number": { + "type": "integer" + }, + "ranking_weight": { + "type": "integer" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "action_type": { + "type": "keyword" + }, + "section_len": { + "type": "integer" + }, + "eblagh": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + }, + "from": { + "type": "keyword" + } + } + }, + "rrk": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + } + } + }, + "exceuter_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "receiver_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "qanon_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "opinion_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "fixed_date": { + "type": "date" + }, + "renewal_date": 
{ + "type": "date" + }, + "exec_duration": { + "type": "text" + }, + "effective_date": { + "type": "date" + }, + "relation_organs": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + } + } + }, + "is_delete": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "doc_tags": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/test_map.json b/app/schemas/mappings/test_map.json new file mode 100644 index 0000000..bb6c086 --- /dev/null +++ b/app/schemas/mappings/test_map.json @@ -0,0 +1,774 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "keyword": { + "type": "keyword" + } + } + }, + "title_popular": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + } + } + }, + "embeddings": { + "type": "dense_vector" + }, + "initial": { + "properties": { + "title": { + "type": "text" + }, + "content": { + "type": "text" + }, + "approuve_type": { + 
"type": "keyword" + }, + "creators": { + "type": "keyword" + }, + "signers": { + "type": "keyword" + }, + "approuve_number": { + "type": "text" + }, + "approuve_date": { + "type": "date" + }, + "letter_number": { + "type": "text" + }, + "letter_date": { + "type": "date" + }, + "letter_signer": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "receive": { + "properties": { + "term_number": { + "type": "text" + }, + "ref_law": { + "type": "keyword" + }, + "meet_number": { + "type": "text" + }, + "meet_date": { + "type": "date" + }, + "register_number": { + "type": "text" + }, + "publish_number": { + "type": "text" + }, + "handle_type": { + "type": "keyword" + }, + "handle_method": { + "type": "keyword" + } + } + }, + "refer": { + "properties": { + "refer_date": { + "type": "date" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "commission_common": { + "properties": { + "name": { + "type": "keyword" + }, + "member_count": { + "type": "integer" + }, + "member_names": { + "type": "keyword" + } + } + }, + "handle_85": { + "type": "keyword" + } + } + }, + "commission_report": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_first": { + "properties": { + "approve_type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_nexts": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "parl_handle": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "agree_names": { + "type": "keyword" + }, + "against_names": { + "type": "keyword" + }, + "demands": { + "type": "keyword" + }, + "demand85_names": { + "type": "keyword" + }, + "handle_infos": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "before_info": { + "properties": { + "ref_id": { + "type": "keyword" + }, + "plan_type": { + "type": "keyword" + }, + "discuss_type": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "creators": { + "type": "text" + }, + "register_number": { + "type": "text" + }, + "prev_number": { + "type": "text" + }, + "receipt_date": { + "type": "date" + }, + "discuss_date1": { + "type": "date" + }, + "discuss_date2": { + "type": "date" + }, + "description": { + "type": "text" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "content": { + "type": "text" + } + } + }, + "letters": { + "properties": { + "type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "order": { + "type": "float" + }, + "number": { + "type": "text" + }, 
+ "date": { + "type": "date" + }, + "description": { + "type": "text" + }, + "content": { + "type": "text" + }, + "delay_time": { + "type": "text" + }, + "expire_date": { + "type": "date" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "counts": { + "properties": { + "key": { + "type": "keyword" + }, + "value": { + "type": "text" + } + } + }, + "davam_type": { + "type": "keyword" + }, + "number_all": { + "type": "text" + }, + "number_row": { + "type": "text" + }, + "archive_info": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + "ts_date_dotic": { + "type": "date" + }, + "ts_date1": { + "type": "date" + }, + "ts_ref1": { + "type": "keyword" + }, + "ts_date2": { + "type": "date" + }, + "ts_ref2": { + "type": "keyword" + }, + "conformity_rrk": { + "type": "keyword" + }, + "conformity_qanonyar": { + "type": "keyword" + }, + "content_ocr": { + "type": "text" + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "date" + }, + "exec_date": { + "type": "date" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "date" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "term": { + "type": "keyword" + }, + "term_number": { + "type": "integer" + }, + "ranking_weight": { + "type": "integer" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "action_type": { + "type": "keyword" + }, + "section_len": { + "type": "integer" + }, + "eblagh": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + }, + "from": { + "type": "keyword" + } + } + }, + "rrk": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + } + } + }, + "exceuter_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "receiver_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "qanon_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "opinion_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "fixed_date": { + "type": "date" + }, + "renewal_date": { + "type": "date" + }, + "exec_duration": { + "type": "text" + }, + "effective_date": { + "type": "date" + }, + "relation_organs": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + 
"file_name": { + "type": "text" + }, + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + } + } + }, + "is_delete": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "doc_tags": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mappings/use_for_test_map.json b/app/schemas/mappings/use_for_test_map.json new file mode 100644 index 0000000..60cfa05 --- /dev/null +++ b/app/schemas/mappings/use_for_test_map.json @@ -0,0 +1,777 @@ +{ + "mappings": { + "date_detection": false, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "keyword": { + "type": "keyword" + } + } + }, + "title_popular": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + } + } + }, + "embeddings": { + "type": "dense_vector" + }, + "initial": { + "properties": { + "title": { + "type": "text" + }, + "content": { + "type": "text" + }, + "approuve_type": { + "type": "keyword" + }, + "creators": { + "type": "keyword" + }, + "signers": { + "type": "keyword" + }, + "approuve_number": { + "type": "text" + }, + "approuve_date": { + "type": "date" + }, + 
"letter_number": { + "type": "text" + }, + "letter_date": { + "type": "date" + }, + "letter_signer": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "receive": { + "properties": { + "term_number": { + "type": "text" + }, + "ref_law": { + "type": "keyword" + }, + "meet_number": { + "type": "text" + }, + "meet_date": { + "type": "date" + }, + "register_number": { + "type": "text" + }, + "publish_number": { + "type": "text" + }, + "handle_type": { + "type": "keyword" + }, + "handle_method": { + "type": "keyword" + } + } + }, + "refer": { + "properties": { + "refer_date": { + "type": "date" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "commission_common": { + "properties": { + "name": { + "type": "keyword" + }, + "member_count": { + "type": "integer" + }, + "member_names": { + "type": "keyword" + } + } + }, + "handle_85": { + "type": "keyword" + } + } + }, + "commission_report": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_first": { + "properties": { + "approve_type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_nexts": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "parl_handle": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "agree_names": { + "type": "keyword" + }, + "against_names": { + "type": "keyword" + }, + "demands": { + "type": "keyword" + }, + "demand85_names": { + "type": "keyword" + }, + "handle_infos": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "before_info": { + "properties": { + "ref_id": { + "type": "keyword" + }, + "plan_type": { + "type": "keyword" + }, + "discuss_type": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "creators": { + "type": "text" + }, + "register_number": { + "type": "text" + }, + "prev_number": { + "type": "text" + }, + "receipt_date": { + "type": "date" + }, + "discuss_date1": { + "type": "date" + }, + "discuss_date2": { + "type": "date" + }, + "description": { + "type": "text" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "content": { + "type": "text" + } + } + }, + "letters": { + "properties": { + "id": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "order": { + "type": "float" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "description": { + "type": "text" + }, + "content": { + "type": "text" + }, + "delay_time": { + "type": "text" + }, + 
"expire_date": { + "type": "date" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "counts": { + "properties": { + "key": { + "type": "keyword" + }, + "value": { + "type": "text" + } + } + }, + "davam_type": { + "type": "keyword" + }, + "number_all": { + "type": "text" + }, + "number_row": { + "type": "text" + }, + "archive_info": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + "ts_date_dotic": { + "type": "date" + }, + "ts_date1": { + "type": "date" + }, + "ts_ref1": { + "type": "keyword" + }, + "ts_date2": { + "type": "date" + }, + "ts_ref2": { + "type": "keyword" + }, + "conformity_rrk": { + "type": "keyword" + }, + "conformity_qanonyar": { + "type": "keyword" + }, + "content_ocr": { + "type": "text" + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "date" + }, + "exec_date": { + "type": "date" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "date" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "term": { + "type": "keyword" + }, + "term_number": { + "type": "integer" + }, + "ranking_weight": { + "type": "integer" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "action_type": { + "type": "keyword" + }, + "section_len": { + "type": "integer" + }, + "eblagh": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + }, + "from": { + "type": "keyword" + } + } + }, + "rrk": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + } + } + }, + "exceuter_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "receiver_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "qanon_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "opinion_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "fixed_date": { + "type": "date" + }, + "renewal_date": { + "type": "date" + }, + "exec_duration": { + "type": "text" + }, + "effective_date": { + "type": "date" + }, + "relation_organs": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + } + } + }, + "is_delete": { + "type": "integer" + }, + 
"time_edit": { + "type": "date" + }, + "doc_tags": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + } + } + }, + "settings": { + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } + } +} \ No newline at end of file diff --git a/app/schemas/mjdomain_backend.json b/app/schemas/mjdomain_backend.json new file mode 100644 index 0000000..c998d1a --- /dev/null +++ b/app/schemas/mjdomain_backend.json @@ -0,0 +1,170 @@ +{ + "index": { + "name": "mj_qa_domain", + "aliases": [], + "index_key": "mjdomain" + }, + "properties": { + "domain_id": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "main_type": { + "type": "keyword" + }, + "relevancies": { + "properties": { + "domain_id": { + "type": "keyword" + }, + "domain_name": { + "type": "keyword" + }, + "domain_title": { + "type": "keyword" + }, + "relevancy_percentage": { + "type": "float" + }, + "relevancy_score": { + "type": "float" + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "top_qanons": { + "properties": { + "importance_score": { + "type": "float" + }, + "max_label": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "max_score": { + "type": "float" + }, + "qanon_id": { + "type": "keyword" + }, + "qanon_title": { + "type": "keyword" + }, + "scaled_importance": { + "type": "float" + } + } + }, + "general_words": { + "properties": { + "word": { + "type": "keyword" + }, + "count": { + "type": "integer" + } + } + }, + "own_words": { + "properties": { + "word": { + "type": "keyword" + }, + "count": { + "type": "integer" + } + } + }, + "code_words": { + "properties": { + "domain_id": { + "type": "keyword" + }, + "domain_name": { + "type": "keyword" + }, + "shared_count": { + "type": "integer" + }, + "union_words": { + "properties": { + "word": { + "type": "keyword" + }, + "count": { + "type": "integer" + } + } + }, + "delta_words": { + "properties": { + "word": { + "type": "keyword" + }, + "count": { + "type": 
"integer" + } + } + }, + "count": { + "type": "integer" + } + } + } + }, + "validation": { + "required": [] + }, + "include_fields": [], + "exclude_fields": [ + "general_words", "own_words", "code_words" + ], + "is_array": [ + "relevancies", + "top_qanons", + "general_words", + "own_words", + "code_words" + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "domain_id": "asc" + } + ], + "normal": { + "phrase": { + "title": 12, + "relevancies.domain_name": 10, + "top_qanons.qanon_title": 9 + }, + "match": { + "title": 4, + "relevancies.domain_name": 2, + "top_qanons.qanon_title": 2 + } + }, + "filter_keys": { + "f_mt": "main_type", + "f_di": "domain_id" + } + } +} \ No newline at end of file diff --git a/app/schemas/mjplan_backend.json b/app/schemas/mjplan_backend.json new file mode 100644 index 0000000..f99ac4f --- /dev/null +++ b/app/schemas/mjplan_backend.json @@ -0,0 +1,115 @@ +{ + "index": { + "name": "mj_plan", + "aliases": [], + "index_key": "mjplan" + }, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text" + }, + "tcode": { + "type": "keyword" + }, + "version_key": { + "type": "keyword" + }, + "parent_id": { + "type": "keyword" + }, + "child_order": { + "type": "float" + }, + "full_path": { + "type": "text" + }, + "level": { + "type": "integer" + }, + "ai_section_ids": { + "type": "keyword" + }, + "section_ids": { + "type": "keyword" + }, + "user_actions": { + "properties": { + "user_id": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "description": { + "type": "text" + }, + "property_key": { + "type": "keyword" + }, + "action_key": { + "type": "keyword" + }, + "action_value": { + "type": "text" + }, + "action": { + "type": "object" + } + } + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + }, + "validation": { + "required": [ + "version_key", + "tcode", + "title" + ] + }, + "include_fields": [], + "exclude_fields": [], + "is_array": [ + "section_ids", + "ai_section_ids", + "user_actions" + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "time_create": "desc" + } + ], + "normal": { + "phrase": { + "title": 6, + "content": 4 + }, + "match": { + "title": 3, + "content": 2 + } + }, + "filter_keys": { + } + } +} \ No newline at end of file diff --git a/app/schemas/mjpsection_backend.json b/app/schemas/mjpsection_backend.json new file mode 100644 index 0000000..b295ff6 --- /dev/null +++ b/app/schemas/mjpsection_backend.json @@ -0,0 +1,141 @@ +{ + "index": { + "name": "mj_plan_section", + "aliases": [], + "index_key": "mjpsection" + }, + "properties": { + "id": { + "type": "keyword" + }, + "plan_id": { + "type": "keyword" + }, + "tcode": { + "type": "keyword" + }, + "version_key": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text" + }, + "footnotes": { + "properties": { + "text": { + "type": "text" + }, + "main_type": { + "type": "keyword" + }, + "refrence_ids": { + "type": "keyword" + } + } + }, + "section_info": { + "properties": { + "qanon_title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "full_path": { + "type": "text" + }, + "ts_date": { + "type": "date" + }, + "ts_ref": { + "type": "keyword" + }, + "state_etebar": { + "type": "keyword" + }, + "content": { + 
"type": "text" + } + } + }, + "user_actions": { + "properties": { + "user_id": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "description": { + "type": "text" + }, + "property_key": { + "type": "keyword" + }, + "action_key": { + "type": "keyword" + }, + "action_value": { + "type": "text" + }, + "action": { + "type": "object" + } + } + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + }, + "validation": { + "required": [ + "plan_id", + "tcode" + ] + }, + "include_fields": [], + "exclude_fields": [], + "is_array": [ + "footnotes", + "user_actions" + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "time_create": "desc" + } + ], + "normal": { + "phrase": { + "title": 6, + "content": 4, + "section.content": 4 + }, + "match": { + "title": 3, + "content": 2, + "section.content": 2 + } + }, + "filter_keys": { + } + } +} \ No newline at end of file diff --git a/app/schemas/mn_term_backend.json b/app/schemas/mn_term_backend.json new file mode 100644 index 0000000..154036a --- /dev/null +++ b/app/schemas/mn_term_backend.json @@ -0,0 +1,86 @@ +{ + "index": { + "name": "mn_term", + "aliases": [], + "index_key": "mn_term" + }, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_persian", + "search_analyzer": "phrase_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "main_type": { + "type": "keyword" + }, + "begin_date": { + "type": "date" + }, + "end_date": { + "type": "date" + }, + "begin_year": { + "type": "integer" + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "format": { + "type": "keyword" + } + } +} \ No newline at end of file diff --git a/app/schemas/mnvoice_backend.json b/app/schemas/mnvoice_backend.json new file mode 100644 index 0000000..7ecea6e --- /dev/null +++ b/app/schemas/mnvoice_backend.json @@ -0,0 +1,154 @@ +{ + "index": { + "name": "mn_spr_sanad", + "aliases": [], + "index_key": "mnvoice" + }, + "properties": { + "ref_id": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "branch": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "begin_year": { + "type": "integer", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 20 + } + } + }, + "format": { + "type": "text", + "fields": { + 
"keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "sound_title": { + "type": "keyword" + }, + "sound_link": { + "type": "keyword" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + } + } + }, + "time_words": { + "properties": { + "start_offset": { + "type": "integer" + }, + "end_offset": { + "type": "integer" + }, + "word": { + "type": "text" + }, + "start": { + "type": "float" + }, + "end": { + "type": "float" + } + } + } + }, + "validation": { + "required": [] + }, + "include_fields": [], + "is_array": [ + "time_words" + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "ref_id": "asc" + }, + { + "sound_link": "asc" + } + ], + "normal": { + "phrase": { + "content.ph": 4 + }, + "match": { + "content": 2 + } + }, + "filter_keys": { + "f_au": "author", + "f_tg": "tags", + "f_br": "branch", + "f_fo": "format", + "f_ye": "begin_year" + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title.fa": {}, + "title.ph": {}, + "content.fa": {}, + "content.ph": {} + } + } + } +} \ No newline at end of file diff --git a/app/schemas/monir/mn_meet.json b/app/schemas/monir/mn_meet.json new file mode 100644 index 0000000..12f48c5 --- /dev/null +++ b/app/schemas/monir/mn_meet.json @@ -0,0 +1,275 @@ +{ + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "person_code": { + "type": "keyword" + }, + "research_code": { + "type": "keyword" + }, + "meet_code": { + "type": "keyword" + }, + "old_meet_id": { + "type": "integer" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "meet_no": { + "type": "integer" + }, + "term_info": { + "properties": { + "id": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "subtitle": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "subjects": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "allwords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "verb": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "sanad_year": { + "type": "integer" + }, + "sanad_date": { + "type": "date" + }, + "amplify": { + "type": "text" + }, + "ralation": { + "type": "keyword" + }, + "city": { + "type": "keyword" + }, + "place": { + "type": "keyword" + }, + "address": { + "type": "text" + }, + "audience": { + "type": "text" + }, + "attendees": { + "type": "text" + }, + "report_info": { + "properties": { + "film_count": { + "type": "integer" + }, + "sound_count": { + "type": "integer" + }, + "photo_count": { + "type": "integer" + }, + "file_count": { + "type": "integer" + }, + "is_address": { + "type": "integer" + }, + "is_mindex": { + 
"type": "integer" + }, + "is_mintro": { + "type": "integer" + }, + "is_sanad_data1": { + "type": "integer" + }, + "is_sanad_data2": { + "type": "integer" + } + } + }, + "mindex": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_persian", + "search_analyzer": "phrase_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + }, + "mintro": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_persian", + "search_analyzer": "phrase_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_persian", + "search_analyzer": "phrase_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + }, + "completion": { + "type": "completion", + "analyzer": "simple", + "preserve_separators": true, + "preserve_position_increments": true, + "max_input_length": 50 + }, + "sort_date_timestamp": { + "type": "long" + }, + "permit_tags": { + "type": "keyword" + }, + "resource_info": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + } +} \ No newline at end of file diff --git a/app/schemas/monir/mn_meet_entity.json b/app/schemas/monir/mn_meet_entity.json new file mode 100644 index 0000000..bdf9970 --- /dev/null +++ b/app/schemas/monir/mn_meet_entity.json @@ -0,0 +1,130 @@ +{ + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_persian", + "search_analyzer": "phrase_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + }, + "permit_tags": { + "type": "keyword" + }, + "search_state": { + "type": "keyword" + }, + "user_create": { 
+ "type": "keyword" + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + }, + "file_links": { + "properties": { + "title": { + "type": "keyword" + }, + "link": { + "type": "text" + }, + "type": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + "meet_info": { + "properties": { + "id": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "term_info": { + "properties": { + "id": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } +} \ No newline at end of file diff --git a/app/schemas/monir/mn_sections.json b/app/schemas/monir/mn_sections.json new file mode 100644 index 0000000..73894d7 --- /dev/null +++ b/app/schemas/monir/mn_sections.json @@ -0,0 +1,208 @@ +{ + "main_type": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "html": { + "type": "text" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_fa", + "search_analyzer": "phrase_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + } + } + }, + "meet_info": { + "properties": { + "id": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "term_info": { + "properties": { + "id": { + "type": "keyword" + }, + "format": { + "type": "keyword" + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "tree_info": { + "properties": { + "parent_id": { + "type": "keyword" + }, + "child_order": { + "type": "double" + }, + "level": { + "type": "integer" + }, + "full_path": { + "type": "text" + }, + "title": { + "type": "text" + }, + "path_headings": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "content_len": { + "type": "integer" + }, + "word_len": { + "type": "integer" + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "nlp_parses": { + "properties": { + "main_type": { + "type": "keyword" + }, + "nlp_type": { + "type": "keyword" + }, + "begin": { + "type": "integer" + }, + "end": { + "type": "integer" + }, + "text": { + "type": "text" + }, + "referes": { + "type": "keyword" + }, + "dependency_infos": { + "type": "nested" + } + } + }, + "embeddings": { + "type": "dense_vector", + "dims": 768, + "index": true, + 
"similarity": "cosine" + }, + "file_links": { + "properties": { + "title": { + "type": "keyword" + }, + "link": { + "type": "text" + }, + "type": { + "type": "text" + } + } + }, + "time_edit": { + "type": "date" + }, + "user_edit": { + "type": "integer" + }, + "user_logs": { + "properties": { + "id": { + "type": "keyword" + }, + "user_id": { + "type": "integer" + }, + "username": { + "type": "keyword" + }, + "time_edit": { + "type": "date" + }, + "property": { + "type": "keyword" + } + } + } +} \ No newline at end of file diff --git a/app/schemas/monir/mn_term.json b/app/schemas/monir/mn_term.json new file mode 100644 index 0000000..b6e77c6 --- /dev/null +++ b/app/schemas/monir/mn_term.json @@ -0,0 +1,79 @@ +{ + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text", + "analyzer": "normal_analyzer_persian", + "search_analyzer": "normal_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + }, + "ar": { + "type": "text", + "analyzer": "normal_analyzer_arabic", + "search_analyzer": "normal_analyzer_arabic", + "search_quote_analyzer": "phrase_analyzer_arabic" + }, + "ph": { + "type": "text", + "analyzer": "phrase_analyzer_persian", + "search_analyzer": "phrase_analyzer_persian", + "search_quote_analyzer": "phrase_analyzer_persian" + } + } + }, + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "main_type": { + "type": "keyword" + }, + "begin_date": { + "type": "date" + }, + "end_date": { + "type": "date" + }, + "begin_year": { + "type": "integer" + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "format": { + "type": "keyword" + } +} \ No newline at end of file diff --git a/app/schemas/monir/mngroup_backend.json b/app/schemas/monir/mngroup_backend.json new file mode 100644 index 0000000..eca02b7 --- /dev/null +++ b/app/schemas/monir/mngroup_backend.json @@ -0,0 +1,49 @@ +{ + "index": { + "name": "mn_group", + "aliases": [], + "index_key": "mngroup" + }, + "properties": { + "id": { + "type": "keyword" + }, + "main_type": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "content" + }, + "user_id": { + "type": "integer" + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + + }, + "validation": { + "required": [ + + ], + "default_value": { + } + }, + "include_fields": [], + "exclude_fields": [], + "is_array": [ + ] + +} \ No newline at end of file diff --git a/app/schemas/monir/mnmeet_backend.json b/app/schemas/monir/mnmeet_backend.json new file mode 100644 index 0000000..1af7012 --- /dev/null +++ b/app/schemas/monir/mnmeet_backend.json @@ -0,0 +1,49 @@ +{ + "index": { + "name": "mn_meet", + "aliases": [], + "index_key": "mnmeet" + }, + "properties": { + "id": { + "type": "keyword" + }, + "main_type": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content": { + "type": "content" + }, + "user_id": { + "type": "integer" + }, + "time_create": { + "type": "date" + }, + "time_edit": { + "type": "date" + } + + }, + 
"validation": { + "required": [ + + ], + "default_value": { + } + }, + "include_fields": [], + "exclude_fields": [], + "is_array": [ + ] + +} \ No newline at end of file diff --git a/app/schemas/old1/1_qanon_info_old_schema.json b/app/schemas/old1/1_qanon_info_old_schema.json new file mode 100644 index 0000000..3c501a6 --- /dev/null +++ b/app/schemas/old1/1_qanon_info_old_schema.json @@ -0,0 +1,207 @@ +{ + "index_name": "mj_qa_qanon", + "file_map": "mj_qa_qanon.json", + "index_key": "qaqanon", + "fields": [ + { + "key": "id", + "label": "شناسه", + "is_array": 0, + "is_sortable": 1, + "is_need_keyword": 0, + "filter_key": "", + "filter_by_more": 0, + "export_state": 1, + "is_autoid": 0 + }, + { + "key": "title", + "lable": "عنوان", + "is_sortable": 1, + "is_need_keyword": 1, + "query_normal_boost": 3 + }, + { + "key": "content", + "lable": "متن اصلی", + "is_sortable": 1, + "is_need_keyword": 1, + "query_normal_boost": 1 + }, + { + "key": "embeddings", + "label": "بردار معنایی", + "export_state": 0 + }, + { + "key": "qanon_etebar", + "lable": "اعتبار قانون", + "is_sortable": 1 + }, + { + "key": "expire_date", + "lable": "تاریخ انقضاء", + "is_sortable": 1 + }, + { + "key": "fixed_date", + "lable": "تاریخ دائمی شدن", + "is_sortable": 1 + }, + { + "key": "renewal_date", + "lable": "تاریخ تمدید", + "is_sortable": 1 + }, + { + "key": "renewal_date", + "lable": "تاریخ تمدید", + "is_sortable": 1 + }, + { + "key": "exec_duration", + "lable": "مدت اجراء" + }, + { + "key": "ref_key", + "lable": "منبع اخذ داده", + "is_sortable": 1, + "export_state": 0 + }, + { + "key": "ts_date", + "lable": "تاریخ تصویب", + "is_sortable": 1, + "join_to": [ + { + "process": "jalaliTotsmp", + "property": "sort_date_timestamp", + "desc": "jalaliTotsmp(\"ts_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": "ts_year", + "desc": "1403/05/06 --> 1403" + } + ] + }, + { + "key": "ts_year", + "lable": "سال تصویب", + "is_sortable": 1 + }, + { + "key": "ts_ref", + "lable": "مرجع تصویب", + "is_sortable": 1, + "is_need_keyword": 1 + }, + { + "key": "term", + "lable": "عنوان دوره", + "is_sortable": 1 + }, + { + "key": "term_number", + "lable": "شماره دوره", + "is_sortable": 1 + }, + { + "key": "main_type", + "lable": "نوع اصلی/تبعی", + "is_sortable": 1 + }, + { + "key": "sub_type", + "lable": "نوع", + "is_sortable": 1 + }, + { + "key": "title_type", + "lable": "نوع قالب", + "is_sortable": 1, + "is_need_keyword": 1 + }, + { + "key": "section_len", + "lable": "تعداد حرف", + "export_state": 0 + }, + { + "key": "exceuter_organs", + "lable": "سازمان مجری", + "is_array": 1, + "is_object": 1, + "un_repeat_keys": ["id"] + }, + { + "key": "receiver_organs", + "lable": "سازمان‌های دریافت کننده", + "is_array": 1, + "is_object": 1, + "un_repeat_keys": ["id"] + }, + { + "key": "sort_date_timestamp", + "lable": "تاریخ مرتب سازی", + "export_state": 0 + }, + { + "key": "qanon_relations", + "lable": "قوانین مرتبط", + "is_array": 1, + "is_object": 1, + "un_repeat_keys": ["to_id", "from_section_id", "from_type"], + "export_state": 0 + }, + { + "key": "opinion_relations", + "lable": "آراء مرتبط", + "is_array": 1, + "is_object": 1, + "un_repeat_keys": ["to_id", "from_section_id", "from_type"], + "export_state": 0 + }, + { + "key": "before_info", + "lable": "اطلاعات پیشا قانون", + "is_array": 0, + "is_object": 1, + "un_repeat_keys": ["ref_id"], + "export_state": 0 + }, + { + "key": "letters", + "lable": "ارجاعات و ابلاغات", + "is_array": 1, + "is_object": 1, + "un_repeat_keys": ["type", "date", "number"], + "export_state": 0 + }, + { + 
"key": "is_delete", + "lable": "وضعیت حذف", + "is_sortable": 1, + "export_state": 0 + }, + { + "key": "time_edit", + "lable": "زمان ویرایش", + "is_sortable": 1 + }, + { + "key": "doc_tags", + "lable": "ویژگیهای سند", + "is_sortable": 1, + "is_array": 1, + "is_object": 0 + }, + { + "key": "doc_states", + "lable": "برچسبهای وضعیت سند", + "is_sortable": 1, + "is_array": 1, + "is_object": 0 + } + ] +} \ No newline at end of file diff --git a/app/schemas/old1/2_test_backend_old.json b/app/schemas/old1/2_test_backend_old.json new file mode 100644 index 0000000..b729493 --- /dev/null +++ b/app/schemas/old1/2_test_backend_old.json @@ -0,0 +1,345 @@ +{ + "index": { + "name": "test_index", + "aliases": [ + "test_index_alias" + ], + "index_key": "test", + "file_map": "backend_map.json" + }, + "properties": { + "id": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": true, + "join_to": [], + "label": "شناسه", + "filter_key": "", + "is_autoid": false + }, + "title": { + "type": "text", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": true, + "query_boost": 2.0, + "query_normal_boost": 3.0, + "needs_keyword": true, + "required": true, + "join_to": [], + "label": "عنوان", + "filter_key": "", + "is_autoid": false + }, + "content": { + "type": "text", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": true, + "query_boost": 1.0, + "query_normal_boost": 1.0, + "needs_keyword": true, + "required": false, + "join_to": [], + "label": "متن اصلی", + "filter_key": "", + "is_autoid": false + }, + "embeddings": { + "type": "dense_vector", + "return": false, + "sortable": false, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "بردار معنایی", + "filter_key": "", + "is_autoid": false + }, + "category": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 1.5, + "query_normal_boost": 1.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "دسته‌بندی", + "filter_key": "", + "is_autoid": false + }, + "tags": { + "type": "keyword", + "return": true, + "sortable": false, + "array": true, + "object": false, + "highlight": false, + "query_boost": 1.0, + "query_normal_boost": 0.8, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "برچسب‌ها", + "filter_key": "", + "is_autoid": false + }, + "author": { + "type": "object", + "return": true, + "sortable": false, + "array": false, + "object": true, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "نویسنده", + "filter_key": "", + "is_autoid": false, + "un_repeat_keys": [ + "id" + ] + }, + "status": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "وضعیت", + "filter_key": "", + "is_autoid": false + }, + "created_date": { + "type": "date", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + 
"needs_keyword": false, + "required": false, + "join_to": [ + { + "process": "jalaliTotsmp", + "property": "sort_date_timestamp", + "desc": "jalaliTotsmp(\"created_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": "created_year", + "desc": "1403/05/06 --> 1403" + } + ], + "label": "تاریخ ایجاد", + "filter_key": "", + "is_autoid": false + }, + "modified_date": { + "type": "date", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "تاریخ ویرایش", + "filter_key": "", + "is_autoid": false + }, + "sort_date_timestamp": { + "type": "long", + "return": false, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "تاریخ مرتب سازی", + "filter_key": "", + "is_autoid": false + }, + "created_year": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "سال ایجاد", + "filter_key": "", + "is_autoid": false + }, + "priority": { + "type": "integer", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "اولویت", + "filter_key": "", + "is_autoid": false + }, + "is_deleted": { + "type": "boolean", + "return": false, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "وضعیت حذف", + "filter_key": "", + "is_autoid": false + }, + "metadata": { + "type": "object", + "return": true, + "sortable": false, + "array": false, + "object": true, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "متادیتا", + "filter_key": "", + "is_autoid": false + }, + "related_items": { + "type": "object", + "return": true, + "sortable": false, + "array": true, + "object": true, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "موارد مرتبط", + "filter_key": "", + "is_autoid": false, + "un_repeat_keys": [ + "id", + "type" + ] + } + }, + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "created_date": "desc" + } + ], + "collapse_field": null, + "advanced_tags": { + "تاریخ": { + "key": "sort_date_timestamp", + "boost_offset": 30, + "collapse_field": "id" + }, + "عنوان": { + "key": "title", + "collapse_field": "id" + }, + "متن": { + "key": "content", + "collapse_field": "" + }, + "دسته‌بندی": { + "key": "category", + "collapse_field": "id" + }, + "برچسب": { + "key": "tags", + "collapse_field": "id" + }, + "نویسنده": { + "key": "author.name", + "collapse_field": "id" + }, + "سال": { + "key": "created_year", + "boost_offset": 30, + "collapse_field": "id" + }, + "اولویت": { + "key": "priority", + "collapse_field": "id" + } + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title": {}, + "content": {} + } + } + } +} \ No newline at end of file diff --git 
a/app/schemas/old1/qaqanon_backend_old1.json b/app/schemas/old1/qaqanon_backend_old1.json new file mode 100644 index 0000000..599215c --- /dev/null +++ b/app/schemas/old1/qaqanon_backend_old1.json @@ -0,0 +1,479 @@ +{ + "index": { + "name": "mj_qa_qanon", + "aliases": [], + "index_key": "qaqanon", + "file_map": "qaqanon_map.json" + }, + "fields": { + "id": { + "type": "keyword", + "label": "شناسه", + "sortable": true, + "required": true + }, + "title": { + "type": "text", + "label": "عنوان", + "sortable": true, + "highlight": true, + "query_boost": 2.0, + "query_normal_boost": 3.0, + "needs_keyword": true, + "required": true, + "sub_fields": [ + "fa", + "ph", + "keyword" + ] + }, + "title_popular": { + "type": "text", + "label": "عنوان مشهور", + "highlight": true, + "query_boost": 2.0, + "query_normal_boost": 3.0 + }, + "content": { + "type": "text", + "label": "متن اصلی", + "highlight": true, + "query_boost": 1.0, + "query_normal_boost": 1.0, + "sub_fields": [ + "fa", + "ph" + ] + }, + "embeddings": { + "type": "dense_vector", + "label": "بردار معنایی", + "return": false + }, + "initial": { + "type": "properties", + "label": "مرحله تقدیم", + "object": true, + "fields": { + "title": { + "type": "text", + "label": "عنوان ابتدائی" + }, + "content": { + "type": "text", + "label": "متن ابتدائی" + }, + "approuve_type": { + "type": "keyword", + "label": "نوع مصوبه" + }, + "creators": { + "type": "keyword", + "array": true, + "label": "اسامی نمایندگان" + }, + "signers": { + "type": "keyword", + "array": true, + "label": "اسامی سایر نمایندگان امضاء کننده" + }, + "approuve_number": { + "type": "text", + "label": "" + }, + "approuve_date": { + "type": "date", + "label": "" + }, + "letter_number": { + "type": "text", + "label": "" + }, + "letter_date": { + "type": "date", + "label": "" + }, + "letter_signer": { + "type": "keyword", + "label": "" + }, + "ministers": { + "type": "keyword", + "label": "" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "category": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 1.5, + "query_normal_boost": 1.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "دسته‌بندی", + "filter_key": "", + "is_autoid": false + }, + "tags": { + "type": "keyword", + "return": true, + "sortable": false, + "array": true, + "object": false, + "highlight": false, + "query_boost": 1.0, + "query_normal_boost": 0.8, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "برچسب‌ها", + "filter_key": "", + "is_autoid": false + }, + "author": { + "type": "object", + "return": true, + "sortable": false, + "array": false, + "object": true, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "نویسنده", + "filter_key": "", + "is_autoid": false, + "un_repeat_keys": [ + "id" + ] + }, + "status": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "وضعیت", + "filter_key": "", + "is_autoid": false + }, + "created_date": { + "type": "date", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + 
"query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [ + { + "process": "jalaliTotsmp", + "property": "sort_date_timestamp", + "desc": "jalaliTotsmp(\"created_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": "created_year", + "desc": "1403/05/06 --> 1403" + } + ], + "label": "تاریخ ایجاد", + "filter_key": "", + "is_autoid": false + }, + "modified_date": { + "type": "date", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "تاریخ ویرایش", + "filter_key": "", + "is_autoid": false + }, + "sort_date_timestamp": { + "type": "long", + "return": false, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "تاریخ مرتب سازی", + "filter_key": "", + "is_autoid": false + }, + "created_year": { + "type": "keyword", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "سال ایجاد", + "filter_key": "", + "is_autoid": false + }, + "priority": { + "type": "integer", + "return": true, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "اولویت", + "filter_key": "", + "is_autoid": false + }, + "is_deleted": { + "type": "boolean", + "return": false, + "sortable": true, + "array": false, + "object": false, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "وضعیت حذف", + "filter_key": "", + "is_autoid": false + }, + "metadata": { + "type": "object", + "return": true, + "sortable": false, + "array": false, + "object": true, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "متادیتا", + "filter_key": "", + "is_autoid": false + }, + "related_items": { + "type": "object", + "return": true, + "sortable": false, + "array": true, + "object": true, + "highlight": false, + "query_boost": 0.0, + "query_normal_boost": 0.0, + "needs_keyword": false, + "required": false, + "join_to": [], + "label": "موارد مرتبط", + "filter_key": "", + "is_autoid": false, + "un_repeat_keys": [ + "id", + "type" + ] + } + }, + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "created_date": "desc" + } + ], + "collapse_field": null, + "normal": [ + { + "match_phrase": { + "title.ph": { + "boost": 12, + "query": "@query_value" + } + } + }, + { + "match_phrase": { + "tags": { + "boost": 10, + "query": "@query_value" + } + } + }, + { + "match_phrase": { + "categories": { + "boost": 10, + "query": "@query_value" + } + } + }, + { + "match_phrase": { + "content.ph": { + "boost": 9, + "query": "@query_value" + } + } + }, + { + "match": { + "title.fa": { + "boost": 4, + "query": "@query_value" + } + } + }, + { + "match": { + "title.fa_syn": { + "boost": 2, + "query": "@query_value" + } + } + }, + { + "match": { + "tags": { + "boost": 3, + "query": "@query_value" + } + } + }, + { + "match": { + "categories": { + "boost": 3, + "query": "@query_value" + } + } + 
}, + { + "match": { + "te_ref": { + "boost": 3, + "query": "@query_value" + } + } + }, + { + "match": { + "content.fa": { + "boost": 3, + "query": "@query_value" + } + } + }, + { + "match": { + "content.fa_syn": { + "boost": 1, + "query": "@query_value" + } + } + } + ], + "advanced_tags": { + "تاریخ": { + "key": "sort_date_timestamp", + "boost_offset": 30, + "collapse_field": "id" + }, + "عنوان": { + "key": "title", + "collapse_field": "id" + }, + "متن": { + "key": "content", + "collapse_field": "" + }, + "دسته‌بندی": { + "key": "category", + "collapse_field": "id" + }, + "برچسب": { + "key": "tags", + "collapse_field": "id" + }, + "نویسنده": { + "key": "author.name", + "collapse_field": "id" + }, + "سال": { + "key": "created_year", + "boost_offset": 30, + "collapse_field": "id" + }, + "اولویت": { + "key": "priority", + "collapse_field": "id" + } + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title": {}, + "content": {} + } + } + } +} \ No newline at end of file diff --git a/app/schemas/qachat_backend.json b/app/schemas/qachat_backend.json new file mode 100644 index 0000000..5cea1b3 --- /dev/null +++ b/app/schemas/qachat_backend.json @@ -0,0 +1,121 @@ +{ + "index": { + "name": "mj_qa_chat", + "aliases": [], + "index_key": "qachat" + }, + "properties": { + "id": { + "type": "keyword" + }, + "chat_id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "user_id": { + "type": "integer" + }, + "user_query": { + "type": "text" + }, + "model_key": { + "type": "keyword" + }, + "retrived_passage": { + "type": "text" + }, + "retrived_ref_ids": { + "type": "keyword" + }, + "retrived_duration": { + "type": "integer" + }, + "prompt_type": { + "type": "keyword" + }, + "llm_duration": { + "type": "integer" + }, + "full_duration": { + "type": "integer" + }, + "time_create": { + "type": "date" + }, + "used_ref_ids": { + "type": "keyword" + }, + "status_text": { + "type": "keyword" + }, + "status": { + "type": "integer" + }, + "prompt_answer": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + }, + "validation": { + "required": [ + "user_query", + "model_key" + ], + "default_value": { + "prompt_type" : "question" + } + }, + "include_fields": [], + "exclude_fields": [], + "is_array": [ + "retrived_ref_ids", + "used_ref_ids" + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "time_create": "desc" + }, + { + "user_id": "asc" + } + ], + "normal": { + "phrase": { + "title": 5, + "user_query": 5, + "prompt_answer": 4 + }, + "match": { + "title": 3, + "user_query": 2, + "prompt_answer": 1 + } + }, + "filter_keys": { + "f_ud": "user_id", + "f_cd": "chat_id", + "f_tt": "title", + "f_pt": "prompt_type", + "f_urd": "used_ref_ids", + "f_md": "model_key", + "f_rrd": "retrived_ref_ids" + } + } +} \ No newline at end of file diff --git a/app/schemas/qaplan_backend.json b/app/schemas/qaplan_backend.json new file mode 100644 index 0000000..641ac18 --- /dev/null +++ b/app/schemas/qaplan_backend.json @@ -0,0 +1,135 @@ +{ + "index": { + "name": "mj_qa_plan", + "aliases": [], + "index_key": "qaplan" + }, + "properties": { + "plan_id": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "urgency_state": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "introduction": { + "type": "text" + }, + "content": { + 
"type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + } + } + }, + "result_sections": { + "properties": { + "number": { + "type": "integer" + }, + "title": { + "type": "float" + }, + "content": { + "type": "text" + }, + "notes": { + "type": "text" + }, + "reasons": { + "type": "text" + }, + "relation_sections": { + "type": "nested" + } + } + }, + "ai_result": { + "properties": { + "start_offset": { + "type": "integer" + }, + "end_offset": { + "type": "integer" + }, + "word": { + "type": "text" + }, + "start": { + "type": "float" + }, + "end": { + "type": "float" + } + } + }, + "time_create": { + "type": "date" + }, + "user_id": { + "type": "integer" + }, + "status_code": { + "type": "integer" + }, + "status_message": { + "type": "text" + } + }, + "validation": { + "required": [] + }, + "include_fields": [], + "is_array": [], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "ref_id": "asc" + }, + { + "sound_link": "asc" + } + ], + "normal": { + "phrase": { + "content.ph": 4 + }, + "match": { + "content": 2 + } + }, + "filter_keys": {}, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title.fa": {}, + "title.ph": {}, + "content.fa": {}, + "content.ph": {} + } + } + } +} \ No newline at end of file diff --git a/app/schemas/qaqanon.text b/app/schemas/qaqanon.text new file mode 100644 index 0000000..4aba0f2 --- /dev/null +++ b/app/schemas/qaqanon.text @@ -0,0 +1,17 @@ +امضاء به دستگاه اجرایی +ابلاغ به رئیس جمهور +اصلاحیه ابلاغیه +ابلاغیه استنکافی +ارسال رئیس جمهور به روزنامه رسمی +انتشار روزنامه رسمی +لازم الاجراء شدن -اصلاح متن مغایر + + + + + +text -> date : + + expire_date + exec_date + ts_date \ No newline at end of file diff --git a/app/schemas/qaqanon_backend.json b/app/schemas/qaqanon_backend.json new file mode 100644 index 0000000..edd2aa9 --- /dev/null +++ b/app/schemas/qaqanon_backend.json @@ -0,0 +1,879 @@ +{ + "index": { + "name": "mj_qa_qanon", + "aliases": [], + "index_key": "qaqanon", + "file_map": "qaqanon_map.json" + }, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + }, + "keyword": { + "type": "keyword" + } + } + }, + "title_popular": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + } + } + }, + "embeddings": { + "type": "dense_vector", + "dims": 768, + "index": true, + "similarity": "cosine" + }, + "initial": { + "properties": { + "title": { + "type": "text" + }, + "content": { + "type": "text" + }, + "approuve_type": { + "type": "keyword" + }, + "creators": { + "type": "keyword" + }, + "signers": { + "type": "keyword" + }, + "approuve_number": { + "type": "text" + }, + "approuve_date": { + "type": "date" + }, + "letter_number": { + "type": "text" + }, + "letter_date": { + "type": "date" + }, + "letter_signer": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "receive": { + "properties": { + "term_number": { + "type": "text" + }, + "ref_law": { + "type": "keyword" + }, + "meet_number": { + "type": "text" + }, + "meet_date": { + "type": "date" + }, + "register_number": { + "type": "text" + }, + 
"publish_number": { + "type": "text" + }, + "handle_type": { + "type": "keyword" + }, + "handle_method": { + "type": "keyword" + } + } + }, + "refer": { + "properties": { + "refer_date": { + "type": "date" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "commission_common": { + "properties": { + "name": { + "type": "keyword" + }, + "member_count": { + "type": "integer" + }, + "member_names": { + "type": "keyword" + } + } + }, + "handle_85": { + "type": "keyword" + } + } + }, + "commission_report": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_first": { + "properties": { + "approve_type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_nexts": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "parl_handle": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "agree_names": { + "type": "keyword" + }, + "against_names": { + "type": "keyword" + }, + "demands": { + "type": "keyword" + }, + "demand85_names": { + "type": "keyword" + }, + "handle_infos": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "before_info": { + "properties": { + "ref_id": { + "type": "keyword" + }, + "plan_type": { + "type": "keyword" + }, + "discuss_type": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "creators": { + "type": "text" + }, + "register_number": { + "type": "text" + }, + "prev_number": { + "type": "text" + }, + "receipt_date": { + "type": "date" + }, + "discuss_date1": { + "type": "date" + }, + "discuss_date2": { + "type": "date" + }, + "description": { + "type": "text" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "content": { + "type": "text" + } + } + }, + "letters": { + "properties": { + "type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "order": { + "type": "float" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "description": { + "type": "text" + }, + "content": { + "type": "text" + }, + "delay_time": { + "type": "text" + }, + "expire_date": { + "type": "date" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "counts": { + "properties": { + "key": { + "type": "keyword" + }, + "value": { + "type": "text" + } + } + }, + "davam_type": { + "type": "keyword" + }, + "number_all": { + "type": "text" + }, + "number_row": { + "type": "text" + }, + "archive_info": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + 
"ts_date_dotic": { + "type": "date" + }, + "ts_date1": { + "type": "date" + }, + "ts_ref1": { + "type": "keyword" + }, + "ts_date2": { + "type": "date" + }, + "ts_ref2": { + "type": "keyword" + }, + "conformity_rrk": { + "type": "keyword" + }, + "conformity_qanonyar": { + "type": "keyword" + }, + "content_ocr": { + "type": "text" + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "date" + }, + "exec_date": { + "type": "date" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "date" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "term": { + "type": "keyword" + }, + "term_number": { + "type": "integer" + }, + "ranking_weight": { + "type": "integer" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "action_type": { + "type": "keyword" + }, + "section_len": { + "type": "integer" + }, + "eblagh": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + }, + "from": { + "type": "keyword" + } + } + }, + "rrk": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + } + } + }, + "exceuter_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "receiver_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "qanon_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "opinion_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "fixed_date": { + "type": "date" + }, + "renewal_date": { + "type": "date" + }, + "exec_duration": { + "type": "text" + }, + "effective_date": { + "type": "date" + }, + "relation_organs": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + } + } + }, + "is_delete": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "doc_tags": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + } + }, + "validation": { + "required": [ + "id", + "title" + ] + }, + "include_fields": [ + "id", + "title", + "title_popular", + "sort_date_timestamp", + "time_edit", + "ts_date", + "ts_ref", + "content", + "doc_states" + ], + "exclude_fields": [ + "embeddings" + ], + "field_joins": { + "ts_date": [ + { + "process": "dateTotsmp", + "property": "sort_date_timestamp", + "desc": "dateTotsmp(\"ts_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": 
"ts_year", + "desc": "1403/05/06 --> 1403" + } + ] + }, + "is_array": [ + "initial.creators", + "initial.signers", + "initial.ministers", + "initial.files", + "refer.commission_mirrors", + "refer.commission_common", + "refer.commission_common.member_names", + "commission_report.agendas", + "commission_report.meets", + "commission_report.report_nexts", + "parl_handle.agendas", + "parl_handle.meets", + "parl_handle.agree_names", + "parl_handle.against_names", + "parl_handle.demands", + "parl_handle.demand85_names", + "parl_handle.handle_infos", + "before_info.ministers", + "before_info.creators", + "before_info.commission_mirrors", + "letters", + "letters.files", + "counts", + "exceuter_organs", + "receiver_organs", + "qanon_relations", + "opinion_relations", + "relation_organs", + "files", + "doc_states", + "doc_tags" + ], + "is_autoid": [], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "ts_date": "desc" + } + ], + "aggregation_fields": { + "ai-codes.label": 100, + "code-ai.label": 100, + "ts_ref": 100, + "state_etebar": 200, + "qanon_etebar": 200, + "sub_type": 100, + "other_type": 100, + "ts_year": 100, + "qanon_title": 100, + "topics.title": 100, + "title_type": 20, + "tvalid_state.tstate": 20, + "tcode_mains.label": 20, + "tcode_subs.label": 20, + "user_edit": { + "size": 100, + "is_value_user_id": true + }, + "user_actions.action_key": { + "size": 100, + "name": "user_actions", + "rename_values": { + "expert_tvalid_state": "کارشناس-اعتبارسنجی", + "supervisor_tvalid_state": "ناظر-اعتبارسنجی", + "manager_tvalid_state": "مدیر-اعتبارسنجی", + "expert_tcode_mains": "کارشناس-کداصلی", + "expert_tcode_subs": "کارشناس-کد فرعی", + "supervisor_tcode_mains": "ناظر-کداصلی", + "supervisor_tcode_subs": "ناظر-کد فرعی", + "manager_tcode_mains": "مدیر-کداصلی", + "manager_tcode_subs": "مدیر-کد فرعی" + } + } + }, + "collapse_fields": { + "qanon_id": { + "sort": { + "child_order": "asc" + }, + "size": 5 + } + }, + "normal": { + "phrase": { + "title.ph": 12, + "title_popular": 10, + "content.ph": 9 + }, + "match": { + "title.ph": 4, + "title_popular": 2, + "content.ph": 2 + } + }, + "sort_keys": { + "lasttitle": { + "sort_date_timestamp": "desc" + }, + "firsttitle": { + "sort_date_timestamp": "asc" + }, + "title": { + "qanon_title": "asc" + } + }, + "filter_keys": { + "f_rk": "ref_key", + "f_td": "ts_date", + "f_ey": "eb_year", + "f_ws": "work_state.title", + "f_wt": "work_tags.title", + "f_tr": "ts_ref.keyword", + "f_qty": "qanon_type", + "f_se": "state_etebar", + "f_qe": "qanon_etebar", + "f_st": "state_tanghih", + "f_ty": "ts_year", + "f_qt": "qanon_title.keyword", + "f_tt": "title_type", + "f_ttk": "title_type.keyword", + "f_tp": "topics.title", + "f_eo": "exceuter_organs.title", + "f_ro": "receiver_organs.title", + "f_tg": "tags.keyword", + "f_ct": "categories.keyword", + "f_mn": "majles_name", + "f_tn": "term_number", + "f_ed": "error_date.keyword", + "f_mb": "meet_number", + "f_tm": "talker.name", + "f_to": "talker.organ", + "f_md": "meet_date.keyword", + "f_dn": "dastur_number", + "f_mc": "member_count", + "f_nt": "content_type", + "f_mt": "main_type", + "f_sty": "sub_type", + "f_ot": "other_type.keyword", + "f_cal": "code-ai.label", + "f_vs": "tvalid_state.tstate", + "f_cm": "tcode_mains.label", + "f_cs": "tcode_subs.label", + "f_ua": "user_actions.action_key", + "f_ue": "user_edit", + "fr_mid": "tcode_mains.id", + "fr_sid": "tcode_subs.id", + "qanon_id": "qanon_id", + "mqanon_tp": { + "type": "api", + "key": "topics.id", + "service": "tlist", + "url": "/subject/get/childs", + 
"result": "meta.child_ids", + "body": { + "parents": "@value", + "item_state": 1 + } + } + }, + "advanced_tags": { + "تاریخ": { + "key": "sort_date_timestamp", + "boost_offset": 30, + "collapse_field": "id" + }, + "عنوان": { + "key": "title", + "collapse_field": "id" + }, + "متن": { + "key": "content", + "collapse_field": "" + }, + "دسته‌بندی": { + "key": "category", + "collapse_field": "id" + }, + "برچسب": { + "key": "tags", + "collapse_field": "id" + }, + "نویسنده": { + "key": "author.name", + "collapse_field": "id" + }, + "سال": { + "key": "created_year", + "boost_offset": 30, + "collapse_field": "id" + }, + "اولویت": { + "key": "priority", + "collapse_field": "id" + } + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title.fa": {}, + "title.ph": {}, + "qanon_title.fa": {}, + "qanon_title.ph": {}, + "title_popular": {}, + "content.fa": {}, + "content.ph": {} + } + } + } +} \ No newline at end of file diff --git a/app/schemas/qasection_backend.json b/app/schemas/qasection_backend.json new file mode 100644 index 0000000..2aab035 --- /dev/null +++ b/app/schemas/qasection_backend.json @@ -0,0 +1,963 @@ +{ + "index": { + "name": "mj_qa_section", + "aliases": [], + "index_key": "qasection", + "file_map": "qasection_map.json" + }, + "properties": { + "id": { + "type": "keyword" + }, + "html": { + "type": "text" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + } + } + }, + "qanon_title": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + }, + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "keyword" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "text" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "state_etebar": { + "type": "keyword" + }, + "state_tanghih": { + "type": "keyword" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "other_type": { + "type": "keyword" + }, + "qanon_id": { + "type": "keyword" + }, + "phase_id": { + "type": "keyword" + }, + "parent_id": { + "type": "keyword" + }, + "child_order": { + "type": "double" + }, + "other_info": { + "properties": { + "level": { + "type": "integer" + }, + "full_path": { + "type": "text" + }, + "number_text": { + "type": "text" + }, + "hierarchy": { + "type": "text" + } + } + }, + "qanon_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "content_len": { + "type": "integer" + }, + "word_len": { + "type": "integer" + }, + "content_emb": { + "type": "text" + }, + "content_normal": { + "type": "text" + }, + "nlp_tags": { + "type": "keyword" + }, + "nlp_childs": { + "properties": { + "id": { + "type": "keyword" + }, + "word_len": { + "type": "integer" + } + } + }, + "path_headings": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "level": { + "type": "integer" + }, + "simplifies": { + "type": "text" + }, + "ai_keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "topics": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "categories": { + "type": 
"text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "tags": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "keywords": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "nlp_parses": { + "properties": { + "main_type": { + "type": "keyword" + }, + "nlp_type": { + "type": "keyword" + }, + "begin": { + "type": "integer" + }, + "end": { + "type": "integer" + }, + "text": { + "type": "text" + }, + "referes": { + "type": "keyword" + }, + "dependency_infos": { + "type": "nested" + } + } + }, + "ref_state": { + "properties": { + "id": { + "type": "keyword" + }, + "qanon_id": { + "type": "keyword" + }, + "qanon_title": { + "type": "keyword" + }, + "qanon_date": { + "type": "text" + }, + "section_id": { + "type": "keyword" + }, + "section_title": { + "type": "keyword" + }, + "ref_key": { + "type": "keyword" + }, + "effect_type": { + "type": "keyword" + }, + "new_text": { + "type": "text" + }, + "description": { + "type": "text" + }, + "prev_text": { + "type": "text" + } + } + }, + "code-ai": { + "properties": { + "label": { + "type": "keyword" + }, + "score": { + "type": "float" + } + } + }, + "history_codes": { + "type": "nested", + "properties": { + "version_key": { + "type": "keyword" + }, + "sub_key": { + "type": "keyword" + }, + "label": { + "type": "keyword" + }, + "score": { + "type": "float" + }, + "description": { + "type": "text" + } + } + }, + "ai_code": { + "properties": { + "label": { + "type": "keyword" + }, + "score": { + "type": "float" + } + } + }, + "ai_codes": { + "properties": { + "label": { + "type": "keyword" + }, + "score": { + "type": "float" + } + } + }, + "embeddings": { + "type": "dense_vector", + "dims": 768, + "index": true, + "similarity": "cosine" + }, + "tvalid_state": { + "properties": { + "tstate": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "treason_type": { + "type": "keyword" + }, + "treason": { + "type": "text", + "fielddata": true + }, + "dependency_infos": { + "type": "nested" + } + } + }, + "tcode_mains": { + "properties": { + "label": { + "type": "keyword" + }, + "id": { + "type": "keyword" + } + } + }, + "tcode_subs": { + "properties": { + "label": { + "type": "keyword" + }, + "id": { + "type": "keyword" + } + } + }, + "tcode": { + "properties": { + "mains": { + "type": "keyword" + }, + "subs": { + "type": "keyword" + }, + "mains_old": { + "type": "keyword" + }, + "subs_old": { + "type": "keyword" + } + } + }, + "model_work_tags": { + "properties": { + "title": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "degree": { + "type": "integer" + } + } + }, + "model_work_state": { + "properties": { + "title": { + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "model_rules": { + "properties": { + "id": { + "type": "keyword" + }, + "text": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "text_ners": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "template_desc": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "degree": { + "type": "integer" + }, + "work_tags": { + "type": "keyword" + }, + "main_type": { + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "model_rules_logics": { + 
"properties": { + "id": { + "type": "keyword" + }, + "rules-id": { + "type": "keyword" + }, + "text": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "main_type": { + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "model_rules_logics_triples": { + "properties": { + "id": { + "type": "keyword" + }, + "rules-id": { + "type": "keyword" + }, + "rules_logics-id": { + "type": "keyword" + }, + "text": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "description": { + "type": "text" + } + } + }, + "model_rules_spins": { + "properties": { + "id": { + "type": "keyword" + }, + "rules-id": { + "type": "keyword" + }, + "text": { + "type": "text", + "fielddata": true + }, + "description": { + "type": "text" + } + } + }, + "model_files": { + "properties": { + "id": { + "type": "keyword" + }, + "file": { + "type": "keyword" + }, + "file_name": { + "type": "keyword" + }, + "title": { + "type": "text" + } + } + }, + "graph_context": { + "properties": { + "repalces": { + "type": "text" + }, + "delimiter_points": { + "type": "integer" + }, + "section_ids": { + "type": "keyword" + }, + "context": { + "type": "text" + }, + "tags": { + "type": "keyword" + } + } + }, + "graph_models": { + "properties": { + "id": { + "type": "keyword" + }, + "OWL": { + "type": "text" + }, + "rule_type": { + "type": "keyword" + }, + "rule": { + "type": "text" + }, + "labels": { + "type": "nested" + }, + "description": { + "type": "text" + } + } + }, + "ai_conflict_rules": { + "properties": { + "id": { + "type": "keyword" + }, + "rule_type": { + "type": "keyword" + }, + "rule": { + "type": "text" + }, + "relation_rules": { + "type": "keyword" + }, + "confilit_rules": { + "type": "keyword" + }, + "confilit_sections": { + "type": "keyword" + }, + "conflict_state": { + "type": "keyword" + }, + "description": { + "type": "text" + } + } + }, + "file_links": { + "properties": { + "title": { + "type": "text" + }, + "link": { + "type": "text" + } + } + }, + "is_delete": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "edit_type": { + "type": "keyword" + }, + "mirror_type": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + }, + "doc_tags": { + "type": "keyword" + }, + "user_edit": { + "type": "integer" + }, + "user_actions": { + "properties": { + "user_id": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "description": { + "type": "text" + }, + "step_order": { + "type": "integer" + }, + "wizard_key": { + "type": "keyword" + }, + "property_key": { + "type": "keyword" + }, + "action_key": { + "type": "keyword" + }, + "action_value": { + "type": "text" + }, + "action": { + "type": "object" + } + } + }, + "user_logs": { + "properties": { + "id": { + "type": "keyword" + }, + "user_id": { + "type": "integer" + }, + "username": { + "type": "keyword" + }, + "time_edit": { + "type": "date" + }, + "property": { + "type": "keyword" + } + } + } + }, + "validation": { + "required": [ + "id", + "qanon_title", + "qanon_id", + "ts_ref", + "ts_date" + ] + }, + "include_fields": [ + "id", + "content", + "ts_date", + "qanon_id", + "qanon_title", + "ts_ref", + "section_id", + "child_order", + "other_info", + "qanon_type", + "other_type", + "sort_date_timestamp", + "state_etebar", + "title", + "categories", + "topics", + "tags", + "ai_code", + "ai_codes", + "title_type" + ], + "exclude_fields": [ + "embeddings" + ], + "field_joins": { + "ts_date": [ + { + 
"process": "dateTotsmp", + "property": "sort_date_timestamp", + "desc": "dateTotsmp(\"ts_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": "ts_year", + "desc": "1403/05/06 --> 1403" + } + ], + "content": [ + { + "process": "len", + "property": "content_len", + "desc": "len(\"content\")" + }, + { + "process": "countWords", + "property": "word_len", + "desc": "lenWords()" + } + ] + }, + "is_array": [ + "nlp_tags", + "path_headings", + "simplifies", + "ai_keywords", + "topics", + "categories", + "tags", + "keywords", + "nlp_parses", + "nlp_parses.referes", + "ref_state", + "history_codes", + "ai_codes", + + "tcode_mains", + "tcode_subs", + + "model_work_tags", + "model_rules", + "model_rules_logics", + "model_rules_logics_triples", + "model_rules_spins", + "model_files", + + "graph_models", + "ai_conflict_rules", + "file_links", + "doc_states", + "doc_tags", + "user_actions", + "user_logs" + ], + "is_autoid": [], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "ts_date": "sort_date_timestamp" + } + ], + "aggregation_fields": { + "ai-codes.label": 100, + "code-ai.label": 100, + "ts_ref": 100, + "state_etebar": 200, + "qanon_etebar": 200, + "sub_type": 100, + "other_type": 100, + "ts_year": 100, + "qanon_title": 100, + "topics.title": 100, + "title_type": 20, + "tvalid_state.tstate": 20, + "tcode_mains.label": 20, + "tcode_subs.label": 20, + "user_edit": { + "size": 100, + "is_value_user_id": true + }, + "user_actions.action_key": { + "size": 100, + "name": "user_actions", + "rename_values": { + "expert_tvalid_state": "کارشناس-اعتبارسنجی", + "supervisor_tvalid_state": "ناظر-اعتبارسنجی", + "manager_tvalid_state": "مدیر-اعتبارسنجی", + "expert_tcode_mains": "کارشناس-کداصلی", + "expert_tcode_subs": "کارشناس-کد فرعی", + "supervisor_tcode_mains": "ناظر-کداصلی", + "supervisor_tcode_subs": "ناظر-کد فرعی", + "manager_tcode_mains": "مدیر-کداصلی", + "manager_tcode_subs": "مدیر-کد فرعی" + } + } + }, + "collapse_fields": { + "qanon_id": { + "sort": { + "child_order": "asc" + }, + "size": 5 + } + }, + "normal": { + "phrase": { + "title.ph": 12, + "title_popular": 10, + "content.ph": 9 + }, + "match": { + "title.ph": 4, + "title_popular": 2, + "content.ph": 2 + } + }, + "sort_keys": { + "lasttitle": { + "sort_date_timestamp": "desc" + }, + "firsttitle": { + "sort_date_timestamp": "asc" + }, + "title": { + "qanon_title": "asc" + } + }, + "filter_keys": { + "f_rk": "ref_key", + "f_td": "ts_date", + "f_ey": "eb_year", + "f_ws": "work_state.title", + "f_wt": "work_tags.title", + "f_tr": "ts_ref.keyword", + "f_qty": "qanon_type", + "f_se": "state_etebar", + "f_qe": "qanon_etebar", + "f_st": "state_tanghih", + "f_ty": "ts_year", + "f_qt": "qanon_title.keyword", + "f_tt": "title_type", + "f_ttk": "title_type.keyword", + "f_tp": "topics.title", + "f_eo": "exceuter_organs.title", + "f_ro": "receiver_organs.title", + "f_tg": "tags.keyword", + "f_ct": "categories.keyword", + "f_mn": "majles_name", + "f_tn": "term_number", + "f_ed": "error_date.keyword", + "f_mb": "meet_number", + "f_tm": "talker.name", + "f_to": "talker.organ", + "f_md": "meet_date.keyword", + "f_dn": "dastur_number", + "f_mc": "member_count", + "f_nt": "content_type", + "f_mt": "main_type", + "f_sty": "sub_type", + "f_ot": "other_type.keyword", + "f_cal": "code-ai.label", + "f_vs": "tvalid_state.tstate", + "f_cm": "tcode_mains.label", + "f_cs": "tcode_subs.label", + "f_ua": "user_actions.action_key", + "f_ue": "user_edit", + "fr_mid": "tcode_mains.id", + "fr_sid": "tcode_subs.id", + "qanon_id": "qanon_id", + 
"mqanon_tp": { + "type": "api", + "key": "topics.id", + "service": "tlist", + "url": "/subject/get/childs", + "result": "meta.child_ids", + "body": { + "parents": "@value", + "item_state": 1 + } + } + }, + "advanced_tags": { + "تاریخ": { + "key": "sort_date_timestamp", + "boost_offset": 30, + "collapse_field": "id" + }, + "عنوان": { + "key": "title", + "collapse_field": "id" + }, + "متن": { + "key": "content", + "collapse_field": "" + }, + "دسته‌بندی": { + "key": "category", + "collapse_field": "id" + }, + "برچسب": { + "key": "tags", + "collapse_field": "id" + }, + "نویسنده": { + "key": "author.name", + "collapse_field": "id" + }, + "سال": { + "key": "created_year", + "boost_offset": 30, + "collapse_field": "id" + }, + "اولویت": { + "key": "priority", + "collapse_field": "id" + } + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title.fa": {}, + "title.ph": {}, + "qanon_title.fa": {}, + "qanon_title.ph": {}, + "title_popular": {}, + "content.fa": {}, + "content.ph": {} + } + } + } +} \ No newline at end of file diff --git a/app/schemas/settings.json b/app/schemas/settings.json new file mode 100644 index 0000000..3f9d275 --- /dev/null +++ b/app/schemas/settings.json @@ -0,0 +1,140 @@ +{ + "index.max_result_window": 15000, + "index": { + "analysis": { + "analyzer": { + "phrase_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [] + }, + "normal_analyzer_fa": { + "type": "custom", + "tokenizer": "standard", + "char_filter": [ + "fa_char_filter" + ], + "filter": [ + "fa_stop" + ] + } + }, + "filter": { + "fa_stop": { + "type": "stop", + "stopwords": [ + "یا", + "را", + "این", + "با", + "آن", + "و", + "در", + "به", + "که", + "از", + "طی", + "پس", + "چه", + "اگر", + "نه", + "آنها", + "هر", + "او", + "ما", + "من", + "تا", + "نیز", + "اما", + "یک", + "بر", + "هم", + "برای", + "کن", + "کرد", + "کردن", + "باش", + "بود", + "بودن", + "شو", + "شد", + "شدن", + "‏دار", + "داشت", + "داشتن", + "‏خواه", + "خواست", + "خواستن", + "‏گوی", + "گفت", + "گفتن", + "‏گیر", + "گرفت", + "گرفتن", + "‏آی", + "آمد", + "آمدن", + "‏توان", + "توانستن", + "‏یاب", + "یافتن", + "‏آور", + "آورد", + "آوردن", + "1", + "2", + "3", + "ص", + "4", + "و", + "5", + "ج", + "6", + "a", + "top", + "href", + "pageno" + ], + "char_filter": [] + } + }, + "char_filter": { + "fa_char_filter": { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9", + "ک => ك", + "ی => ي", + "ة => ه", + "إ => ا", + "أ => ا", + "آ => ا", + "ء => ا", + "َ => ", + "ُ => ", + "ِ => ", + "ّ => ", + "ً => ", + "ٌ => ", + "ٍ => ", + "ْ => " + ] + } + } + }, + "number_of_shards": "1", + "number_of_replicas": "0" + } +} \ No newline at end of file diff --git a/app/schemas/test_backend.json b/app/schemas/test_backend.json new file mode 100644 index 0000000..a51d25a --- /dev/null +++ b/app/schemas/test_backend.json @@ -0,0 +1,888 @@ +{ + "index": { + "name": "tt_test", + "aliases": [], + "index_key": "test" + }, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + }, + "keyword": { + "type": "keyword" + } + } + }, + "title_popular": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + 
"ph": { + "type": "text" + } + } + }, + "embeddings": { + "type": "dense_vector" + }, + "initial": { + "properties": { + "title": { + "type": "text" + }, + "content": { + "type": "text" + }, + "approuve_type": { + "type": "keyword" + }, + "creators": { + "type": "keyword" + }, + "signers": { + "type": "keyword" + }, + "approuve_number": { + "type": "text" + }, + "approuve_date": { + "type": "date" + }, + "letter_number": { + "type": "text" + }, + "letter_date": { + "type": "date" + }, + "letter_signer": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "receive": { + "properties": { + "term_number": { + "type": "text" + }, + "ref_law": { + "type": "keyword" + }, + "meet_number": { + "type": "text" + }, + "meet_date": { + "type": "date" + }, + "register_number": { + "type": "text" + }, + "publish_number": { + "type": "text" + }, + "handle_type": { + "type": "keyword" + }, + "handle_method": { + "type": "keyword" + } + } + }, + "refer": { + "properties": { + "refer_date": { + "type": "date" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "commission_common": { + "properties": { + "name": { + "type": "keyword" + }, + "member_count": { + "type": "integer" + }, + "member_names": { + "type": "keyword" + } + } + }, + "handle_85": { + "type": "keyword" + } + } + }, + "commission_report": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_first": { + "properties": { + "approve_type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_nexts": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "parl_handle": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "agree_names": { + "type": "keyword" + }, + "against_names": { + "type": "keyword" + }, + "demands": { + "type": "keyword" + }, + "demand85_names": { + "type": "keyword" + }, + "handle_infos": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "before_info": { + "properties": { + "ref_id": { + "type": "keyword" + }, + "plan_type": { + "type": "keyword" + }, + "discuss_type": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "creators": { + "type": "text" + }, + "register_number": { + "type": "text" + }, + "prev_number": { + "type": "text" + }, + "receipt_date": { + "type": "date" + }, + "discuss_date1": { + "type": "date" + }, + "discuss_date2": { + "type": "date" + }, + "description": { + "type": "text" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "content": 
{ + "type": "text" + } + } + }, + "letters": { + "properties": { + "id": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "order": { + "type": "float" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "description": { + "type": "text" + }, + "content": { + "type": "text" + }, + "delay_time": { + "type": "text" + }, + "expire_date": { + "type": "date" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "counts": { + "properties": { + "key": { + "type": "keyword" + }, + "value": { + "type": "text" + } + } + }, + "davam_type": { + "type": "keyword" + }, + "number_all": { + "type": "text" + }, + "number_row": { + "type": "text" + }, + "archive_info": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + "ts_date_dotic": { + "type": "date" + }, + "ts_date1": { + "type": "date" + }, + "ts_ref1": { + "type": "keyword" + }, + "ts_date2": { + "type": "date" + }, + "ts_ref2": { + "type": "keyword" + }, + "conformity_rrk": { + "type": "keyword" + }, + "conformity_qanonyar": { + "type": "keyword" + }, + "content_ocr": { + "type": "text" + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "date" + }, + "exec_date": { + "type": "date" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "date" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "term": { + "type": "keyword" + }, + "term_number": { + "type": "integer" + }, + "ranking_weight": { + "type": "integer" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "action_type": { + "type": "keyword" + }, + "section_len": { + "type": "integer" + }, + "eblagh": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + }, + "from": { + "type": "keyword" + } + } + }, + "rrk": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + } + } + }, + "exceuter_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "receiver_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "qanon_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "opinion_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "fixed_date": { + "type": "date" + }, + 
"renewal_date": { + "type": "date" + }, + "exec_duration": { + "type": "text" + }, + "effective_date": { + "type": "date" + }, + "relation_organs": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + } + } + }, + "is_delete": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "doc_tags": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + } + }, + "validation": { + "required": [ + "id", + "title" + ], + "default_value": { + "prompt_type": "question" + } + }, + "include_fields": [ + "id", + "title", + "title_popular", + "sort_date_timestamp", + "time_edit", + "ts_date", + "ts_ref", + "content", + "doc_states" + ], + "exclude_fields": [ + "embeddings" + ], + "field_joins": { + "ts_date": [ + { + "process": "dateTotsmp", + "property": "sort_date_timestamp", + "desc": "dateTotsmp(\"ts_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": "ts_year", + "desc": "1403/05/06 --> 1403" + } + ] + }, + "is_array": [ + "initial.creators", + "initial.signers", + "initial.ministers", + "initial.files", + "refer.commission_mirrors", + "refer.commission_common", + "refer.commission_common.member_names", + "commission_report.agendas", + "commission_report.meets", + "commission_report.report_nexts", + "parl_handle.agendas", + "parl_handle.meets", + "parl_handle.agree_names", + "parl_handle.against_names", + "parl_handle.demands", + "parl_handle.demand85_names", + "parl_handle.handle_infos", + "before_info.ministers", + "before_info.creators", + "before_info.commission_mirrors", + "letters", + "letters.files", + "counts", + "exceuter_organs", + "receiver_organs", + "qanon_relations", + "opinion_relations", + "relation_organs", + "files", + "doc_states", + "doc_tags" + ], + "is_autoid": [ + "letters" + + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "ts_date": "desc" + } + ], + "aggregation_fields": { + "ai-codes.label": 100, + "code-ai.label": 100, + "ts_ref": 100, + "state_etebar": 200, + "qanon_etebar": 200, + "sub_type": 100, + "other_type": 100, + "ts_year": 100, + "qanon_title": 100, + "topics.title": 100, + "title_type": 20, + "tvalid_state.tstate": 20, + "tcode_mains.label": 20, + "tcode_subs.label": 20, + "user_edit": { + "size": 100, + "is_value_user_id": true + }, + "user_actions.action_key": { + "size": 100, + "name": "user_actions", + "rename_values": { + "expert_tvalid_state": "کارشناس-اعتبارسنجی", + "supervisor_tvalid_state": "ناظر-اعتبارسنجی", + "manager_tvalid_state": "مدیر-اعتبارسنجی", + "expert_tcode_mains": "کارشناس-کداصلی", + "expert_tcode_subs": "کارشناس-کد فرعی", + "supervisor_tcode_mains": "ناظر-کداصلی", + "supervisor_tcode_subs": "ناظر-کد فرعی", + "manager_tcode_mains": "مدیر-کداصلی", + "manager_tcode_subs": "مدیر-کد فرعی" + } + } + }, + "collapse_fields": { + "qanon_id": { + "sort": { + "child_order": "asc" + }, + "size": 5 + } + }, + "normal": { + "phrase": { + "title.ph": 12, + "title_popular": 10, + "content.ph": 9 + }, + "match": { + "title.ph": 4, + "title_popular": 2, + "content.ph": 2 + } + }, + "sort_keys": { + "lasttitle": { + "sort_date_timestamp": "desc" + }, + "firsttitle": { + "sort_date_timestamp": "asc" + }, + "title": { + "qanon_title": "asc" + } + }, + "filter_keys": { + "f_rk": "ref_key", + "f_td": "ts_date", + "f_ey": "eb_year", + "f_ws": "work_state.title", + "f_wt": "work_tags.title", + "f_tr": "ts_ref.keyword", + "f_qty": "qanon_type", + "f_se": 
"state_etebar", + "f_qe": "qanon_etebar", + "f_st": "state_tanghih", + "f_ty": "ts_year", + "f_qt": "qanon_title.keyword", + "f_tt": "title_type", + "f_ttk": "title_type.keyword", + "f_tp": "topics.title", + "f_eo": "exceuter_organs.title", + "f_ro": "receiver_organs.title", + "f_tg": "tags.keyword", + "f_ct": "categories.keyword", + "f_mn": "majles_name", + "f_tn": "term_number", + "f_ed": "error_date.keyword", + "f_mb": "meet_number", + "f_tm": "talker.name", + "f_to": "talker.organ", + "f_md": "meet_date.keyword", + "f_dn": "dastur_number", + "f_mc": "member_count", + "f_nt": "content_type", + "f_mt": "main_type", + "f_sty": "sub_type", + "f_ot": "other_type.keyword", + "f_cal": "code-ai.label", + "f_vs": "tvalid_state.tstate", + "f_cm": "tcode_mains.label", + "f_cs": "tcode_subs.label", + "f_ua": "user_actions.action_key", + "f_ue": "user_edit", + "fr_mid": "tcode_mains.id", + "fr_sid": "tcode_subs.id", + "qanon_id": "qanon_id", + "mqanon_tp": { + "type": "api", + "key": "topics.id", + "service": "tlist", + "url": "/subject/get/childs", + "result": "meta.child_ids", + "body": { + "parents": "@value", + "item_state": 1 + } + } + }, + "advanced_tags": { + "تاریخ": { + "key": "sort_date_timestamp", + "boost_offset": 30, + "collapse_field": "id" + }, + "عنوان": { + "key": "title", + "collapse_field": "id" + }, + "متن": { + "key": "content", + "collapse_field": "" + }, + "دسته‌بندی": { + "key": "category", + "collapse_field": "id" + }, + "برچسب": { + "key": "tags", + "collapse_field": "id" + }, + "نویسنده": { + "key": "author.name", + "collapse_field": "id" + }, + "سال": { + "key": "created_year", + "boost_offset": 30, + "collapse_field": "id" + }, + "اولویت": { + "key": "priority", + "collapse_field": "id" + } + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title.fa": {}, + "title.ph": {}, + "qanon_title.fa": {}, + "qanon_title.ph": {}, + "title_popular": {}, + "content.fa": {}, + "content.ph": {} + } + }, + "search_all_fields":{ + "title": "title", + "content": "prompt_answer" + } + } +} \ No newline at end of file diff --git a/app/schemas/use_for_test_backend.json b/app/schemas/use_for_test_backend.json new file mode 100644 index 0000000..6e6da57 --- /dev/null +++ b/app/schemas/use_for_test_backend.json @@ -0,0 +1,890 @@ +{ + "index": { + "name": "use_for_test", + "aliases": [], + "index_key": "use_for_test" + }, + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + }, + "keyword": { + "type": "keyword" + } + } + }, + "title_popular": { + "type": "text", + "analyzer": "normal_analyzer_fa", + "search_analyzer": "normal_analyzer_fa", + "search_quote_analyzer": "phrase_analyzer_fa" + }, + "content": { + "type": "text", + "fields": { + "fa": { + "type": "text" + }, + "ph": { + "type": "text" + } + } + }, + "embeddings": { + "type": "dense_vector" + }, + "initial": { + "properties": { + "title": { + "type": "text" + }, + "content": { + "type": "text" + }, + "approuve_type": { + "type": "keyword" + }, + "creators": { + "type": "keyword" + }, + "signers": { + "type": "keyword" + }, + "approuve_number": { + "type": "text" + }, + "approuve_date": { + "type": "date" + }, + "letter_number": { + "type": "text" + }, + "letter_date": { + "type": "date" + }, + "letter_signer": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + 
"title": { + "type": "text" + } + } + } + } + }, + "receive": { + "properties": { + "term_number": { + "type": "text" + }, + "ref_law": { + "type": "keyword" + }, + "meet_number": { + "type": "text" + }, + "meet_date": { + "type": "date" + }, + "register_number": { + "type": "text" + }, + "publish_number": { + "type": "text" + }, + "handle_type": { + "type": "keyword" + }, + "handle_method": { + "type": "keyword" + } + } + }, + "refer": { + "properties": { + "refer_date": { + "type": "date" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "commission_common": { + "properties": { + "name": { + "type": "keyword" + }, + "member_count": { + "type": "integer" + }, + "member_names": { + "type": "keyword" + } + } + }, + "handle_85": { + "type": "keyword" + } + } + }, + "commission_report": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_first": { + "properties": { + "approve_type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "report_nexts": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "parl_handle": { + "properties": { + "agendas": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "meets": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "date" + } + } + }, + "agree_names": { + "type": "keyword" + }, + "against_names": { + "type": "keyword" + }, + "demands": { + "type": "keyword" + }, + "demand85_names": { + "type": "keyword" + }, + "handle_infos": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "duration_85": { + "type": "date" + }, + "date_85": { + "type": "date" + } + } + } + } + }, + "before_info": { + "properties": { + "ref_id": { + "type": "keyword" + }, + "plan_type": { + "type": "keyword" + }, + "discuss_type": { + "type": "keyword" + }, + "ministers": { + "type": "keyword" + }, + "creators": { + "type": "text" + }, + "register_number": { + "type": "text" + }, + "prev_number": { + "type": "text" + }, + "receipt_date": { + "type": "date" + }, + "discuss_date1": { + "type": "date" + }, + "discuss_date2": { + "type": "date" + }, + "description": { + "type": "text" + }, + "commission_type": { + "type": "keyword" + }, + "commission_main": { + "type": "keyword" + }, + "commission_mirrors": { + "type": "keyword" + }, + "content": { + "type": "text" + } + } + }, + "letters": { + "properties": { + "id": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "order": { + "type": "float" + }, + "number": { + "type": "text" + }, + "date": { + "type": "date" + }, + "description": { + "type": "text" + }, + "content": { + "type": "text" + }, + "delay_time": { + "type": "text" + }, + "expire_date": { + "type": "date" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + } + } + } + } + }, + "counts": { + "properties": { + "key": { + "type": "keyword" + }, + "value": { + 
"type": "text" + } + } + }, + "davam_type": { + "type": "keyword" + }, + "number_all": { + "type": "text" + }, + "number_row": { + "type": "text" + }, + "archive_info": { + "properties": { + "type": { + "type": "keyword" + }, + "number": { + "type": "text" + }, + "description": { + "type": "text" + } + } + }, + "ts_date_dotic": { + "type": "date" + }, + "ts_date1": { + "type": "date" + }, + "ts_ref1": { + "type": "keyword" + }, + "ts_date2": { + "type": "date" + }, + "ts_ref2": { + "type": "keyword" + }, + "conformity_rrk": { + "type": "keyword" + }, + "conformity_qanonyar": { + "type": "keyword" + }, + "content_ocr": { + "type": "text" + }, + "qanon_etebar": { + "type": "keyword" + }, + "expire_date": { + "type": "date" + }, + "exec_date": { + "type": "date" + }, + "ref_key": { + "type": "keyword" + }, + "ts_date": { + "type": "date" + }, + "ts_year": { + "type": "integer" + }, + "ts_ref": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "term": { + "type": "keyword" + }, + "term_number": { + "type": "integer" + }, + "ranking_weight": { + "type": "integer" + }, + "main_type": { + "type": "keyword" + }, + "sub_type": { + "type": "keyword" + }, + "title_type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "action_type": { + "type": "keyword" + }, + "section_len": { + "type": "integer" + }, + "eblagh": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + }, + "from": { + "type": "keyword" + } + } + }, + "rrk": { + "properties": { + "number": { + "type": "text" + }, + "date": { + "type": "text" + } + } + }, + "exceuter_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "receiver_organs": { + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "keyword" + } + } + }, + "sort_date_timestamp": { + "type": "long" + }, + "qanon_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "opinion_relations": { + "properties": { + "from_type": { + "type": "keyword" + }, + "from_section_id": { + "type": "keyword" + }, + "from_section_title": { + "type": "keyword" + }, + "rel_type": { + "type": "keyword" + }, + "to_type": { + "type": "keyword" + }, + "to_id": { + "type": "keyword" + }, + "to_title": { + "type": "keyword" + }, + "wait": { + "type": "integer" + }, + "rel_key": { + "type": "keyword" + } + } + }, + "fixed_date": { + "type": "date" + }, + "renewal_date": { + "type": "date" + }, + "exec_duration": { + "type": "text" + }, + "effective_date": { + "type": "date" + }, + "relation_organs": { + "type": "keyword" + }, + "files": { + "properties": { + "path": { + "type": "text" + }, + "file_name": { + "type": "text" + }, + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + } + } + }, + "is_delete": { + "type": "integer" + }, + "time_edit": { + "type": "date" + }, + "doc_tags": { + "type": "keyword" + }, + "doc_states": { + "type": "keyword" + } + }, + "validation": { + "required": [ + "id", + "title" + ], + "default_value": { + "prompt_type": "question" + } + }, + "include_fields": [ + "id", + 
"title", + "title_popular", + "sort_date_timestamp", + "time_edit", + "ts_date", + "ts_ref", + "content", + "doc_states" + ], + "exclude_fields": [ + "embeddings" + ], + "field_joins": { + "ts_date": [ + { + "process": "dateTotsmp", + "property": "sort_date_timestamp", + "desc": "dateTotsmp(\"ts_date\",\"/\")" + }, + { + "process": "getYearOfDate", + "property": "ts_year", + "desc": "1403/05/06 --> 1403" + } + ] + }, + "is_array": [ + "initial.creators", + "initial.signers", + "initial.ministers", + "initial.files", + "refer.commission_mirrors", + "refer.commission_common", + "refer.commission_common.member_names", + "commission_report.agendas", + "commission_report.meets", + "commission_report.report_nexts", + "parl_handle.agendas", + "parl_handle.meets", + "parl_handle.agree_names", + "parl_handle.against_names", + "parl_handle.demands", + "parl_handle.demand85_names", + "parl_handle.handle_infos", + "before_info.ministers", + "before_info.creators", + "before_info.commission_mirrors", + "letters", + "letters.files", + "counts", + "exceuter_organs", + "receiver_organs", + "qanon_relations", + "opinion_relations", + "relation_organs", + "files", + "doc_states", + "doc_tags" + ], + "is_autoid": [ + "letters" + + ], + "query": { + "default_sort": [ + { + "_score": "desc" + }, + { + "ts_date": "desc" + } + ], + "aggregation_fields": { + "ai-codes.label": 100, + "code-ai.label": 100, + "ts_ref": 100, + "state_etebar": 200, + "qanon_etebar": 200, + "sub_type": 100, + "other_type": 100, + "ts_year": 100, + "qanon_title": 100, + "topics.title": 100, + "title_type": 20, + "tvalid_state.tstate": 20, + "tcode_mains.label": 20, + "tcode_subs.label": 20, + "user_edit": { + "size": 100, + "is_value_user_id": true + }, + "user_actions.action_key": { + "size": 100, + "name": "user_actions", + "rename_values": { + "expert_tvalid_state": "کارشناس-اعتبارسنجی", + "supervisor_tvalid_state": "ناظر-اعتبارسنجی", + "manager_tvalid_state": "مدیر-اعتبارسنجی", + "expert_tcode_mains": "کارشناس-کداصلی", + "expert_tcode_subs": "کارشناس-کد فرعی", + "supervisor_tcode_mains": "ناظر-کداصلی", + "supervisor_tcode_subs": "ناظر-کد فرعی", + "manager_tcode_mains": "مدیر-کداصلی", + "manager_tcode_subs": "مدیر-کد فرعی" + } + } + }, + "collapse_fields": { + "qanon_id": { + "sort": { + "child_order": "asc" + }, + "size": 5 + } + }, + "normal": { + "phrase": { + "title.ph": 12, + "title_popular": 10, + "content.ph": 9 + }, + "match": { + "title.ph": 4, + "title_popular": 2, + "content.ph": 2 + } + }, + "sort_keys": { + "lasttitle": { + "sort_date_timestamp": "desc" + }, + "firsttitle": { + "sort_date_timestamp": "asc" + }, + "title": { + "qanon_title": "asc" + } + }, + "filter_keys": { + "f_i":"id", + "f_cat":"category", + "f_rk": "ref_key", + "f_td": "ts_date", + "f_ey": "eb_year", + "f_ws": "work_state.title", + "f_wt": "work_tags.title", + "f_tr": "ts_ref.keyword", + "f_qty": "qanon_type", + "f_se": "state_etebar", + "f_qe": "qanon_etebar", + "f_st": "state_tanghih", + "f_ty": "ts_year", + "f_qt": "qanon_title.keyword", + "f_tt": "title_type", + "f_ttk": "title_type.keyword", + "f_tp": "topics.title", + "f_eo": "exceuter_organs.title", + "f_ro": "receiver_organs.title", + "f_tg": "tags.keyword", + "f_ct": "categories.keyword", + "f_mn": "majles_name", + "f_tn": "term_number", + "f_ed": "error_date.keyword", + "f_mb": "meet_number", + "f_tm": "talker.name", + "f_to": "talker.organ", + "f_md": "meet_date.keyword", + "f_dn": "dastur_number", + "f_mc": "member_count", + "f_nt": "content_type", + "f_mt": "main_type", + "f_sty": 
"sub_type", + "f_ot": "other_type.keyword", + "f_cal": "code-ai.label", + "f_vs": "tvalid_state.tstate", + "f_cm": "tcode_mains.label", + "f_cs": "tcode_subs.label", + "f_ua": "user_actions.action_key", + "f_ue": "user_edit", + "fr_mid": "tcode_mains.id", + "fr_sid": "tcode_subs.id", + "qanon_id": "qanon_id", + "mqanon_tp": { + "type": "api", + "key": "topics.id", + "service": "tlist", + "url": "/subject/get/childs", + "result": "meta.child_ids", + "body": { + "parents": "@value", + "item_state": 1 + } + } + }, + "advanced_tags": { + "تاریخ": { + "key": "sort_date_timestamp", + "boost_offset": 30, + "collapse_field": "id" + }, + "عنوان": { + "key": "title", + "collapse_field": "id" + }, + "متن": { + "key": "content", + "collapse_field": "" + }, + "دسته‌بندی": { + "key": "category", + "collapse_field": "id" + }, + "برچسب": { + "key": "tags", + "collapse_field": "id" + }, + "نویسنده": { + "key": "author.name", + "collapse_field": "id" + }, + "سال": { + "key": "created_year", + "boost_offset": 30, + "collapse_field": "id" + }, + "اولویت": { + "key": "priority", + "collapse_field": "id" + } + }, + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title.fa": {}, + "title.ph": {}, + "qanon_title.fa": {}, + "qanon_title.ph": {}, + "title_popular": {}, + "content.fa": {}, + "content.ph": {} + } + }, + "search_all_fields":{ + "title": "title", + "content": "prompt_answer" + } + } +} \ No newline at end of file