data_processes/p3_words_embedder.py
2025-08-10 18:14:42 +03:30

30 lines
1005 B
Python

"""
ایجاد بردار جملات - امبدینگ
"""
from sentence_transformers import SentenceTransformer
import json
def load_embedder_model(model_name):
    """Instantiate and return a ``SentenceTransformer`` for ``model_name``.

    Args:
        model_name: Identifier handed unchanged to the
            ``SentenceTransformer`` constructor.

    Returns:
        The newly constructed ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
def do_word_embedder(sections):
    """Embed every section's content and save the result as JSON.

    For each key of ``sections`` the value's ``'content'`` entry is encoded
    with the ``paraphrase-multilingual-mpnet-base-v2`` model and the result
    is stored under that value's ``'embeddings'`` entry (``sections`` is
    mutated in place). The whole mapping is then written to
    ``./data/embeddings/sections_embeddings.json``.

    Args:
        sections: Mapping whose values are dicts holding a ``'content'``
            entry (presumably section id -> section dict; verify against
            the caller).

    Returns:
        None.
    """
    import os

    model_name = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
    # Load the model once and reuse it for every section instead of
    # reloading it per item.
    model = load_embedder_model(model_name)

    for key in sections:
        section = sections[key]
        vector = model.encode(section['content'])
        # encode() yields a NumPy array by default, which the json module
        # cannot serialise; store a plain nested list instead.
        section['embeddings'] = vector.tolist() if hasattr(vector, 'tolist') else vector

    # Serialise before opening the file so a serialisation error cannot
    # leave a truncated output file behind.
    data = json.dumps(sections, ensure_ascii=False)

    output_path = './data/embeddings/sections_embeddings.json'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write(data)
def get_sentence_embeddings(sentence, model=None):
    """Encode ``sentence`` into its embedding.

    Args:
        sentence: Input passed unchanged to ``model.encode``.
        model: Optional, already loaded model object exposing ``encode``.
            When omitted, the default multilingual model is loaded for this
            call; callers embedding many sentences should load the model
            once and pass it in to avoid reloading it every time.

    Returns:
        Whatever ``model.encode(sentence)`` returns.
    """
    if model is None:
        model_name = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
        model = load_embedder_model(model_name)
    return model.encode(sentence)
# Script entry point: running this module directly does nothing.
if __name__ == '__main__':
    pass