30 lines
1005 B
Python
30 lines
1005 B
Python
"""
|
|
ایجاد بردار جملات - امبدینگ
|
|
"""
|
|
from sentence_transformers import SentenceTransformer
|
|
import json
|
|
|
|
# Models already constructed, keyed by model name, so each is loaded only once.
_LOADED_MODELS = {}


def load_embedder_model(model_name):
    """Return the SentenceTransformer for ``model_name``, loading it at most once.

    Args:
        model_name: Model identifier passed unchanged to ``SentenceTransformer``.

    Returns:
        The ``SentenceTransformer`` instance for ``model_name``. Repeated calls
        with the same name return the same object.
    """
    # Callers in this module request the model on every call; constructing it
    # anew each time would reload the model for every sentence.
    if model_name not in _LOADED_MODELS:
        _LOADED_MODELS[model_name] = SentenceTransformer(model_name)
    return _LOADED_MODELS[model_name]
|
|
|
|
def do_word_embedder(sections):
    """Embed every section's content and write the result to a JSON file.

    For each key of ``sections`` the ``'content'`` entry of its value is
    encoded and the embedding is stored back under ``'embeddings'``. The whole
    structure is then written to ``./data/embeddings/sections_embeddings.json``.

    Args:
        sections: Container iterated for keys and indexed by them (expected to
            be a dict); each value must have a ``'content'`` entry. It is
            modified in place.
    """
    import os

    model_name = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
    # Load the model once and reuse it for every section.
    model = load_embedder_model(model_name)

    for key in sections:
        # encode() returns a NumPy array by default, which json cannot
        # serialize, so keep the embedding as plain (nested) Python lists.
        embeddings = model.encode(sections[key]['content'])
        sections[key]['embeddings'] = embeddings.tolist()

    # Serialize before opening the file so that a serialization error cannot
    # leave a truncated output file behind.
    data = json.dumps(sections, ensure_ascii=False)

    output_path = './data/embeddings/sections_embeddings.json'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write(data)
|
|
|
|
def get_sentence_embeddings(sentence, model=None):
    """Return the embedding of ``sentence``.

    Args:
        sentence: Input handed unchanged to the model's ``encode`` method.
        model: Optional already-loaded model exposing ``encode``. Passing one
            avoids loading the model again on every call. When omitted, the
            default model is obtained through ``load_embedder_model``.

    Returns:
        Whatever ``model.encode(sentence)`` returns.
    """
    if model is None:
        model_name = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
        model = load_embedder_model(model_name)
    return model.encode(sentence)
|
|
|
|
if __name__ == '__main__':
    # Placeholder: running this file as a script currently performs no work.
    pass
|
|
|