42 lines
2.1 KiB
Python
42 lines
2.1 KiB
Python
|
# Bag-of-words (BoW) demo: compare short texts using scikit-learn count vectors.
|
||
|
import numpy as np
|
||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||
|
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, paired_distances, pairwise_distances
|
||
|
|
||
|
# Input texts to compare.
text0 = "قانون خانواده و جوانی جمعیت"
text1 = "قانون اصلاح موادی از قانون خانواده و جوانی جمعیت ایران"
text2 = "خانواده و جمعیت جوانی مهم است"

# Build the bag-of-words count matrix: the vectorizer is fitted on the three
# texts and returns one row per text, in the order given.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform([text0, text1, text2])

# Convert the sparse count matrix into a dense NumPy array.
X_array = X.toarray()

# Each call receives the whole matrix, so entry [i, j] of every result
# compares text i with text j.
similarity = cosine_similarity(X_array)
manhattan_distance = manhattan_distances(X_array)
# No metric is passed here, so scikit-learn's default applies
# (documented as Euclidean).
pairwise_distance = pairwise_distances(X_array)

# The same three text pairs are reported for every metric. Each entry holds
# the two labels to print and the (row, column) index read from the matrix.
_pairs = (
    ("text0", "text1", 0, 1),
    ("text0", "text2", 0, 2),
    ("text1", "text2", 2, 1),
)

# Printed label for each metric, paired with its result matrix, in output order.
_reports = (
    ("Cosine Similarity", similarity),
    ("manhattan_distance", manhattan_distance),
    ("pairwise_distance", pairwise_distance),
)

for _metric_name, _matrix in _reports:
    for _first, _second, _row, _col in _pairs:
        print(f"{_metric_name} between {_first} and {_second}: {_matrix[_row, _col]}")
|