first process
This commit is contained in:
parent 8eb3f5e5ed
commit 7e456568e5
.vscode/launch.json (vendored, new file, +18)
@@ -0,0 +1,18 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Current File",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            //"console": "integratedTerminal",
            "console": "internalConsole",
            "justMyCode": false,
            "python": "/home/gpu/NLP/.env/bin/python"
        }
    ]
}
File diff suppressed because one or more lines are too long
embedder.py (new file, +530)
@@ -0,0 +1,530 @@
# !pip install hazm
# !pip install transformers==4.26.0
# !pip install --upgrade numpy
# !pip install --upgrade sentence-transformers
"""
Persian Sentence Processing and Vector Analysis
===============================================

This script processes Persian sentences from a JSON file and performs:
1. Word extraction and preprocessing
2. Vector representation using a multilingual transformer
3. Similarity analysis for key words
4. Dimensionality reduction to 3D
5. 3D visualization with Persian labels

Author: NLP Expert Assistant
"""

import json
import re
import numpy as np
import pandas as pd
from typing import List, Dict, Tuple, Set
from collections import Counter
import logging
from pathlib import Path

# NLP and ML libraries
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity

# Visualization libraries
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# import plotly.express as px
# from plotly.subplots import make_subplots

# Persian text processing
import hazm
from hazm import Normalizer, word_tokenize, POSTagger

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class PersianVectorAnalyzer:
    """
    A comprehensive class for Persian text processing and vector analysis.
    """

    def __init__(self, model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
        """
        Initialize the analyzer with the specified model.

        Args:
            model_name: The sentence transformer model to use
        """
        self.model_name = model_name
        self.model = None
        self.normalizer = Normalizer()
        self.stop_words = self._load_persian_stop_words()
        self.key_words = [
            "خدا", "بنده", "جهاد", "ولی", "زکات",
            "نماز", "صبر", "عبادت", "ولایت", "خلافت", "پیامبر"
        ]

        logger.info(f"Initializing Persian Vector Analyzer with model: {model_name}")

    def _load_persian_stop_words(self) -> Set[str]:
        """
        Load Persian stop words.

        Returns:
            Set of Persian stop words
        """
        # Common Persian stop words
        stop_words = {
            'و', 'در', 'به', 'از', 'که', 'این', 'آن', 'با', 'برای', 'تا',
            'را', 'هم', 'یا', 'اما', 'اگر', 'چون', 'چرا', 'چگونه', 'کجا',
            'چه', 'کی', 'چند', 'چقدر', 'همه', 'هیچ', 'بعضی', 'هر', 'همه',
            'خود', 'خویش', 'ما', 'شما', 'آنها', 'ایشان', 'اینها', 'آنها',
            'من', 'تو', 'او', 'ما', 'شما', 'آنها', 'ایشان', 'اینها',
            'است', 'هست', 'بود', 'شد', 'می', 'باید', 'خواهد', 'دارد',
            'کرد', 'شد', 'بود', 'هست', 'است', 'میشود', 'میکند',
            'یک', 'دو', 'سه', 'چهار', 'پنج', 'شش', 'هفت', 'هشت', 'نه', 'ده',
            'اول', 'دوم', 'سوم', 'چهارم', 'پنجم', 'ششم', 'هفتم', 'هشتم', 'نهم', 'دهم',
            # Comma added after 'پس' so it and 'بله' stay separate entries instead of
            # being concatenated into a single string by implicit literal joining.
            'سال', 'ماه', 'روز', 'هفته', 'ساعت', 'دقیقه', 'ثانیه', 'پس',
            'بله', 'نه', 'آری', 'خیر', 'بلی', 'نخیر',
            'حالا', 'الان', 'امروز', 'دیروز', 'فردا', 'هفته', 'ماه', 'سال',
            'بالا', 'پایین', 'چپ', 'راست', 'جلو', 'عقب', 'داخل', 'خارج',
            'بزرگ', 'کوچک', 'بلند', 'کوتاه', 'پهن', 'باریک', 'ضخیم', 'نازک',
        }
        return stop_words

    def load_model(self):
        """
        Load the sentence transformer model.
        """
        try:
            logger.info("Loading sentence transformer model...")
            self.model = SentenceTransformer(self.model_name)
            logger.info("Model loaded successfully!")
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            raise

    def load_json_data(self, file_path: str) -> List[str]:
        """
        Load Persian sentences from JSON file.

        Args:
            file_path: Path to the JSON file

        Returns:
            List of Persian sentences
        """
        try:
            logger.info(f"Loading data from {file_path}")
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if isinstance(data, dict):
                temp_data = []
                for item in data.items():
                    temp_data.append(item[1])
                data = temp_data

            sentences = []
            if isinstance(data, list):
                for item in data:
                    if isinstance(item, dict):
                        # Extract sentences from different possible keys
                        for key in ['persian_translate']:
                            if key in item and item[key]:
                                sentences.append(str(item[key]))
                    elif isinstance(item, str):
                        sentences.append(item)
            elif isinstance(data, dict):
                # If it's a single object, extract all string values
                for value in data.values():
                    if isinstance(value, str):
                        sentences.append(value)

            logger.info(f"Loaded {len(sentences)} sentences")
            return sentences

        except Exception as e:
            logger.error(f"Error loading JSON data: {e}")
            raise
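
    # Illustrative note (added, not from the original file): given how load_json_data
    # walks the parsed JSON above, input shaped like the sketch below yields one sentence
    # per record, taken from its 'persian_translate' field; the placeholder values are
    # made up. A top-level dict of such records, or a plain list of strings, also works.
    #
    #     [
    #         {"persian_translate": "<Persian sentence 1>"},
    #         {"persian_translate": "<Persian sentence 2>"}
    #     ]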

    def preprocess_text(self, text: str) -> str:
        """
        Preprocess Persian text.

        Args:
            text: Raw Persian text

        Returns:
            Preprocessed text
        """
        # Normalize text
        text = self.normalizer.normalize(text)

        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text)

        # Remove special characters but keep Persian characters
        text = re.sub(r'[^\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\s]', '', text)

        return text.strip()
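
    # Descriptive note (added): the character filter above keeps whitespace plus the
    # Arabic-script Unicode blocks U+0600–U+06FF (Arabic, which covers Persian letters),
    # U+0750–U+077F (Arabic Supplement), U+08A0–U+08FF (Arabic Extended-A), and the
    # Arabic Presentation Forms ranges U+FB50–U+FDFF / U+FE70–U+FEFF, so Latin letters,
    # digits and punctuation are stripped; e.g. preprocess_text("نماز 123 abc!")
    # returns "نماز".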

    def extract_words(self, sentences: List[str]) -> List[str]:
        """
        Extract all words from sentences.

        Args:
            sentences: List of Persian sentences

        Returns:
            List of all words
        """
        all_words = []

        for sentence in sentences:
            # Preprocess sentence
            processed_sentence = self.preprocess_text(sentence)

            # Tokenize
            words = word_tokenize(processed_sentence)
            # words = processed_sentence.split()
            # Filter out empty strings and very short words
            words = [word for word in words if len(word) > 1]

            all_words.extend(words)

        logger.info(f"Extracted {len(all_words)} words from {len(sentences)} sentences")
        return all_words

    def remove_stop_words(self, words: List[str]) -> List[str]:
        """
        Remove stop words from the word list.

        Args:
            words: List of words

        Returns:
            List of words without stop words
        """
        filtered_words = [word for word in words if word not in self.stop_words]
        logger.info(f"Removed {len(words) - len(filtered_words)} stop words")
        return filtered_words

    def get_unique_words(self, words: List[str]) -> List[str]:
        """
        Get unique words from the list.

        Args:
            words: List of words

        Returns:
            List of unique words
        """
        unique_words = list(set(words))
        logger.info(f"Found {len(unique_words)} unique words from {len(words)} total words")
        return unique_words

    def compute_word_vectors(self, words: List[str]) -> Dict[str, List[float]]:
        """
        Compute vector representations for words.

        Args:
            words: List of unique words

        Returns:
            Dictionary mapping words to their vector representations
        """
        if self.model is None:
            self.load_model()

        logger.info(f"Computing vectors for {len(words)} words...")

        # Compute embeddings
        embeddings = self.model.encode(words, show_progress_bar=True)

        # Create dictionary
        word_vectors = {}
        for i, word in enumerate(words):
            word_vectors[word] = embeddings[i].tolist()

        logger.info("Word vectors computed successfully!")
        return word_vectors
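
    # Note (added, stated as an assumption): the default
    # paraphrase-multilingual-MiniLM-L12-v2 model is typically reported to produce
    # 384-dimensional embeddings, so words_vector.json would store a list of 384 floats
    # per word; a different model_name changes that length.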

    def find_closest_words(self, word_vectors: Dict[str, List[float]],
                           key_words: List[str], top_k: int = 20) -> Dict[str, List[str]]:
        """
        Find the closest words to each key word.

        Args:
            word_vectors: Dictionary of word vectors
            key_words: List of key words to find neighbors for
            top_k: Number of closest words to find

        Returns:
            Dictionary mapping key words to their closest neighbors
        """
        logger.info(f"Finding {top_k} closest words for {len(key_words)} key words...")

        # Convert to numpy arrays for faster computation
        words = list(word_vectors.keys())
        vectors = np.array(list(word_vectors.values()))

        closest_words = {}

        for key_word in key_words:
            if key_word in word_vectors:
                # Get the key word vector
                key_vector = np.array(word_vectors[key_word]).reshape(1, -1)

                # Compute cosine similarities
                similarities = cosine_similarity(key_vector, vectors)[0]

                # Get indices of top k similar words (excluding the key word itself)
                word_indices = np.argsort(similarities)[::-1]

                # Filter out the key word itself and get top k
                closest_indices = []
                for idx in word_indices:
                    if words[idx] != key_word and len(closest_indices) < top_k:
                        closest_indices.append(idx)

                # Get the closest words
                closest_words[key_word] = [words[idx] for idx in closest_indices]
                logger.info(f"Found {len(closest_words[key_word])} closest words for '{key_word}'")
            else:
                logger.warning(f"Key word '{key_word}' not found in word vectors")
                closest_words[key_word] = []

        return closest_words
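
    # Illustrative note (added): with the key_words defined in __init__ and top_k=20,
    # the returned mapping has the same shape as output/selected_words.json in this
    # commit, for example:
    #
    #     {
    #         "خدا": ["بالله", "خدای", "خداوند", ...],
    #         "ولایت": []   # key word absent from the vocabulary
    #     }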

    def reduce_to_3d(self, word_vectors: Dict[str, List[float]],
                     method: str = 'tsne') -> Dict[str, List[float]]:
        """
        Reduce word vectors to 3D coordinates.

        Args:
            word_vectors: Dictionary of word vectors
            method: Dimensionality reduction method ('pca' or 'tsne')

        Returns:
            Dictionary mapping words to their 3D coordinates
        """
        logger.info(f"Reducing dimensions to 3D using {method.upper()}...")

        words = list(word_vectors.keys())
        vectors = np.array(list(word_vectors.values()))

        if method.lower() == 'pca':
            reducer = PCA(n_components=3, random_state=42)
        elif method.lower() == 'tsne':
            reducer = TSNE(n_components=3, random_state=42, perplexity=min(30, len(vectors)-1))
        else:
            raise ValueError("Method must be 'pca' or 'tsne'")

        # Reduce dimensions
        reduced_vectors = reducer.fit_transform(vectors)

        # Create dictionary
        word_vectors_3d = {}
        for i, word in enumerate(words):
            word_vectors_3d[word] = reduced_vectors[i].tolist()

        logger.info("Dimensionality reduction completed!")
        return word_vectors_3d
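
    # Descriptive note (added): scikit-learn's TSNE requires perplexity to be smaller
    # than the number of samples, which is why the call above caps it with
    # min(30, len(vectors) - 1) instead of relying on the fixed default of 30.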

    def save_json(self, data: dict, file_path: str):
        """
        Save data to JSON file.

        Args:
            data: Data to save
            file_path: Output file path
        """
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            logger.info(f"Data saved to {file_path}")
        except Exception as e:
            logger.error(f"Error saving to {file_path}: {e}")
            raise

    # def create_3d_visualization(self, word_vectors_3d: Dict[str, List[float]],
    #                             selected_words: Dict[str, List[str]],
    #                             output_path: str = "persian_words_3d.html"):
    #     """
    #     Create 3D visualization of words.

    #     Args:
    #         word_vectors_3d: Dictionary of 3D word coordinates
    #         selected_words: Dictionary of selected words for each key word
    #         output_path: Output file path for the visualization
    #     """
    #     logger.info("Creating 3D visualization...")

    #     # Prepare data for plotting
    #     words = list(word_vectors_3d.keys())
    #     coords = np.array(list(word_vectors_3d.values()))

    #     # Create color mapping for key words and their neighbors
    #     colors = []
    #     sizes = []
    #     hover_texts = []

    #     for word in words:
    #         # Check if word is a key word
    #         is_key_word = word in self.key_words

    #         # Check if word is in selected words
    #         in_selected = False
    #         key_word_group = None
    #         for key_word, selected_list in selected_words.items():
    #             if word in selected_list:
    #                 in_selected = True
    #                 key_word_group = key_word
    #                 break

    #         if is_key_word:
    #             colors.append('red')
    #             sizes.append(15)
    #             hover_texts.append(f"کلیدواژه: {word}")
    #         elif in_selected:
    #             colors.append('blue')
    #             sizes.append(10)
    #             hover_texts.append(f"کلمه مرتبط با '{key_word_group}': {word}")
    #         else:
    #             colors.append('lightgray')
    #             sizes.append(5)
    #             hover_texts.append(f"کلمه: {word}")

    #     # Create 3D scatter plot
    #     fig = go.Figure()

    #     # Add scatter plot
    #     fig.add_trace(go.Scatter3d(
    #         x=coords[:, 0],
    #         y=coords[:, 1],
    #         z=coords[:, 2],
    #         mode='markers+text',
    #         marker=dict(
    #             size=sizes,
    #             color=colors,
    #             opacity=0.8
    #         ),
    #         text=words,
    #         textposition="middle center",
    #         hovertext=hover_texts,
    #         hoverinfo='text'
    #     ))

    #     # Update layout
    #     fig.update_layout(
    #         title={
    #             'text': 'نمایش سهبعدی کلمات فارسی',
    #             'x': 0.5,
    #             'xanchor': 'center',
    #             'font': {'size': 20}
    #         },
    #         scene=dict(
    #             xaxis_title='محور X',
    #             yaxis_title='محور Y',
    #             zaxis_title='محور Z',
    #             camera=dict(
    #                 eye=dict(x=1.5, y=1.5, z=1.5)
    #             )
    #         ),
    #         width=1000,
    #         height=800,
    #         showlegend=False
    #     )

    #     # Save the plot
    #     fig.write_html(output_path)
    #     logger.info(f"3D visualization saved to {output_path}")

    #     return fig

    def process_pipeline(self, input_file: str, output_dir: str = "output"):
        """
        Run the complete processing pipeline.

        Args:
            input_file: Path to input JSON file
            output_dir: Output directory for results
        """
        # Create output directory
        Path(output_dir).mkdir(exist_ok=True)

        logger.info("Starting Persian Vector Analysis Pipeline...")

        # Step 1: Load data
        sentences = self.load_json_data(input_file)

        # Step 2: Extract words
        all_words = self.extract_words(sentences)

        # Step 3: Remove stop words (currently skipped; all words are kept)
        # filtered_words = self.remove_stop_words(all_words)
        filtered_words = all_words

        # Step 4: Get unique words
        unique_words = self.get_unique_words(filtered_words)

        # Step 5: Compute word vectors
        word_vectors = self.compute_word_vectors(unique_words)

        # Step 6: Save word vectors
        self.save_json(word_vectors, f"{output_dir}/words_vector.json")

        # Step 7: Find closest words to key words
        selected_words = self.find_closest_words(word_vectors, self.key_words)

        # Step 8: Save selected words
        self.save_json(selected_words, f"{output_dir}/selected_words.json")

        # Step 9: Reduce to 3D
        word_vectors_3d = self.reduce_to_3d(word_vectors, method='tsne')

        # Step 10: Save 3D vectors
        self.save_json(word_vectors_3d, f"{output_dir}/words_vector_3d.json")

        # Step 11: Create visualization (disabled; see the commented-out create_3d_visualization)
        # self.create_3d_visualization(word_vectors_3d, selected_words,
        #                              f"{output_dir}/persian_words_3d.html")

        logger.info("Pipeline completed successfully!")

        # Print summary
        print("\n" + "="*50)
        print("PIPELINE SUMMARY")
        print("="*50)
        print(f"Input sentences: {len(sentences)}")
        print(f"Total words extracted: {len(all_words)}")
        print(f"Unique words after preprocessing: {len(unique_words)}")
        print(f"Word vectors computed: {len(word_vectors)}")
        print(f"Key words processed: {len(self.key_words)}")
        print(f"Output files saved to: {output_dir}/")
        print("="*50)


def main():
    """
    Main function to run the Persian Vector Analysis.
    """
    # Initialize analyzer
    analyzer = PersianVectorAnalyzer()

    # Define input and output paths
    input_file = "./data/final_wisdom.json"
    output_dir = "output"

    # Run the complete pipeline
    analyzer.process_pipeline(input_file, output_dir)


if __name__ == "__main__":
    main()
output/selected_words.json (new file, +223)
@@ -0,0 +1,223 @@
{
  "خدا": [
    "بالله",
    "خدای",
    "خداوند",
    "خدایی",
    "الله",
    "خدایا",
    "الهی",
    "لله",
    "آله",
    "خداییم",
    "الرب",
    "خداوندا",
    "خدایش",
    "حضرت",
    "یاسر",
    "آیه",
    "بهشتش",
    "تعالی",
    "باطنم",
    "وعید"
  ],
  "بنده": [
    "مالک",
    "پیشگاه",
    "قربانگاه",
    "فرمانروایی",
    "کوچ",
    "مالکی",
    "قربانگاههای",
    "خزانهدار",
    "پیشوای",
    "جانشین",
    "همنشین",
    "مأمور",
    "مستولی",
    "منکرات",
    "بندهاش",
    "اختیار",
    "منکری",
    "حاکم",
    "عبد",
    "زمامداران"
  ],
  "جهاد": [
    "مجاهد",
    "اسلام",
    "مسلم",
    "شامیان",
    "علیهالسلام",
    "مسلمانان",
    "قرآن",
    "طلبان",
    "صلیالله",
    "عبیدالله",
    "امان",
    "عبدالله",
    "شامی",
    "خلافت",
    "پیغمبر",
    "مسلمین",
    "سپاه",
    "سید",
    "علی",
    "پیامبر"
  ],
  "ولی": [
    "اما",
    "مگر",
    "وإنما",
    "إلا",
    "اگرچه",
    "برخلاف",
    "خلافی",
    "درحالیکه",
    "بلکه",
    "إلیها",
    "غیرش",
    "لان",
    "وگرنه",
    "بخلاف",
    "ورزند",
    "چنانچه",
    "وگروه",
    "بس",
    "وبالش",
    "واگر"
  ],
  "زکات": [
    "گلوگاه",
    "غنائمی",
    "مینگرند",
    "غبن",
    "دراز",
    "نزند",
    "میافکند",
    "گرچه",
    "زبیر",
    "تابی",
    "طغیان",
    "بلاغت",
    "توفیق",
    "ضبائی",
    "قیمة",
    "فریفتند",
    "آمیز",
    "پوشی",
    "طویلة",
    "سوگشان"
  ],
  "نماز": [
    "دعا",
    "صلوات",
    "دعای",
    "دعایی",
    "عبادت",
    "مومنان",
    "مؤمنان",
    "ایمانی",
    "مؤمنی",
    "مؤمن",
    "مومن",
    "برکت",
    "ایمان",
    "المؤمنین",
    "ایمانش",
    "رحمت",
    "مؤمنانم",
    "دینی",
    "ایمانتان",
    "معنوی"
  ],
  "صبر": [
    "انتظار",
    "یصبر",
    "لایصبر",
    "صبور",
    "پروا",
    "متکبر",
    "تعویذ",
    "دعائم",
    "سکونت",
    "رکاب",
    "إرواد",
    "ماند",
    "پرخوری",
    "دنبال",
    "استهزاء",
    "میپیچید",
    "دوشید",
    "بیندیشید",
    "تقوای",
    "نفرماید"
  ],
  "عبادت": [
    "دعایی",
    "دعای",
    "صلوات",
    "نماز",
    "دعا",
    "خدای",
    "مومن",
    "خداوند",
    "بالله",
    "خدا",
    "برکت",
    "مؤمنانم",
    "الهی",
    "خدایا",
    "الرب",
    "لله",
    "آله",
    "ایمانی",
    "الله",
    "خدایی"
  ],
  "ولایت": [],
  "خلافت": [
    "سپاه",
    "حاکم",
    "امت",
    "فرمانروایی",
    "لشکر",
    "قصار",
    "امان",
    "برترند",
    "نهاد",
    "زمامداران",
    "وحکمة",
    "ستمگری",
    "الإبل",
    "بالإبل",
    "مسلط",
    "سرکش",
    "اختیار",
    "امانی",
    "مأموریت",
    "حکومتی"
  ],
  "پیامبر": [
    "پیغمبر",
    "پیامبرش",
    "پیامبران",
    "پیامبرتان",
    "قرآن",
    "رسولالله",
    "مجاهد",
    "عبیدالله",
    "الله",
    "مسلم",
    "ربانی",
    "اسلام",
    "خدای",
    "ایمانی",
    "یاسر",
    "شهید",
    "خدایی",
    "بالله",
    "صلیالله",
    "خدا"
  ]
}
output/words_vector.json (new file, +1820764)
File diff suppressed because it is too large
output/words_vector_3d.json (new file, +23587)
File diff suppressed because it is too large