change data_model.py
This commit is contained in:
parent
e9363a6434
commit
654fc60c73
Binary file not shown.
138
data_model.py
138
data_model.py
|
|
@ -19,7 +19,9 @@ def create_speechs_table():
|
||||||
url TEXT,
|
url TEXT,
|
||||||
types TEXT,
|
types TEXT,
|
||||||
arabic_text TEXT,
|
arabic_text TEXT,
|
||||||
interpretation_links TEXT
|
interpretation_links TEXT,
|
||||||
|
ai_title TEXT,
|
||||||
|
ai_paragraph_type TEXT
|
||||||
)
|
)
|
||||||
''')
|
''')
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
@ -33,8 +35,8 @@ def insert_data(data_list):
|
||||||
for data in data_list:
|
for data in data_list:
|
||||||
try:
|
try:
|
||||||
cursor.execute('''
|
cursor.execute('''
|
||||||
INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links)
|
INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type)
|
||||||
VALUES (:id, :context_id, :part_id, :title, :large_title, :norm_sentence, :url, :types, :arabic_text, :Interpretation_links)
|
VALUES (:id, :context_id, :part_id, :title, :large_title, :norm_sentence, :url, :types, :arabic_text, :Interpretation_links, :ai_title, :ai_paragraph_type)
|
||||||
''', data)
|
''', data)
|
||||||
except sqlite3.IntegrityError:
|
except sqlite3.IntegrityError:
|
||||||
print(f"Warning: Data with id '{data['id']}' already exists and was skipped.")
|
print(f"Warning: Data with id '{data['id']}' already exists and was skipped.")
|
||||||
|
|
@ -43,6 +45,80 @@ def insert_data(data_list):
|
||||||
conn.close()
|
conn.close()
|
||||||
print("Data inserted successfully!")
|
print("Data inserted successfully!")
|
||||||
|
|
||||||
|
|
||||||
|
# درج داده ها به صورت تکی
|
||||||
|
def insert_data_to_speechs(data: list):
    """Insert a single row into the ``speeches`` table of ``./db/nahj.db``.

    Args:
        data: The row to insert. Either a sequence holding the twelve values
            in the order given by ``columns`` below, or a dict keyed by those
            same bind-parameter names.

    Raises:
        sqlite3.ProgrammingError: If a sequence with the wrong number of
            values is supplied, or a dict is missing a bind parameter.

    A row that violates a table constraint (reported by SQLite as an
    ``IntegrityError``) is skipped with a warning instead of raising.
    """
    # Bind-parameter names used by the statement, in positional order.
    columns = (
        'id', 'context_id', 'part_id', 'title', 'large_title',
        'norm_sentence', 'url', 'types', 'arabic_text',
        'Interpretation_links', 'ai_title', 'ai_paragraph_type',
    )

    # Named placeholders need a mapping: binding a plain sequence to them is
    # deprecated in recent Python versions, and the warning below needs to
    # look the id up by name.
    if isinstance(data, dict):
        params = data
    else:
        values = list(data)
        if len(values) != len(columns):
            raise sqlite3.ProgrammingError(
                f"insert_data_to_speechs expects {len(columns)} values, "
                f"got {len(values)}"
            )
        params = dict(zip(columns, values))

    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''
                INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type)
                VALUES (:id, :context_id, :part_id, :title, :large_title, :norm_sentence, :url, :types, :arabic_text, :Interpretation_links, :ai_title, :ai_paragraph_type)
            ''', params)
        except sqlite3.IntegrityError:
            print(f"Warning: Data with id '{params['id']}' already exists and was skipped.")
        else:
            conn.commit()
            # Only report success when a row was actually written.
            print("Data inserted successfully!")
    finally:
        # Always release the connection, even if execute() raised.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def insert_data_to_central_concepts(data: list):
    """Insert a single row into the ``central_concepts`` table of ``./db/nahj.db``.

    Args:
        data: The row to insert. Either a sequence of four values ordered as
            ``(id, concept, paragraph_effect, part_id)`` or a dict keyed by
            those names.

    Raises:
        sqlite3.ProgrammingError: If a sequence with the wrong number of
            values is supplied, or a dict is missing a bind parameter.

    A row that violates a table constraint (reported by SQLite as an
    ``IntegrityError``) is skipped with a warning instead of raising.
    """
    # Bind-parameter names used by the statement, in positional order.
    columns = ('id', 'concept', 'paragraph_effect', 'part_id')

    # Named placeholders need a mapping: binding a plain sequence to them is
    # deprecated in recent Python versions, and the warning below needs to
    # look the id up by name.
    if isinstance(data, dict):
        params = data
    else:
        values = list(data)
        if len(values) != len(columns):
            raise sqlite3.ProgrammingError(
                f"insert_data_to_central_concepts expects {len(columns)} "
                f"values, got {len(values)}"
            )
        params = dict(zip(columns, values))

    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''
                INSERT INTO central_concepts (id, concept, paragraph_effect, part_id)
                VALUES (:id, :concept, :paragraph_effect, :part_id)
            ''', params)
        except sqlite3.IntegrityError:
            print(f"Warning: Data with id '{params['id']}' already exists and was skipped.")
        else:
            conn.commit()
            # Only report success when a row was actually written.
            print("Data inserted successfully!")
    finally:
        # Always release the connection, even if execute() raised.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def insert_data_to_persons(data: list):
    """Insert a single row into the ``persons`` table of ``./db/nahj.db``.

    Args:
        data: The row to insert. Either a sequence of three values ordered as
            ``(id, person, part_id)`` or a dict keyed by those names.

    Raises:
        sqlite3.ProgrammingError: If a sequence with the wrong number of
            values is supplied, or a dict is missing a bind parameter.

    A row that violates a table constraint (reported by SQLite as an
    ``IntegrityError``) is skipped with a warning instead of raising.
    """
    # Bind-parameter names used by the statement, in positional order.
    columns = ('id', 'person', 'part_id')

    # Named placeholders need a mapping: binding a plain sequence to them is
    # deprecated in recent Python versions, and the warning below needs to
    # look the id up by name.
    if isinstance(data, dict):
        params = data
    else:
        values = list(data)
        if len(values) != len(columns):
            raise sqlite3.ProgrammingError(
                f"insert_data_to_persons expects {len(columns)} values, "
                f"got {len(values)}"
            )
        params = dict(zip(columns, values))

    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''
                INSERT INTO persons (id, person, part_id)
                VALUES (:id, :person, :part_id)
            ''', params)
        except sqlite3.IntegrityError:
            print(f"Warning: Data with id '{params['id']}' already exists and was skipped.")
        else:
            conn.commit()
            # Only report success when a row was actually written.
            print("Data inserted successfully!")
    finally:
        # Always release the connection, even if execute() raised.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def insert_data_to_rules(data: list):
    """Insert a single row into the ``rules`` table of ``./db/nahj.db``.

    Args:
        data: The row to insert. Either a sequence of four values ordered as
            ``(id, rule, type, part_id)`` or a dict keyed by those names.

    Raises:
        sqlite3.ProgrammingError: If a sequence with the wrong number of
            values is supplied, or a dict is missing a bind parameter.

    A row that violates a table constraint (reported by SQLite as an
    ``IntegrityError``) is skipped with a warning instead of raising.
    """
    # Bind-parameter names used by the statement, in positional order.
    columns = ('id', 'rule', 'type', 'part_id')

    # Named placeholders need a mapping: binding a plain sequence to them is
    # deprecated in recent Python versions, and the warning below needs to
    # look the id up by name.
    if isinstance(data, dict):
        params = data
    else:
        values = list(data)
        if len(values) != len(columns):
            raise sqlite3.ProgrammingError(
                f"insert_data_to_rules expects {len(columns)} values, "
                f"got {len(values)}"
            )
        params = dict(zip(columns, values))

    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''
                INSERT INTO rules (id, rule, type, part_id)
                VALUES (:id, :rule, :type, :part_id)
            ''', params)
        except sqlite3.IntegrityError:
            print(f"Warning: Data with id '{params['id']}' already exists and was skipped.")
        else:
            conn.commit()
            # Only report success when a row was actually written.
            print("Data inserted successfully!")
    finally:
        # Always release the connection, even if execute() raised.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
# خواندن داده بر اساس id
|
# خواندن داده بر اساس id
|
||||||
def get_data_by_id(record_id):
|
def get_data_by_id(record_id):
|
||||||
conn = sqlite3.connect('./db/nahj.db')
|
conn = sqlite3.connect('./db/nahj.db')
|
||||||
|
|
@ -398,9 +474,65 @@ def get_last_request():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def create_central_concepts_table():
    """Create the ``central_concepts`` table in ``./db/nahj.db`` if it is missing.

    Columns: ``id`` (TEXT, primary key), ``concept`` (TEXT),
    ``paragraph_effect`` (REAL) and ``part_id`` (TEXT). Calling this when the
    table already exists leaves it untouched.
    """
    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()

        # Create the table only if it does not exist yet.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS central_concepts (
                id TEXT PRIMARY KEY,
                concept TEXT,
                paragraph_effect REAL,
                part_id TEXT
            )
        ''')
        conn.commit()
    finally:
        # Release the connection even if the DDL statement fails.
        conn.close()
    print("central_concepts table created successfully!")
|
||||||
|
|
||||||
|
def create_persons_table():
    """Create the ``persons`` table in ``./db/nahj.db`` if it is missing.

    Columns: ``id`` (TEXT, primary key), ``person`` (TEXT) and ``part_id``
    (TEXT). Calling this when the table already exists leaves it untouched.
    """
    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()

        # Create the table only if it does not exist yet.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS persons (
                id TEXT PRIMARY KEY,
                person TEXT,
                part_id TEXT
            )
        ''')
        conn.commit()
    finally:
        # Release the connection even if the DDL statement fails.
        conn.close()
    print("persons table created successfully!")
|
||||||
|
|
||||||
|
def create_rules_table():
    """Create the ``rules`` table in ``./db/nahj.db`` if it is missing.

    Columns: ``id`` (TEXT, primary key), ``rule`` (TEXT), ``type`` (TEXT) and
    ``part_id`` (TEXT). Calling this when the table already exists leaves it
    untouched.
    """
    conn = sqlite3.connect('./db/nahj.db')
    try:
        cursor = conn.cursor()

        # Create the table only if it does not exist yet.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS rules (
                id TEXT PRIMARY KEY,
                rule TEXT,
                type TEXT,
                part_id TEXT
            )
        ''')
        conn.commit()
    finally:
        # Release the connection even if the DDL statement fails.
        conn.close()
    print("rules table created successfully!")
|
||||||
|
|
||||||
|
|
||||||
def create_tables():
    """Create every table the application uses, skipping any that already exist.

    Runs the individual table builders in a fixed order: speeches, chat,
    rules, persons, central concepts.
    """
    builders = (
        create_speechs_table,
        create_chat_table,
        create_rules_table,
        create_persons_table,
        create_central_concepts_table,
    )
    for build_table in builders:
        build_table()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# مثال استفاده
|
# مثال استفاده
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
BIN
db/nahj-old.db
BIN
db/nahj-old.db
Binary file not shown.
BIN
db/nahj.db
BIN
db/nahj.db
Binary file not shown.
|
|
@ -9,9 +9,10 @@ from langchain_openai import ChatOpenAI
|
||||||
from nahj_get_metadata_oss import oss_test
|
from nahj_get_metadata_oss import oss_test
|
||||||
import asyncio
|
import asyncio
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import data_model as dm
|
||||||
|
|
||||||
conn = sqlite3.connect('./db/nahj.db')
|
# conn = sqlite3.connect('./db/nahj.db')
|
||||||
cursor = conn.cursor()
|
# cursor = conn.cursor()
|
||||||
|
|
||||||
today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime.datetime.now().day}'
|
today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime.datetime.now().day}'
|
||||||
|
|
||||||
|
|
@ -24,33 +25,8 @@ today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime
|
||||||
فرق این سورس با ورژن 1 در این هست که برای هرکدام از تایتل ها و مفاهیم کلیدی و ... از پرامپت مخصوص خودش
|
فرق این سورس با ورژن 1 در این هست که برای هرکدام از تایتل ها و مفاهیم کلیدی و ... از پرامپت مخصوص خودش
|
||||||
استفاده میکنه و در چهار فایل جداگانه ذخیره میکنه
|
استفاده میکنه و در چهار فایل جداگانه ذخیره میکنه
|
||||||
'''
|
'''
|
||||||
def create_tables():
    """Create the ``rules``, ``persons`` and ``central_concepts`` SQLite tables.

    Each table is created only if it does not already exist. Uses the
    module-level ``cursor`` and ``conn`` and commits once after all three
    statements have run.
    """
    ddl_statements = (
        """CREATE TABLE IF NOT EXISTS rules (
        id TEXT,
        rule TEXT,
        type TEXT,
        part_id TEXT
        );
        """,
        """CREATE TABLE IF NOT EXISTS persons (
        id TEXT,
        person TEXT,
        part_id TEXT
        );
        """,
        """CREATE TABLE IF NOT EXISTS central_concepts (
        id TEXT,
        concept TEXT,
        paragraph_effect REAL,
        part_id TEXT
        );
        """,
    )
    for statement in ddl_statements:
        cursor.execute(statement)

    conn.commit()
|
|
||||||
|
|
||||||
|
|
||||||
# پرامپت های مختلف که هرکدام یکی از (تایتل یا مفاهیم کلیدی یا شخصیت و یا قواعد) رو از متن نهج البلاغه استخراج میکنه
|
# پرامپت های مختلف که هرکدام یکی از (تایتل یا مفاهیم کلیدی یا شخصیت و یا قواعد) رو از متن نهج البلاغه استخراج میکنه
|
||||||
|
|
@ -497,7 +473,7 @@ def find_passed_data_ids(output_metadata_jsonl_path):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
create_tables() # ساخت تیبل های مورد نیاز اسکیو ال لایت در صورت عدم وجود
|
dm.create_tables() # متد ساخت تمام تیبل های مورد نیاز
|
||||||
input_data_path = './nahj_data/all_nahj_CONTEXT.json' # شامل تمامی (خطبه و نامه و حکمت ها) به همراه پاراگراف هایشان
|
input_data_path = './nahj_data/all_nahj_CONTEXT.json' # شامل تمامی (خطبه و نامه و حکمت ها) به همراه پاراگراف هایشان
|
||||||
llm_error_path = './nahj-answer/error-in-getting-metadata-Final.txt'
|
llm_error_path = './nahj-answer/error-in-getting-metadata-Final.txt'
|
||||||
previous_peroid_errors_path = "./nahj_data/error-ids-Final.txt"
|
previous_peroid_errors_path = "./nahj_data/error-ids-Final.txt"
|
||||||
|
|
@ -590,8 +566,8 @@ if __name__ == "__main__":
|
||||||
new_entry['paragraphs'] = new_paragraphs
|
new_entry['paragraphs'] = new_paragraphs
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result_data = asyncio.run(oss_test(SYSTEM_PROMPT,USER_PROMPT,new_entry))#gpt-4o
|
# result_data = asyncio.run(oss_test(SYSTEM_PROMPT,USER_PROMPT,new_entry))#gpt-4o
|
||||||
# result_data = llm_request(new_entry)
|
result_data = llm_request(new_entry)
|
||||||
llm_answer_data = text_to_dict(result_data)
|
llm_answer_data = text_to_dict(result_data)
|
||||||
if path == "title" :
|
if path == "title" :
|
||||||
entery['paragraph_metadata'] = []
|
entery['paragraph_metadata'] = []
|
||||||
|
|
@ -643,11 +619,11 @@ if __name__ == "__main__":
|
||||||
arabic_text = part['arabic_text']
|
arabic_text = part['arabic_text']
|
||||||
ai_title = entery['paragraph_metadata'][N]['title']
|
ai_title = entery['paragraph_metadata'][N]['title']
|
||||||
paragraph_type = entery['paragraph_metadata'][N]['paragraph_type']
|
paragraph_type = entery['paragraph_metadata'][N]['paragraph_type']
|
||||||
|
dm.insert_data_to_speechs([id_, context_id, part_id, title, large_title, text, url, typee, arabic_text, i_link, ai_title, paragraph_type])
|
||||||
cursor.execute("INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type) \
|
# cursor.execute("INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type) \
|
||||||
VALUES (?, ?, ?, ? ,? ,? ,? ,? ,? ,? ,? ,?)",
|
# VALUES (?, ?, ?, ? ,? ,? ,? ,? ,? ,? ,? ,?)",
|
||||||
(id_, context_id, part_id, title, large_title, text, url, typee, arabic_text, i_link, ai_title, paragraph_type))
|
# ([id_, context_id, part_id, title, large_title, text, url, typee, arabic_text, i_link, ai_title, paragraph_type]))
|
||||||
conn.commit()
|
# conn.commit()
|
||||||
|
|
||||||
central_concepts = entery['paragraph_metadata'][N]['central_concepts']
|
central_concepts = entery['paragraph_metadata'][N]['central_concepts']
|
||||||
persons = entery['paragraph_metadata'][N]['persons']
|
persons = entery['paragraph_metadata'][N]['persons']
|
||||||
|
|
@ -659,18 +635,20 @@ if __name__ == "__main__":
|
||||||
c_id = context_id+part_id+f"c{k}"
|
c_id = context_id+part_id+f"c{k}"
|
||||||
concept = row['concept']
|
concept = row['concept']
|
||||||
paragraph_effect = row['paragraph_effect']
|
paragraph_effect = row['paragraph_effect']
|
||||||
cursor.execute("""INSERT INTO central_concepts (id, concept, paragraph_effect, part_id)
|
dm.insert_data_to_central_concepts([c_id, concept, paragraph_effect, part_id])
|
||||||
VALUES(?, ?, ?, ?)""",(c_id, concept, paragraph_effect, part_id))
|
# cursor.execute("""INSERT INTO central_concepts (id, concept, paragraph_effect, part_id)
|
||||||
conn.commit()
|
# VALUES(?, ?, ?, ?)""",(c_id, concept, paragraph_effect, part_id))
|
||||||
|
# conn.commit()
|
||||||
|
|
||||||
k=0
|
k=0
|
||||||
for row in persons:
|
for row in persons:
|
||||||
k+=1
|
k+=1
|
||||||
c_id = context_id+part_id+f"p{k}"
|
c_id = context_id+part_id+f"p{k}"
|
||||||
person = row
|
person = row
|
||||||
cursor.execute("""INSERT INTO persons (id, person, part_id)
|
dm.insert_data_to_persons([c_id, person, part_id])
|
||||||
VALUES(?, ?, ?)""",(c_id, person, part_id))
|
# cursor.execute("""INSERT INTO persons (id, person, part_id)
|
||||||
conn.commit()
|
# VALUES(?, ?, ?)""",(c_id, person, part_id))
|
||||||
|
# conn.commit()
|
||||||
|
|
||||||
k=0
|
k=0
|
||||||
for row in rules:
|
for row in rules:
|
||||||
|
|
@ -678,9 +656,10 @@ if __name__ == "__main__":
|
||||||
c_id = context_id+part_id+f"r{k}"
|
c_id = context_id+part_id+f"r{k}"
|
||||||
rule = row['rule']
|
rule = row['rule']
|
||||||
rule_type = row['type']
|
rule_type = row['type']
|
||||||
cursor.execute("""INSERT INTO rules (id, rule, type, part_id)
|
dm.insert_data_to_rules([c_id, rule, rule_type, part_id])
|
||||||
VALUES(?, ?, ?, ?)""",(c_id, rule, rule_type, part_id))
|
# cursor.execute("""INSERT INTO rules (id, rule, type, part_id)
|
||||||
conn.commit()
|
# VALUES(?, ?, ?, ?)""",([c_id, rule, rule_type, part_id]))
|
||||||
|
# conn.commit()
|
||||||
|
|
||||||
N+=1
|
N+=1
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user