diff --git a/__pycache__/data_model.cpython-312.pyc b/__pycache__/data_model.cpython-312.pyc index 95e0529..5635491 100644 Binary files a/__pycache__/data_model.cpython-312.pyc and b/__pycache__/data_model.cpython-312.pyc differ diff --git a/data_model.py b/data_model.py index ac7f3e0..b095d52 100644 --- a/data_model.py +++ b/data_model.py @@ -19,7 +19,9 @@ def create_speechs_table(): url TEXT, types TEXT, arabic_text TEXT, - interpretation_links TEXT + interpretation_links TEXT, + ai_title TEXT, + ai_paragraph_type TEXT ) ''') conn.commit() @@ -33,8 +35,8 @@ def insert_data(data_list): for data in data_list: try: cursor.execute(''' - INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links) - VALUES (:id, :context_id, :part_id, :title, :large_title, :norm_sentence, :url, :types, :arabic_text, :Interpretation_links) + INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type) + VALUES (:id, :context_id, :part_id, :title, :large_title, :norm_sentence, :url, :types, :arabic_text, :Interpretation_links, :ai_title, :ai_paragraph_type) ''', data) except sqlite3.IntegrityError: print(f"Warning: Data with id '{data['id']}' already exists and was skipped.") @@ -43,6 +45,80 @@ def insert_data(data_list): conn.close() print("Data inserted successfully!") + +# درج داده ها به صورت تکی +def insert_data_to_speechs(data: list): + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type) + VALUES (:id, :context_id, :part_id, :title, :large_title, :norm_sentence, :url, :types, :arabic_text, :Interpretation_links, :ai_title, :ai_paragraph_type) + ''', data) + except sqlite3.IntegrityError: + print(f"Warning: Data with id '{data['id']}' already exists and was skipped.") + + conn.commit() + conn.close() + print("Data inserted successfully!") + + +def insert_data_to_central_concepts(data: list): + + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT INTO central_concepts (id, concept, paragraph_effect, part_id) + VALUES (:id, :concept, :paragraph_effect, :part_id) + ''', data) + except sqlite3.IntegrityError: + print(f"Warning: Data with id '{data['id']}' already exists and was skipped.") + + conn.commit() + conn.close() + print("Data inserted successfully!") + + +def insert_data_to_persons(data: list ): + + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + + try: + cursor.execute(''' + INSERT INTO persons (id, person, part_id) + VALUES (:id, :person, :part_id) + ''', data) + except sqlite3.IntegrityError: + print(f"Warning: Data with id '{data['id']}' already exists and was skipped.") + + conn.commit() + conn.close() + print("Data inserted successfully!") + + +def insert_data_to_rules(data: list ): + + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + try: + cursor.execute(''' + INSERT INTO rules (id, rule, type, part_id) + VALUES (:id, :rule, :type, :part_id) + ''', data) + except sqlite3.IntegrityError: + print(f"Warning: Data with id '{data['id']}' already exists and was skipped.") + + conn.commit() + conn.close() + print("Data inserted successfully!") + + # خواندن داده بر اساس id def get_data_by_id(record_id): conn = sqlite3.connect('./db/nahj.db') @@ -398,9 +474,65 @@ def get_last_request(): return None +def create_central_concepts_table(): + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + # ایجاد جدول اگر وجود نداشته باشد + cursor.execute(''' + CREATE TABLE IF NOT EXISTS central_concepts ( + id TEXT PRIMARY KEY, + concept TEXT, + paragraph_effect REAL, + part_id TEXT + ) + ''') + conn.commit() + conn.close() + print("central_concepts table created successfully!") + +def create_persons_table(): + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + # ایجاد جدول اگر وجود نداشته باشد + cursor.execute(''' + CREATE TABLE IF NOT EXISTS persons ( + id TEXT PRIMARY KEY, + person TEXT, + part_id TEXT + ) + ''') + conn.commit() + conn.close() + print("persons table created successfully!") + +def create_rules_table(): + conn = sqlite3.connect('./db/nahj.db') + cursor = conn.cursor() + + # ایجاد جدول اگر وجود نداشته باشد + cursor.execute(''' + CREATE TABLE IF NOT EXISTS rules ( + id TEXT PRIMARY KEY, + rule TEXT, + type TEXT, + part_id TEXT + ) + ''') + conn.commit() + conn.close() + print("rules table created successfully!") + + def create_tables(): create_speechs_table() create_chat_table() + create_rules_table() + create_persons_table() + create_central_concepts_table() + + # مثال استفاده if __name__ == "__main__": diff --git a/db/nahj-old.db b/db/nahj-old.db index 7ce1bde..40ba437 100644 Binary files a/db/nahj-old.db and b/db/nahj-old.db differ diff --git a/db/nahj.db b/db/nahj.db index 40ba437..7015939 100644 Binary files a/db/nahj.db and b/db/nahj.db differ diff --git a/nahj_get_metadata_v2.py b/nahj_get_metadata_v2.py index 25106a6..eedb0a0 100644 --- a/nahj_get_metadata_v2.py +++ b/nahj_get_metadata_v2.py @@ -9,9 +9,10 @@ from langchain_openai import ChatOpenAI from nahj_get_metadata_oss import oss_test import asyncio import sqlite3 +import data_model as dm -conn = sqlite3.connect('./db/nahj.db') -cursor = conn.cursor() +# conn = sqlite3.connect('./db/nahj.db') +# cursor = conn.cursor() today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime.datetime.now().day}' @@ -24,33 +25,8 @@ today = f'{datetime.datetime.now().year}{datetime.datetime.now().month}{datetime فرق این سورس با ورژن 1 در این هست که برای هرکدام از تایتل ها و مفاهیم کلیدی و ... از پرامپت مخصوص خودش استفاده میکنه و در چهار فایل جداگانه ذخیره میکنه ''' -def create_tables(): - # متد ساخت تیبل های اسکیو ال لایت در صورت عدم وجود - cursor.execute("""CREATE TABLE IF NOT EXISTS rules ( - id TEXT, - rule TEXT, - type TEXT, - part_id TEXT -); -""") - cursor.execute("""CREATE TABLE IF NOT EXISTS persons ( - id TEXT, - person TEXT, - part_id TEXT -); -""") - - cursor.execute("""CREATE TABLE IF NOT EXISTS central_concepts ( - id TEXT, - concept TEXT, - paragraph_effect REAL, - part_id TEXT -); -""") - - conn.commit() # پرامپت های مختلف که هرکدام یکی از (تایتل یا مفاهیم کلیدی یا شخصیت و یا قواعد) رو از متن نهج البلاغه استخراج میکنه @@ -497,7 +473,7 @@ def find_passed_data_ids(output_metadata_jsonl_path): if __name__ == "__main__": - create_tables() # ساخت تیبل های مورد نیاز اسکیو ال لایت در صورت عدم وجود + dm.create_tables() # متد ساخت تمام تیبل های مورد نیاز input_data_path = './nahj_data/all_nahj_CONTEXT.json' # شامل تمامی (خطبه و نامه و حکمت ها) به همراه پاراگراف هایشان llm_error_path = './nahj-answer/error-in-getting-metadata-Final.txt' previous_peroid_errors_path = "./nahj_data/error-ids-Final.txt" @@ -590,8 +566,8 @@ if __name__ == "__main__": new_entry['paragraphs'] = new_paragraphs try: - result_data = asyncio.run(oss_test(SYSTEM_PROMPT,USER_PROMPT,new_entry))#gpt-4o - # result_data = llm_request(new_entry) + # result_data = asyncio.run(oss_test(SYSTEM_PROMPT,USER_PROMPT,new_entry))#gpt-4o + result_data = llm_request(new_entry) llm_answer_data = text_to_dict(result_data) if path == "title" : entery['paragraph_metadata'] = [] @@ -643,11 +619,11 @@ if __name__ == "__main__": arabic_text = part['arabic_text'] ai_title = entery['paragraph_metadata'][N]['title'] paragraph_type = entery['paragraph_metadata'][N]['paragraph_type'] - - cursor.execute("INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type) \ - VALUES (?, ?, ?, ? ,? ,? ,? ,? ,? ,? ,? ,?)", - (id_, context_id, part_id, title, large_title, text, url, typee, arabic_text, i_link, ai_title, paragraph_type)) - conn.commit() + dm.insert_data_to_speechs([id_, context_id, part_id, title, large_title, text, url, typee, arabic_text, i_link, ai_title, paragraph_type]) + # cursor.execute("INSERT INTO speeches (id, context_id, part_id, title, large_title, normalized_sentence, url, types, arabic_text, interpretation_links, ai_title, ai_paragraph_type) \ + # VALUES (?, ?, ?, ? ,? ,? ,? ,? ,? ,? ,? ,?)", + # ([id_, context_id, part_id, title, large_title, text, url, typee, arabic_text, i_link, ai_title, paragraph_type])) + # conn.commit() central_concepts = entery['paragraph_metadata'][N]['central_concepts'] persons = entery['paragraph_metadata'][N]['persons'] @@ -659,18 +635,20 @@ if __name__ == "__main__": c_id = context_id+part_id+f"c{k}" concept = row['concept'] paragraph_effect = row['paragraph_effect'] - cursor.execute("""INSERT INTO central_concepts (id, concept, paragraph_effect, part_id) - VALUES(?, ?, ?, ?)""",(c_id, concept, paragraph_effect, part_id)) - conn.commit() + dm.insert_data_to_central_concepts([c_id, concept, paragraph_effect, part_id]) + # cursor.execute("""INSERT INTO central_concepts (id, concept, paragraph_effect, part_id) + # VALUES(?, ?, ?, ?)""",(c_id, concept, paragraph_effect, part_id)) + # conn.commit() k=0 for row in persons: k+=1 c_id = context_id+part_id+f"p{k}" person = row - cursor.execute("""INSERT INTO persons (id, person, part_id) - VALUES(?, ?, ?)""",(c_id, person, part_id)) - conn.commit() + dm.insert_data_to_persons([c_id, person, part_id]) + # cursor.execute("""INSERT INTO persons (id, person, part_id) + # VALUES(?, ?, ?)""",(c_id, person, part_id)) + # conn.commit() k=0 for row in rules: @@ -678,9 +656,10 @@ if __name__ == "__main__": c_id = context_id+part_id+f"r{k}" rule = row['rule'] rule_type = row['type'] - cursor.execute("""INSERT INTO rules (id, rule, type, part_id) - VALUES(?, ?, ?, ?)""",(c_id, rule, rule_type, part_id)) - conn.commit() + dm.insert_data_to_rules([c_id, rule, rule_type, part_id]) + # cursor.execute("""INSERT INTO rules (id, rule, type, part_id) + # VALUES(?, ?, ?, ?)""",([c_id, rule, rule_type, part_id])) + # conn.commit() N+=1