# Provenance: reconstructed from a git patch, commit
# b0582335a50f9326c5188ec29e7ba4f184116dfe (mmpouya, Wed, 8 Jan 2025),
# subject 'Upload files to "VerbNet/VerbNet2OWL"', which added three scripts:
#   VerbNet/VerbNet2OWL/Final1.py        -> xmlclass_to_rdf
#   VerbNet/VerbNet2OWL/Final2.py        -> parse_and_generate_OWL + helpers
#   VerbNet/VerbNet2OWL/adding to ttl.py -> main (batch pipeline)
"""Convert VerbNet class XML files into OWL axioms serialized as Turtle.

``xmlclass_to_rdf`` writes the class/member hierarchy of one XML file to a
Turtle file.  ``parse_and_generate_OWL`` renders the selectional restrictions
of the same XML file as Turtle text, and ``main`` appends that text to the
generated Turtle file for every ``*.xml`` file in a directory.
"""

import os
import xml.etree.ElementTree as ET

ttl_directory = "TTLs"
xml_directory = "XMLs"  # path

# IRIs shared by the graph builder and the text generator.
GO_NAMESPACE = "http://tavasi.majles.ir/ontology/general#"
ONTOLOGY_IRI = "http://tavasi.majles.ir/ontology/general"
GRAPH_BASE = "http://tavasi.majles.ir/ontology/general/Verbnet"

_CLASS_TAGS = ("VNCLASS", "VNSUBCLASS")


def xmlclass_to_rdf(xml_file_path, rdf_output_path):
    """Serialize the class hierarchy of one VerbNet XML file as Turtle.

    For every ``VNCLASS``/``VNSUBCLASS`` element the graph receives:

    * ``member rdfs:subClassOf class`` for each ``MEMBERS/MEMBER`` (spaces in
      the member name are replaced by underscores);
    * ``subclass rdfs:subClassOf class`` for each direct
      ``SUBCLASSES/VNSUBCLASS`` child, at any nesting depth.

    Finally every ``THEMROLE`` found anywhere in the document is attached to
    the last ``VNCLASS`` in document order as ``class rdfs:subClassOf
    go:has<type>``.

    Args:
        xml_file_path: Path of the XML file to read.
        rdf_output_path: Path of the Turtle file to write.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is malformed.
        KeyError: If a class lacks ``ID``, a member lacks ``name`` or a role
            lacks ``type``.
    """
    # Imported here so the string-only helpers below stay importable on
    # machines where rdflib is not installed.
    from rdflib import OWL, RDF, RDFS, Graph, Namespace, URIRef

    root = ET.parse(xml_file_path).getroot()
    graph = Graph(base=GRAPH_BASE)
    go = Namespace(GO_NAMESPACE)
    graph.bind("go", go)
    graph.add((URIRef(ONTOLOGY_IRI), RDF.type, OWL.Ontology))

    last_class_uri = None
    for node in root.iter():
        if node.tag not in _CLASS_TAGS:
            continue
        class_uri = go[node.attrib["ID"]]
        if node.tag == "VNCLASS":
            last_class_uri = class_uri

        members = node.find("MEMBERS")
        if members is not None:
            for member in members.findall("MEMBER"):
                # str.replace returns a new string; the result must be kept.
                member_name = str(member.attrib["name"]).replace(" ", "_")
                graph.add((go[member_name], RDFS.subClassOf, class_uri))

        subclasses = node.find("SUBCLASSES")
        if subclasses is not None:
            for child in subclasses.findall("VNSUBCLASS"):
                child_uri = go[child.attrib["ID"]]
                graph.add((child_uri, RDFS.subClassOf, class_uri))

    # Without any VNCLASS there is no subject to attach the roles to.
    if last_class_uri is not None:
        for role in root.iter("THEMROLE"):
            role_uri = go["has" + role.attrib["type"]]
            graph.add((last_class_uri, RDFS.subClassOf, role_uri))

    graph.serialize(destination=rdf_output_path, format="turtle")


def _selrestrs_expression(selrestrs):
    """Return the Turtle class expression for one ``SELRESTRS`` element.

    Nested ``SELRESTRS`` children are rendered first, followed by the direct
    ``SELRESTR`` children; all operands are then folded with ``OR`` when the
    element has ``logic="or"`` and with ``AND`` otherwise.  Returns ``None``
    when the element contains no restriction at all.
    """
    operands = []
    for nested in selrestrs.findall("SELRESTRS"):
        nested_expression = _selrestrs_expression(nested)
        if nested_expression is not None:
            operands.append(nested_expression)
    for restriction in selrestrs.findall("SELRESTR"):
        type_ref = f"<{GO_NAMESPACE}{restriction.attrib['type']}>"
        if restriction.attrib.get("Value") == "-":
            operands.append(nottype(type_ref))
        else:
            operands.append(type_ref)

    if not operands:
        return None

    combine = OR if selrestrs.attrib.get("logic") == "or" else AND
    expression = operands[0]
    for operand in operands[1:]:
        # Newest operand first, so two operands a, b render as OP(b, a).
        expression = combine(operand, expression)
    return expression


def parse_and_generate_OWL(xml_file_path):
    """Render the selectional restrictions of an XML file as Turtle text.

    Every ``VNCLASS`` is processed first, then every ``VNSUBCLASS``.  For each
    ``THEMROLES/THEMROLE`` whose ``SELRESTRS`` holds at least one restriction,
    one ``owl:allValuesFrom`` axiom (see ``only``) is emitted.  Restrictions
    are evaluated per role, so one role never leaks into the next.

    Args:
        xml_file_path: Path of the XML file to read.

    Returns:
        The concatenated axioms, or ``""`` when the file has no restriction.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is malformed.
        KeyError: If a class lacks ``ID`` or a role/restriction lacks ``type``.
    """
    root = ET.parse(xml_file_path).getroot()
    axioms = []
    for tag in _CLASS_TAGS:
        for node in root.iter(tag):
            themroles = node.find("THEMROLES")
            if themroles is None:
                continue
            class_ref = f"<{GO_NAMESPACE}{node.attrib['ID']}>"
            for role in themroles.findall("THEMROLE"):
                predicate = predicate_uri(role.attrib["type"])
                for selrestrs in role.findall("SELRESTRS"):
                    expression = _selrestrs_expression(selrestrs)
                    if expression is not None:
                        axioms.append(only(class_ref, predicate, expression))
    return "".join(axioms)


def predicate_uri (themrole):
    """Return the bracketed IRI of the ``has<themrole>`` property.

    NOTE(review): the original body returned an undefined name (its string
    literal looks to have been lost), so it raised NameError.  The IRI is
    rebuilt to match the ``go:has<type>`` terms that ``xmlclass_to_rdf``
    emits -- TODO confirm this is the intended property IRI.
    """
    return f"<{GO_NAMESPACE}has{themrole}>"


def nottype(type):
    """Return a Turtle class expression for the complement of ``type``."""
    return f"[ rdf:type owl:Class ; owl:complementOf {type} ]"


def only(id, predicate, rest):
    """Return the axiom ``id rdfs:subClassOf (predicate only rest)``.

    The result is one Turtle statement terminated by ``.`` and a newline.
    """
    return (
        f"{id} rdfs:subClassOf [ rdf:type owl:Restriction ;\n"
        f"    owl:onProperty {predicate} ;\n"
        f"    owl:allValuesFrom {rest}\n"
        f"    ].\n"
    )


def AND (type1, type2):
    """Return a Turtle class expression for the intersection of two classes."""
    return (
        f"[owl:intersectionOf ( {type1}\n"
        f"    {type2}\n"
        f"    ) ;\n"
        f"    rdf:type owl:Class]"
    )


def OR (type1, type2):
    """Return a Turtle class expression for the union of two classes."""
    return (
        f"[ rdf:type owl:Class ;\n"
        f"    owl:unionOf ( {type1}\n"
        f"    {type2}\n"
        f"    )]\n"
    )


def main(xml_dir=xml_directory, ttl_dir=ttl_directory):
    """Convert every ``*.xml`` file in ``xml_dir`` to a ``.ttl`` in ``ttl_dir``.

    First pass: write the class hierarchy with ``xmlclass_to_rdf``.  Second
    pass: append the text from ``parse_and_generate_OWL`` (or a comment saying
    there is none) to each generated file.
    """
    os.makedirs(ttl_dir, exist_ok=True)
    jobs = []
    for filename in sorted(os.listdir(xml_dir)):
        if filename.endswith(".xml"):
            stem = os.path.splitext(filename)[0]
            jobs.append((
                filename,
                os.path.join(xml_dir, filename),
                os.path.join(ttl_dir, stem + ".ttl"),
            ))

    for filename, xml_file_path, ttl_output_path in jobs:
        xmlclass_to_rdf(xml_file_path, ttl_output_path)
        print (f"corresponding ttl from {filename} is created")

    for filename, xml_file_path, ttl_output_path in jobs:
        ttl_added_string = parse_and_generate_OWL(xml_file_path)
        if ttl_added_string == "":
            addition = "\n#This verb has no selectional restriction \n"
        else:
            addition = f"\n# Generated String:\n{ttl_added_string}\n"
        # Appending avoids re-reading and re-encoding the serialized graph.
        with open(ttl_output_path, "a", encoding="utf-8") as file:
            file.write(addition)
        print (f"corresponding ttl from {filename} is modified")


if __name__ == "__main__":
    main()