Upload files to "VerbNet/VerbNet2OWL"

This commit is contained in:
mmpouya 2025-01-08 10:55:17 +00:00
parent 26ddc15841
commit b0582335a5
3 changed files with 369 additions and 0 deletions

View File

@ -0,0 +1,97 @@
import os
import xml.etree.ElementTree as ET
from rdflib import Graph, Namespace, URIRef , RDFS , RDF , OWL
# Default folder names; in this module they are referenced only by the
# disabled batch loop further down and rebound in the __main__ section.
ttl_directory = "TTLs"  # folder for generated .ttl files
xml_directory = "XMLs"  # folder holding the source .xml files
def xmlclass_to_rdf(xml_file_path, rdf_output_path):
    """Convert one VerbNet class XML file into a Turtle file of subclass links.

    The graph written to ``rdf_output_path`` contains:

    * an ``owl:Ontology`` declaration;
    * ``member rdfs:subClassOf class`` for every ``MEMBER`` listed directly
      in the ``MEMBERS`` element of a ``VNCLASS`` or ``VNSUBCLASS`` (spaces
      in member names are replaced by ``_``);
    * ``subclass rdfs:subClassOf parent`` for every ``VNSUBCLASS`` nested,
      at any depth, inside the ``SUBCLASSES`` element of a ``VNCLASS``;
    * ``class rdfs:subClassOf has<Role>`` for each ``VNCLASS`` and every
      ``THEMROLE`` element found anywhere in the file.

    Args:
        xml_file_path: Path of the XML file to read.
        rdf_output_path: Path of the Turtle file to write.

    Raises:
        KeyError: If an ``ID``, ``name`` or ``type`` attribute is missing.
    """
    root = ET.parse(xml_file_path).getroot()

    g = Graph(base="http://tavasi.majles.ir/ontology/general/Verbnet")
    go = Namespace("http://tavasi.majles.ir/ontology/general#")
    g.bind("go", go)
    ontology_iri = URIRef("http://tavasi.majles.ir/ontology/general")
    g.add((ontology_iri, RDF.type, OWL.Ontology))

    def add_members(class_element):
        """Link every MEMBER of ``class_element`` to that class."""
        class_uri = go[class_element.attrib['ID']]
        members = class_element.find('MEMBERS')
        if members is None:
            return
        for member in members.findall('MEMBER'):
            # str.replace returns a new string; the result has to be kept,
            # otherwise names containing spaces end up verbatim in the IRI.
            member_name = str(member.attrib['name']).replace(" ", "_")
            g.add((go[member_name], RDFS.subClassOf, class_uri))

    def add_subclass_links(parent_element, parent_uri):
        """Link each nested VNSUBCLASS to its parent, at any depth."""
        container = parent_element.find('SUBCLASSES')
        if container is None:
            return
        for child in container.findall('VNSUBCLASS'):
            child_uri = go[child.attrib['ID']]
            g.add((child_uri, RDFS.subClassOf, parent_uri))
            add_subclass_links(child, child_uri)

    for subclass in root.iter('VNSUBCLASS'):
        add_members(subclass)

    for vn_class in root.iter('VNCLASS'):
        class_uri = go[vn_class.attrib['ID']]
        add_members(vn_class)
        add_subclass_links(vn_class, class_uri)
        # Every thematic role in the file (including those declared on
        # subclasses) is attached to the top-level class.
        for role in root.iter('THEMROLE'):
            role_uri = go["has" + role.attrib['type']]
            g.add((class_uri, RDFS.subClassOf, role_uri))

    g.serialize(destination=rdf_output_path, format='turtle')
# for filename in os.listdir(xml_directory):
# if filename.endswith(".xml"):
# xml_file_path = os.path.join(xml_directory, filename)
# rdf_output_path = os.path.join(ttl_directory, os.path.splitext(filename)[0] + '.ttl')
# xmlclass_to_rdf(xml_file_path, rdf_output_path)
if __name__ == "__main__":
    # Manual run on a single class file. These are file paths, so they get
    # their own names instead of rebinding the module-level directory names.
    sample_xml_path = "XMLs/appear-48.1.1.xml"
    sample_ttl_path = "TTLs/appear-48.1.1.ttl"
    xmlclass_to_rdf(sample_xml_path, sample_ttl_path)

View File

@ -0,0 +1,233 @@
import os
import xml.etree.ElementTree as ET
from rdflib import Namespace, URIRef
def _selrestrs_expression(group, namespace):
    """Return the Turtle class expression for one ``SELRESTRS`` element.

    Nested ``SELRESTRS`` groups are resolved recursively. All operands of the
    group are folded pairwise with ``OR`` when the group carries
    ``logic="or"`` and with ``AND`` otherwise. A ``SELRESTR`` whose ``Value``
    is ``-`` is wrapped by ``nottype``.

    Returns ``None`` when the group contains no restriction at all.
    """
    operands = []
    # Nested groups are collected before the group's own SELRESTR entries and
    # each further operand is passed as the FIRST argument of AND/OR, so a
    # plain two-restriction group renders as AND(second, first).
    for nested in group.findall('SELRESTRS'):
        nested_expression = _selrestrs_expression(nested, namespace)
        if nested_expression is not None:
            operands.append(nested_expression)
    for restriction in group.findall('SELRESTR'):
        type_uri = '<' + namespace + restriction.attrib['type'] + '>'
        if restriction.attrib.get('Value') == '-':
            operands.append(nottype(type_uri))
        else:
            operands.append(type_uri)

    if not operands:
        return None

    combine = OR if group.attrib.get('logic') == "or" else AND
    expression = operands[0]
    for operand in operands[1:]:
        expression = combine(operand, expression)
    return expression


def parse_and_generate_OWL(xml_file_path):
    """Build Turtle restrictions from the selectional restrictions of a file.

    For every ``VNCLASS`` and then every ``VNSUBCLASS`` in the XML file, each
    ``THEMROLE`` listed in its ``THEMROLES`` element is inspected. Every
    non-empty ``SELRESTRS`` group of a role yields one statement produced by
    ``only(class_uri, predicate_uri(role_type), expression)``.

    Each group is evaluated on its own, so a role's restriction never mixes
    with restrictions of another role, and all operands of a group take part
    in the intersection/union.

    Args:
        xml_file_path: Path of the VerbNet class XML file to read.

    Returns:
        The concatenated statements as one string; ``""`` when the file
        contains no selectional restriction.

    Raises:
        KeyError: If an ``ID`` or ``type`` attribute is missing.
    """
    # Plain strings are used for the "<...>" terms: they are only ever
    # interpolated into text, and angle brackets are not valid IRI characters.
    namespace = "http://tavasi.majles.ir/ontology/general#"
    root = ET.parse(xml_file_path).getroot()

    statements = []
    for tag in ('VNCLASS', 'VNSUBCLASS'):
        for verb_class in root.iter(tag):
            class_uri = '<' + namespace + verb_class.attrib['ID'] + '>'
            themroles = verb_class.find('THEMROLES')
            if themroles is None:
                continue
            for themrole in themroles.findall('THEMROLE'):
                predicate = predicate_uri(themrole.attrib['type'])
                for group in themrole.findall('SELRESTRS'):
                    expression = _selrestrs_expression(group, namespace)
                    if expression is not None:
                        statements.append(only(class_uri, predicate, expression))
    return "".join(statements)
def predicate_uri(themrole):
    """Return the bracketed ``EventHas<themrole>`` property IRI as text."""
    return "<http://tavasi.majles.ir/ontology/general#EventHas" + themrole + ">"
def nottype(type):
    """Return a Turtle blank-node class that is the complement of ``type``."""
    return "[ rdf:type owl:Class ; owl:complementOf {} ]".format(type)
#check
def only(id, predicate, rest):
    """Return a Turtle statement restricting ``id`` via ``owl:allValuesFrom``.

    ``id`` is declared a subclass of an ``owl:Restriction`` on ``predicate``
    whose values must all come from ``rest``. The text ends with a newline.
    """
    return (
        f"{id} rdfs:subClassOf [ rdf:type owl:Restriction ;\n"
        f"owl:onProperty {predicate} ;\n"
        f"owl:allValuesFrom {rest}\n"
        "].\n"
    )
def AND(type1, type2):
    """Return a Turtle blank-node class: the intersection of two classes."""
    pieces = [
        f"[owl:intersectionOf ( {type1}",
        f"{type2}",
        ") ;",
        "rdf:type owl:Class]",
    ]
    return "\n".join(pieces)
def OR(type1, type2):
    """Return a Turtle blank-node class: the union of two classes.

    The returned text ends with a newline.
    """
    pieces = [
        "[ rdf:type owl:Class ;",
        f"owl:unionOf ( {type1}",
        f"{type2}",
        ")]",
        "",
    ]
    return "\n".join(pieces)
if __name__ == "__main__":
    # Print the generated restrictions for every XML file in the folder.
    xml_directory = "XMLs/"
    for filename in sorted(os.listdir(xml_directory)):
        # Anything that is not an XML file would make ET.parse raise.
        if not filename.endswith(".xml"):
            continue
        xml_file_path = os.path.join(xml_directory, filename)
        print(parse_and_generate_OWL(xml_file_path))

View File

@ -0,0 +1,39 @@
import os
from Final1 import xmlclass_to_rdf
from Final2 import parse_and_generate_OWL
# Batch driver: writes one .ttl file per .xml file, then appends the
# selectional-restriction statements to each generated file.
ttl_directory = "TTLs"
xml_directory = "XMLs"  # path

# The output folder has to exist before any file can be written into it.
os.makedirs(ttl_directory, exist_ok=True)

# List the inputs once so both passes work on the same, ordered set of files.
xml_filenames = sorted(
    name for name in os.listdir(xml_directory) if name.endswith(".xml")
)

for filename in xml_filenames:
    xml_file_path = os.path.join(xml_directory, filename)
    rdf_output_path = os.path.join(ttl_directory, os.path.splitext(filename)[0] + '.ttl')
    xmlclass_to_rdf(xml_file_path, rdf_output_path)
    print (f"corresponding ttl from {filename} is created")

# NOTE(review): the appended statements use the rdf:, rdfs: and owl: prefixes;
# confirm that the file written by xmlclass_to_rdf declares all three,
# otherwise the combined file is not valid Turtle.
for filename in xml_filenames:
    xml_file_path = os.path.join(xml_directory, filename)
    ttl_output_path = os.path.join(ttl_directory, os.path.splitext(filename)[0] + '.ttl')
    ttl_added_string = parse_and_generate_OWL(xml_file_path)
    # Explicit UTF-8 so the round trip does not depend on the platform default.
    with open(ttl_output_path, 'r', encoding="utf-8") as file:
        content = file.read()
    if ttl_added_string == "":
        modified_content = content + "\n#This verb has no selectional restriction \n"
    else:
        modified_content = content + f"\n# Generated String:\n{ttl_added_string}\n"
    with open(ttl_output_path, 'w', encoding="utf-8") as file:
        file.write(modified_content)
    print (f"corresponding ttl from {filename} is modified")