I’m working on a project about language treatment, my task now is to parse the xml file to return an annotation. i will start by giving annotation.py :
import xml.etree.ElementTree as ET
import json
"""
Classe entité défini par un nom et par un type d'entité tout deux en chaine de caracteres
"""
class Entity:
def __init__(self, entity_id, name, entity_type):
self.id = entity_id
self.name = name
self.type = entity_type
def __eq__(self, other):
return (self.name == other.name and
self.type == other.type)
def __hash__(self):
return hash((self.name, self.type))
class Relation:
def __init__(self, e1, e2, relation_type, directed=False):
self.entity1 = e1
self.entity2 = e2
self.type = relation_type
self.directed = directed
def __eq__(self, other):
return (self.entity1 == other.entity1 and
self.entity2 == other.entity2 and
self.type == other.type and
self.directed == other.directed)
def __hash__(self):
return hash((self.entity1, self.entity2, self.type, self.directed))
class Event:
def __init__(self, trigger, event_type):
self.trigger = trigger
self.type = event_type
self.arguments = []
def add_argument(self, role, entity):
self.arguments.append((role, entity))
def __eq__(self, other):
return self.trigger == other.trigger and self.type == other.type
def __hash__(self):
return hash((self.trigger, self.type))
class Annotation:
def __init__(self):
self.phrase="Six Palestinians and an Israeli border policeman were involved in an incident, as reported by Palestinian security officials."
self.entities = []
self.relations = []
self.events = []
def add_entity(self, entity_id, mention, entity_type):
entity = Entity(entity_id, mention, entity_type)
self.entities.append(entity)
return entity
def add_relation(self, entity1, entity2, relation_type, directed=False):
relation = Relation(entity1, entity2, relation_type, directed)
self.relations.append(relation)
return relation
def add_event(self, trigger, event_type):
event = Event(trigger, event_type)
self.events.append(event)
return event
Now i need to write parse_xml , i did , but this one is not recognizing relations and events :
def parse_xml(f):
tree = ET.parse(f)
root = tree.getroot()
annotation = Annotation()
# Parse entities
for composite_entity_elem in root.findall('.//composite_entity'):
for entity_elem in composite_entity_elem.findall('entity'):
entity_id = entity_elem.get('ID')
entity_type = entity_elem.get('TYPE')
for mention_elem in entity_elem.findall('entity_mention'):
mention_id = mention_elem.get('ID')
name = mention_elem.find('./extent/charseq').text
annotation.add_entity(mention_id, name, entity_type)
# Parse relations (if any)
for relation_elem in root.findall('.//relation'):
relation_type_elem = relation_elem.find('type')
if relation_type_elem is None:
print(f"Warning: relation element missing 'type': {ET.tostring(relation_elem, encoding='unicode')}")
continue
relation_type = relation_type_elem.text
directed = relation_elem.get('DIRECTED') == 'true'
entity1_id_elem = relation_elem.find('arg1/entity')
entity2_id_elem = relation_elem.find('arg2/entity')
if entity1_id_elem is None or entity2_id_elem is None:
print(f"Warning: relation element missing 'arg1/entity' or 'arg2/entity': {ET.tostring(relation_elem, encoding='unicode')}")
continue
entity1_id = entity1_id_elem.get('ID')
entity2_id = entity2_id_elem.get('ID')
entity1 = next((e for e in annotation.entities if e.id == entity1_id), None)
entity2 = next((e for e in annotation.entities if e.id == entity2_id), None)
if entity1 and entity2:
annotation.add_relation(entity1, entity2, relation_type, directed)
# Parse events (if any)
for event_elem in root.findall('.//event'):
trigger_elem = event_elem.find('trigger')
if trigger_elem is None:
print(f"Warning: event element missing 'trigger': {ET.tostring(event_elem, encoding='unicode')}")
continue
trigger_id = trigger_elem.get('ID')
trigger_value = trigger_elem.find('value')
trigger_type = trigger_elem.find('type')
if trigger_value is None or trigger_type is None:
print(f"Warning: trigger element missing 'value' or 'type': {ET.tostring(trigger_elem, encoding='unicode')}")
continue
trigger = annotation.add_entity(trigger_id, trigger_value.text, trigger_type.text)
event_type_elem = event_elem.find('type')
if event_type_elem is None:
print(f"Warning: event element missing 'type': {ET.tostring(event_elem, encoding='unicode')}")
continue
event_type = event_type_elem.text
event = annotation.add_event(trigger, event_type)
for argument_elem in event_elem.findall('argument'):
role = argument_elem.get('ROLE')
argument_id = argument_elem.get('ENTITY')
argument = next((e for e in annotation.entities if e.id == argument_id), None)
if argument:
annotation.add_event_argument(event, role, argument)
return annotation
after debugging on my terminal, i noticed that it is not recognizing relation_type_elem and that its value is None. I didn’t check for events but i’m pretty sure it’s the same issue,i will end this by giving a snippet of the relation in my xml file :
<relation ID="APW20001007.1745.0371-R1" TYPE="PHYS" SUBTYPE="Part-Whole">
<relation_mention ID="1-1" LEXICALCONDITION="Formulaic">
<extent>
<charseq START="120" END="136">NABLUS, West Bank</charseq>
</extent>
<relation_mention_argument ENTITYID="APW20001007.1745.0371-E23" ENTITYMENTIONID="23-1" ARGNUM="1">
<extent>
<charseq START="120" END="136">NABLUS, West Bank</charseq>
</extent>
</relation_mention_argument>
<relation_mention_argument ENTITYID="APW20001007.1745.0371-E61" ENTITYMENTIONID="61-82" ARGNUM="2">
<extent>
<charseq START="128" END="136">West Bank</charseq>
</extent>
</relation_mention_argument>
</relation_mention>
</relation>
``` can someone please help? thank you.