Source code for kyoto_reader.coreference

import logging
from typing import Optional, Set

from .base_phrase import BasePhrase

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Set this logger's threshold to WARNING.
logger.setLevel(logging.WARNING)


class Mention(BasePhrase):
    """A base phrase that takes part in coreference.

    A mention keeps track of the IDs of the entities it refers to, split into
    certain and uncertain references.

    Args:
        bp (BasePhrase): A base phrase object that corresponds to this mention.

    Attributes:
        eids (set): Entity IDs.
        eids_unc (set): Uncertain entity IDs. "Uncertain" means the mention is
            annotated with "≒".
    """

    def __init__(self, bp: BasePhrase):
        # Rebuild the parent state from ``bp``; only the first element of
        # ``bp.dmids`` is handed to the parent constructor.
        super().__init__(
            bp.tag,
            bp.dmids[0],
            bp.dtid,
            bp.sid,
            bp.doc_id,
            parent=bp.parent,
            children=bp.children,
        )
        self.eids: Set[int] = set()
        self.eids_unc: Set[int] = set()

    @property
    def all_eids(self) -> Set[int]:
        """All entity IDs this mention refers to (certain and uncertain)."""
        return self.eids | self.eids_unc

    def is_uncertain_to(self, entity: 'Entity') -> bool:
        """Whether this mention has uncertain relation with a specified entity.

        Args:
            entity (Entity): An entity this mention refers to.

        Returns:
            bool: False if the entity's ID is in ``eids``, True if it is in
            ``eids_unc``.

        Raises:
            AssertionError: If the entity's ID is in neither set.
        """
        if entity.eid in self.eids:
            return False
        assert entity.eid in self.eids_unc, 'mention does not refer to the given entity'
        return True

    def __repr__(self) -> str:
        # ``repr(super())`` would describe the ``super`` proxy object itself;
        # calling ``__repr__`` through the proxy yields the parent's
        # representation of this instance.
        return f'Mention(bp: {super().__repr__()}, eids: {self.eids!r}, eids_unc: {self.eids_unc!r})'

    def __str__(self) -> str:
        return self.core

    def __eq__(self, other: object) -> bool:
        """Two mentions are equal when their ``dtid`` and ``sid`` both match."""
        if not isinstance(other, BasePhrase):
            # Let Python try the reflected comparison instead of failing with
            # AttributeError on unrelated objects.
            return NotImplemented
        return self.dtid == other.dtid and self.sid == other.sid

    def __hash__(self) -> int:
        # Must stay consistent with ``__eq__``: hash the same two fields.
        return hash((self.dtid, self.sid))
class Entity:
    """A class to represent an entity in coreference.

    This class manages entity IDs of mentions that refer to this entity.

    Args:
        eid (int): An Entity ID.
        exophor (str, optional): The kind of exophor if this entity corresponds
            to some exophor. Otherwise, None.

    Attributes:
        eid (int): An Entity ID.
        exophor (str, optional): A string to represent exophor, such as "著者",
            "読者", and "不特定:人".
        mentions (Set[Mention]): A set of mentions that refer to this entity.
        mentions_unc (Set[Mention]): Mentions that have uncertain relation with
            this entity.
        taigen (bool, optional): Whether this entity is 体言 or not.
        yougen (bool, optional): Whether this entity is 用言 or not.
    """

    def __init__(self, eid: int, exophor: Optional[str] = None):
        self.eid: int = eid
        self.exophor: Optional[str] = exophor
        self.mentions: Set['Mention'] = set()
        self.mentions_unc: Set['Mention'] = set()
        # Both stay None until the first mention is added.
        self.taigen: Optional[bool] = None
        self.yougen: Optional[bool] = None

    @property
    def is_special(self) -> bool:
        """Whether this entity corresponds to special entity, such as exophor."""
        return self.exophor is not None

    @property
    def all_mentions(self) -> Set['Mention']:
        """All mentions that refer to this entity, including uncertain ones."""
        return self.mentions | self.mentions_unc

    def add_mention(self, mention: 'Mention', uncertain: bool) -> None:
        """Add a mention that refers to this entity.

        When a non-uncertain mention is added and the mention has already been
        registered as an uncertain mention, it will be overwritten as
        non-uncertain. Adding an uncertain mention that is already registered
        (in either set) does nothing.

        Args:
            mention (Mention): A mention.
            uncertain (bool): Whether the mention is uncertain (i.e., annotated
                with "≒").
        """
        if uncertain:
            # Check both sets directly rather than building their union.
            if mention in self.mentions or mention in self.mentions_unc:
                return
            mention.eids_unc.add(self.eid)
            self.mentions_unc.add(mention)
        else:
            if mention in self.mentions_unc:
                # Promote: drop the uncertain registration first.
                self.remove_mention(mention)
            mention.eids.add(self.eid)
            self.mentions.add(mention)
        # The entity gets a part of speech only while every added mention
        # agrees on it: once a flag has become False it stays False.
        features = mention.tag.features
        self.yougen = (self.yougen is not False) and ('用言' in features)
        self.taigen = (self.taigen is not False) and ('体言' in features)

    def remove_mention(self, mention: 'Mention') -> None:
        """Remove a mention that is managed by this entity.

        Nothing happens for a mention that is not registered. The entity ID is
        dropped from the given mention's ID sets with ``discard`` so that an
        equal-but-distinct mention object does not raise ``KeyError``.
        """
        if mention in self.mentions:
            self.mentions.remove(mention)
            mention.eids.discard(self.eid)
        if mention in self.mentions_unc:
            self.mentions_unc.remove(mention)
            mention.eids_unc.discard(self.eid)

    def __str__(self) -> str:
        """Return the exophor, else the text of one registered mention, else 'None'."""
        if self.exophor is not None:
            return self.exophor
        # Prefer a certain mention over an uncertain one; which element of a
        # set is picked is arbitrary.
        for pool in (self.mentions, self.mentions_unc):
            if pool:
                return str(next(iter(pool)))
        return str(None)