-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathbundle.py
More file actions
127 lines (105 loc) · 4.35 KB
/
bundle.py
File metadata and controls
127 lines (105 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""Bundle class represents one sentence."""
import re
from udapi.core.root import Root
from udapi.block.write.textmodetrees import TextModeTrees
# A valid zone label is a (possibly empty) run of lowercase letters and hyphens,
# optionally followed by an underscore and one or more letters, digits or hyphens.
VALID_ZONE_REGEX = re.compile(r"^[a-z-]*(_[A-Za-z0-9-]+)?$")
class Bundle:
    """Bundle represents one sentence in a UD document.

    A bundle contains one or more trees. More trees are needed e.g. in case of
    parallel treebanks where each tree represents a translation of the sentence
    in a different language.
    Trees in one bundle are distinguished by a zone label.
    """

    # `number` is not set in __init__; within this class only remove() assigns it.
    __slots__ = ["trees", "number", "_bundle_id", "_document"]

    def __init__(self, bundle_id=None, document=None):
        """Create an empty bundle with an optional id and owning document."""
        self.trees = []
        self._bundle_id = bundle_id
        self._document = document

    @staticmethod
    def _make_sent_id(bundle_id, zone):
        """Return the sent_id of a tree: `bundle_id` alone or `bundle_id/zone`.

        A missing bundle id (None) yields None. An empty zone yields the bare
        bundle id, so no trailing slash is ever produced.
        """
        if bundle_id is None:
            return None
        return bundle_id + '/' + zone if zone else bundle_id

    @property
    def bundle_id(self):
        """ID of this bundle."""
        return self._bundle_id

    @bundle_id.setter
    def bundle_id(self, bundle_id):
        """Set the bundle id and refresh the sent_id of every tree in the bundle.

        The same rule as in `add_tree` is used: trees with an empty zone get
        the plain bundle id, the other trees get `bundle_id/zone`.
        """
        self._bundle_id = bundle_id
        for tree in self.trees:
            tree._sent_id = self._make_sent_id(bundle_id, tree.zone)  # pylint: disable=protected-access

    def __str__(self):
        if self._bundle_id is None:
            return 'bundle without id'
        return f"bundle id='{self._bundle_id}'"

    def __iter__(self):
        """Iterate over the trees of this bundle."""
        return iter(self.trees)

    @property
    def document(self):
        """Returns the document in which the bundle is contained."""
        return self._document

    def get_tree(self, zone=''):
        """Returns the tree root whose zone is equal to zone.

        Raises:
            Exception: if no tree or more than one tree has the given zone.
        """
        matching = [tree for tree in self.trees if tree.zone == zone]
        if len(matching) == 1:
            return matching[0]
        if not matching:
            raise Exception(f"No tree with zone={zone} in the bundle")
        raise Exception(f"More than one tree with zone={zone} in the bundle")

    def has_tree(self, zone=''):
        """Does this bundle contain a tree with a given zone?"""
        return any(tree.zone == zone for tree in self.trees)

    def create_tree(self, zone=None):
        """Return the root of a newly added tree with a given zone.

        The zone is validated by `add_tree`; None is stored as the empty zone.
        """
        root = Root()
        root._zone = zone  # pylint: disable=protected-access
        self.add_tree(root)
        return root

    def check_zone(self, new_zone):
        """Raise an exception if the zone is invalid or already exists.

        Raises:
            ValueError: if `new_zone` does not match VALID_ZONE_REGEX or is 'all'.
            Exception: if a tree with this zone is already in the bundle.
        """
        # fullmatch (unlike match with a `$` anchor) also rejects a trailing newline.
        if not VALID_ZONE_REGEX.fullmatch(new_zone):
            raise ValueError(
                f"'{new_zone}' is not a valid zone name ({VALID_ZONE_REGEX.pattern})")
        # 'all' passes the regex, so it has to be excluded explicitly.
        if new_zone == 'all':
            raise ValueError("'all' cannot be used as a zone name")
        if self.has_tree(new_zone):
            raise Exception(f"Tree with zone '{new_zone}' already exists in {self}")

    def add_tree(self, root):
        """Add an existing tree to the bundle.

        A zone of None is normalized to the empty zone before it is validated
        with `check_zone`. If the bundle has a non-empty id, the sent_id of the
        tree is derived from it. A '__doc__' entry in `root.json` is merged
        into the json of the document and removed from the tree.

        Returns:
            The added `root`.
        """
        # pylint: disable=protected-access
        if root.zone is None:
            root._zone = ''
        self.check_zone(root.zone)
        if self._bundle_id:
            root._sent_id = self._make_sent_id(self._bundle_id, root.zone)
        root.bundle = self
        self.trees.append(root)

        doc_json = root.json.get('__doc__')
        if doc_json:
            # NOTE(review): this needs the bundle to have a document (not None).
            self._document.json.update(doc_json)
            del root.json['__doc__']
        return root

    def remove(self):
        """Remove a bundle from the document."""
        self._document.bundles = [
            bundle for bundle in self._document.bundles if bundle is not self]
        # NOTE(review): the remaining bundles are renumbered from 0 here;
        # confirm this matches the numbering the document uses elsewhere.
        for i, bundle in enumerate(self._document.bundles):
            bundle.number = i

    def address(self):
        """Return bundle_id or '?' if missing."""
        return self._bundle_id if self._bundle_id is not None else '?'

    def draw(self, **kwargs):
        """Pretty print the trees using TextModeTrees."""
        TextModeTrees(**kwargs).process_bundle(self)

    @property
    def nodes(self):
        """An iterator over all nodes (excluding empty nodes) in all trees in this bundle."""
        for tree in self:
            # tree.descendants is slightly slower than tree._descendants,
            # but it seems safer, see the comment in udapi.core.block.Block.process_tree().
            yield from tree.descendants