diff --git a/udapi/block/corefud/delete.py b/udapi/block/corefud/delete.py index 4e68e8dd..5aaf94e7 100644 --- a/udapi/block/corefud/delete.py +++ b/udapi/block/corefud/delete.py @@ -25,19 +25,15 @@ def is_root_reachable_by_deps(self, node, parents_to_ignore=None): proc_node, path = stack.pop() # root is reachable if proc_node == node.root: - break + return True # path forms a cycle, the root cannot be reached through this branch - if proc_node in path: - continue - for dep in proc_node.deps: - # the root cannot be reached through ignored nodes - if dep['parent'] in parents_to_ignore: - continue - # process the parent recursively - stack.append((dep['parent'], path + [proc_node])) - else: - return False - return True + if proc_node not in path: + for dep in proc_node.deps: + # the root cannot be reached through ignored nodes + if dep['parent'] not in parents_to_ignore: + # process the parent recursively + stack.append((dep['parent'], path + [proc_node])) + return False def _deps_ignore_nodes(self, node, parents_to_ignore): """ Retrieve deps from the node, recursively ignoring specified parents. @@ -46,18 +42,16 @@ def _deps_ignore_nodes(self, node, parents_to_ignore): stack = [(node, [])] while stack: proc_node, skipped_nodes = stack.pop() - # if there is a cycle of skipped nodes, ground the subtree to the root - if proc_node in skipped_nodes: - newdeps.append({'parent': node.root, 'deprel': 'root'}) - continue - for dep in proc_node.deps: - # keep deps with a parent that shouldn't be ignored - if not dep['parent'] in parents_to_ignore: - newdeps.append(dep) - continue - # process the ignored parent recursively - stack.append((dep['parent'], skipped_nodes + [proc_node])) - return newdeps + if proc_node not in skipped_nodes: + for dep in proc_node.deps: + if dep['parent'] in parents_to_ignore: + # process the ignored parent recursively + stack.append((dep['parent'], skipped_nodes + [proc_node])) + else: + # keep deps with a parent that shouldn't be ignored + newdeps.append(dep) + # If no newdeps were found (because of a cycle), return the root. + return newdeps if newdeps else [{'parent': node.root, 'deprel': 'root'}] def process_document(self, doc): # This block should work both with coreference loaded (deserialized) and not. @@ -67,17 +61,14 @@ def process_document(self, doc): if self.empty: for node in root.descendants: # process only the nodes dependent on empty nodes - if not '.' in node.raw_deps: - continue - # just remove empty parents if the root remains reachable - if self.is_root_reachable_by_deps(node, root.empty_nodes): - node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] - # otherwise propagate to non-empty ancestors - else: - newdeps = self._deps_ignore_nodes(node, root.empty_nodes) - newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps)) - node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted) - + if '.' in node.raw_deps: + # just remove empty parents if the root remains reachable + if self.is_root_reachable_by_deps(node, root.empty_nodes): + node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] + # otherwise propagate to non-empty ancestors + else: + node.deps = self._deps_ignore_nodes(node, root.empty_nodes) + # This needs to be done even if '.' not in node.raw_deps. if '.' in node.misc['Functor'].split(':')[0]: del node.misc['Functor'] root.empty_nodes = [] diff --git a/udapi/core/node.py b/udapi/core/node.py index 0e746c60..863ed90f 100644 --- a/udapi/core/node.py +++ b/udapi/core/node.py @@ -252,7 +252,7 @@ def raw_deps(self): #if self._raw_deps is not None: # return self._raw_deps if self._deps: - self._raw_deps = '|'.join(f"{dep['parent']._ord}:{dep['deprel']}" for dep in self._deps) + self._raw_deps = '|'.join(f"{p}:{r}" for p, r in sorted(set((d['parent'].ord, d['deprel']) for d in self._deps))) return self._raw_deps @raw_deps.setter diff --git a/udapi/core/tests/test_enhdeps.py b/udapi/core/tests/test_enhdeps.py index 3f473bf3..53a74389 100644 --- a/udapi/core/tests/test_enhdeps.py +++ b/udapi/core/tests/test_enhdeps.py @@ -57,7 +57,7 @@ def test_create_deps2empty(self): e.deps.append({'parent': h, 'deprel':'dep:e2h'}) d.deps.append({'parent': e, 'deprel': 'dep:d2e'}) self.assertEqual("2:dep:e2h", e.raw_deps, ) - self.assertEqual("5:conj|3.1:dep:d2e", d.raw_deps) + self.assertEqual("3.1:dep:d2e|5:conj", d.raw_deps) self.assertEqual(self.tree.descendants_and_empty, self.nodes[:3] + [e] + self.nodes[3:]) diff --git a/udapi/core/tests/test_node.py b/udapi/core/tests/test_node.py index 83348c67..f2b64a3d 100755 --- a/udapi/core/tests/test_node.py +++ b/udapi/core/tests/test_node.py @@ -245,5 +245,25 @@ def test_empty_nodes(self): self.assertEqual(root.descendants_and_empty, [e1, e2, e3, e4, e6, e7]) self.assertEqual([n.ord for n in root.descendants_and_empty], [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) + def test_enh_deps_and_reordering(self): + """Test reordering of node ord in enhanced deps when reorderin/removing nodes.""" + root = Root() + for i in range(3): + root.create_child(form=f'node{i+1}') + + n1, n2, n3 = root.descendants() + n1.raw_deps = '2:nsubj|3:obj' + self.assertEqual(n1.raw_deps, '2:nsubj|3:obj') + self.assertEqual(n1.deps, [{'parent': n2, 'deprel': 'nsubj'}, {'parent': n3, 'deprel': 'obj'}]) + n2.shift_after_node(n3) + self.assertEqual(n1.raw_deps, '2:obj|3:nsubj') + # TODO only node.raw_deps are currently guaranteed to return the deps sorted, not node.deps + #self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}, {'parent': n2, 'deprel': 'nsubj'}]) + # TODO: after removing a node, all deps should be updated + #n2.remove() + #self.assertEqual(n1.raw_deps, '2:nsubj') + #self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}]) + + if __name__ == "__main__": unittest.main()