forked from Sefaria/Sefaria-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfix_jagged_arrays.py
30 lines (25 loc) · 1.12 KB
/
fix_jagged_arrays.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# -*- coding: utf-8 -*-
import argparse
import re
from sefaria.model import *
from sefaria.datatype.jagged_array import JaggedTextArray, JaggedArray
from sefaria.system.exceptions import BookNameError
def _normalize_leaf_nodes(text_version):
    """
    Normalize the jagged text array stored under each leaf node of a version.

    Each leaf node of the version's index is printed, its content is wrapped
    in a JaggedTextArray and normalized to the node's depth.  When
    ``normalize`` returns a truthy value (per the original author's comment,
    this marks content that was changed) the array is written back into the
    version at that node's address.

    :param text_version: a version record taken from ``VersionSet()``
    :return: True if at least one node's content was written back, else False
    """
    changed = False
    for leaf in text_version.get_index().nodes.get_leaf_nodes():
        print(leaf)
        jagged = JaggedTextArray(text_version.content_node(leaf))
        if not jagged.normalize(terminal_depth=leaf.depth):
            # Nothing reported as changed for this node; leave it alone.
            continue
        text_version.sub_content(key_list=leaf.version_address(), value=jagged.array())
        changed = True
    return changed


all_versions = VersionSet()
# Alternative left behind by the original author (kept for reference):
#   all_library_nodes = library.get_content_nodes(with_commentary=True)

for version in all_versions:
    # Progress line: "<title>: <versionTitle>", both encoded as UTF-8.
    title_bytes = version.title.encode('utf-8')
    version_title_bytes = version.versionTitle.encode('utf-8')
    print("{}: {}".format(title_bytes, version_title_bytes))
    try:
        # Save only when some node was actually rewritten.
        if _normalize_leaf_nodes(version):
            version.save()
    except BookNameError:
        # Raised somewhere in the lookup/normalize/save sequence above;
        # presumably by get_index() when the version has no matching index.
        print("no index for {}".format(version.title.encode('utf-8')))