forked from Sefaria/Sefaria-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_commentaries.py
106 lines (78 loc) · 3.49 KB
/
split_commentaries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
import argparse
from sefaria.model import *
from sefaria.datatype.jagged_array import JaggedTextArray, JaggedArray
from sefaria.utils.talmud import daf_to_section
import copy
def pad_moved_ja(ja, padding_values):
    """Pad a jagged array in place with empty leading entries.

    For each nesting level, ``padding_values`` supplies the 1-based position
    at which the first existing element of that level should end up.  At a
    level whose elements are all lists, ``position - 1`` empty lists are
    inserted at the front, and padding then continues inside the first
    pre-existing element using the remaining positions.

    :param ja: nested list to pad; it is modified in place.
    :param padding_values: sequence of 1-based positions, outermost level
        first.  It is only read, never modified, so a caller may safely pass
        a list it still needs afterwards.
    :return: the same ``ja`` object, after padding.
    """
    if not padding_values:
        return ja

    lead = padding_values[0] - 1
    # NOTE(review): a level whose elements are not all lists (e.g. a leaf
    # level holding strings) is left unpadded; confirm whether leaf-level
    # padding is also required.
    if all(isinstance(y, list) for y in ja):
        # One slice assignment instead of repeated insert(0, ...); each
        # placeholder is a distinct list object.
        ja[:0] = [[] for _ in range(lead)]
        # Recurse only when there is a pre-existing element to descend into
        # (an empty input has none).
        if 0 <= lead < len(ja):
            pad_moved_ja(ja[lead], padding_values[1:])
    return ja
# First text: split the Hebrew "WikiSource" Rashi on Bava Batra so that
# everything from 29a.9.2 onwards is stored under Rashbam on Bava Batra.

# Create the destination version, starting from the empty skeleton of the
# Bava Batra index.
# NOTE(review): the versionSource URL below appears to carry a proxy prefix
# ("fanyv88.com") -- confirm the intended source URL.
bava_batra_skeleton = Index().load({'title': 'Bava Batra'}).nodes.create_skeleton()
rashbam_bava_batra_he = Version({
    "chapter": bava_batra_skeleton,
    "title": 'Rashbam on Bava Batra',
    "versionTitle": "WikiSource",
    "language": "he",
    "versionSource": "https://he.wikisource.org/wiki/תלמוד_בבלי"
}).save()

# Refs for the whole source and destination texts, and for the two ranges
# the source is divided into.
whole_ref = Ref('Rashi on Bava Batra')
whole_moved_ref = Ref('Rashbam on Bava Batra')
stay_section_ref = Ref('Rashi on Bava Batra.2a.1.1-29a.9.1')
move_section_ref = Ref('Rashi on Bava Batra.29a.9.2-176b.4.2')

# Both chunks are addressed by the version title of the version saved above.
orig_tc = TextChunk(whole_ref, 'he', rashbam_bava_batra_he.versionTitle)
dest_tc = TextChunk(whole_moved_ref, 'he', rashbam_bava_batra_he.versionTitle)

# Cut the source text into the part that stays and the part that moves.
# The moved part is deep-copied so it is independent of the source lists.
stay_slice = JaggedTextArray(orig_tc.text).subarray_with_ref(stay_section_ref)
jatext_tostay = stay_slice.array()
move_slice = JaggedTextArray(orig_tc.text).subarray_with_ref(move_section_ref)
jatext_tomove = copy.deepcopy(move_slice.array())

# Pad each slice with empty leading entries, using the sections of its ref,
# so that its overall structure matches the structure of the original text.
jatext_tostay = pad_moved_ja(jatext_tostay, stay_section_ref.sections)
jatext_tomove = pad_moved_ja(jatext_tomove, move_section_ref.sections)

# Write both texts back: the source first, then the destination.
orig_tc.text = jatext_tostay
orig_tc.save()
dest_tc.text = jatext_tomove
dest_tc.save()
# Second text: split the Hebrew "Vilna Edition" Rashi on Makkot so that
# everything from 24a.35.1 onwards is stored under Rabbeinu Gershom on Makkot.

# Create and save the commentator index.
r_gershom_index = Index({
    "title": 'Rabbeinu Gershom',
    "heTitle": u'רבינו גרשום',
    "titleVariants": [],
    "heTitleVariants": [],
    "categories": [
        "Commentary",
        "Rishonim"
    ],
    "sectionNames": ["", ""],
    "maps": []
}).save()

# Create the destination version, starting from the empty skeleton of the
# Makkot index.
# NOTE(review): the versionSource URL below appears to carry a proxy prefix
# ("fanyv88.com") -- confirm the intended source URL.
makkot_skeleton = Index().load({'title': 'Makkot'}).nodes.create_skeleton()
r_gershom_makkot_he = Version({
    "chapter": makkot_skeleton,
    "status": "locked",
    "versionTitle": "Vilna Edition",
    "license": "Public Domain",
    "language": "he",
    "title": "Rabbeinu Gershom on Makkot",
    "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957"
}).save()

# Refs for the whole source and destination texts, and for the two ranges
# the source is divided into.
whole_ref = Ref('Rashi on Makkot')
whole_moved_ref = Ref('Rabbeinu Gershom on Makkot')
stay_section_ref = Ref('Rashi on Makkot.2a.1.1-24a.34.2')
move_section_ref = Ref('Rashi on Makkot.24a.35.1-24b.16.1')

# Both chunks are addressed by the version title of the version saved above.
orig_tc = TextChunk(whole_ref, 'he', r_gershom_makkot_he.versionTitle)
dest_tc = TextChunk(whole_moved_ref, 'he', r_gershom_makkot_he.versionTitle)

# Cut the source text into the part that stays and the part that moves.
# The moved part is deep-copied so it is independent of the source lists.
stay_slice = JaggedTextArray(orig_tc.text).subarray_with_ref(stay_section_ref)
jatext_tostay = stay_slice.array()
move_slice = JaggedTextArray(orig_tc.text).subarray_with_ref(move_section_ref)
jatext_tomove = copy.deepcopy(move_slice.array())

# Pad each slice with empty leading entries, using the sections of its ref,
# so that its overall structure matches the structure of the original text.
jatext_tostay = pad_moved_ja(jatext_tostay, stay_section_ref.sections)
jatext_tomove = pad_moved_ja(jatext_tomove, move_section_ref.sections)

# Write both texts back: the source first, then the destination.
orig_tc.text = jatext_tostay
orig_tc.save()
dest_tc.text = jatext_tomove
dest_tc.save()