forked from Sefaria/Sefaria-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop_texts.js
141 lines (109 loc) · 3.6 KB
/
top_texts.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Generate scores for top text sections (Chapters, Dafs etc).
// Relies on the mongo shell helpers connect() and print().
var db = connect("localhost:27017/sefaria");

// `user` and `password` may be set from the command line (using --eval)
// to authenticate. Authentication is skipped unless both are defined.
if (typeof user !== 'undefined' && typeof password !== 'undefined') {
    db.auth(user, password);
}

// Timestamp used only for the log line below.
var now = new Date();
print("Calculating top texts at " + now);
// ----------- Count Top Chapters by # of Connections ------------
// Map step over a link document (`this`): for each of its first two refs,
// emits {ref, count: 1} keyed by the ref with its last ":"-separated
// segment removed.
var mapper = function () {
    var countText = function(ref) {
        // Skip a missing ref instead of throwing inside the map function.
        if (!ref) { return; }
        // A ref without a ":" past its first character is used as the key
        // unchanged; cutting at lastIndexOf() === -1 would yield "" and lump
        // every such ref under one empty key.
        var key = ref.indexOf(":") > 0 ? ref.slice(0, ref.lastIndexOf(":")) : ref;
        var value = {
            ref: ref,
            count: 1
        };
        emit(key, value);
    };

    countText(this.refs[0]);
    countText(this.refs[1]);
};
// Reduce step: adds up the `count` field of every value emitted for `key`
// and returns the total as {ref: key, count: total}.
var reducer = function(key, values) {
    var total = values.reduce(function(sum, entry) {
        return sum + entry.count;
    }, 0);

    return { ref: key, count: total };
};
// Run the map/reduce pair over the links collection and write the per-key
// totals to the "texts_by_connections" collection.
db.links.mapReduce(
    mapper,
    reducer,
    { out: "texts_by_connections" }
);
// ----------- Count Top Chapters by # of Distinct Connections ------------
// Map step over a link document (`this`): each of the two refs is emitted
// once as the key (last ":" segment removed) with the *other* ref, cut at its
// last space, as the `link` of the emitted value.
var mapper = function () {
    // Returns `text` up to (not including) the last `sep`. When `sep` is
    // absent or first occurs at index 0, `text` is returned unchanged.
    var trimAfterLast = function(text, sep) {
        if (text.indexOf(sep) > 0) {
            return text.slice(0, text.lastIndexOf(sep));
        }
        return text;
    };

    var emitPair = function(base, other) {
        var key = trimAfterLast(base, ":");
        var link = trimAfterLast(other, " ");
        emit(key, { link: link, count: 1 });
    };

    var refs = this.refs;
    emitPair(refs[0], refs[1]);
    emitPair(refs[1], refs[0]);
};
// Reduce step: counts the distinct links seen for `key`.
//
// Input values are either raw map output ({link, count}) or the result of an
// earlier reduce pass for the same key ({ref, count, links}). MongoDB may feed
// reduce output back into reduce, so the distinct links are carried along in
// `links`; without them a re-reduce could not tell which links were already
// counted.
//
// Returns {ref: key, count: <number of distinct links>, links: [<link>, ...]}.
var reducer = function(key, values) {
    // Local set of links already counted (kept local; no global state).
    var seen = {};
    var distinct = [];
    var hasOwn = Object.prototype.hasOwnProperty;

    var remember = function(link) {
        if (!hasOwn.call(seen, link)) {
            seen[link] = true;
            distinct.push(link);
        }
    };

    values.forEach(function(value) {
        if (value.links) {
            // Partial result from a previous reduce pass: merge its set.
            value.links.forEach(function(link) {
                remember(link);
            });
        } else {
            // Raw value emitted by the map step.
            remember(value.link);
        }
    });

    return {
        ref: key,
        count: distinct.length,
        links: distinct
    };
};
// Run the distinct-connections map/reduce pair over the links collection and
// write the results to the "texts_by_distinct_connections" collection.
db.links.mapReduce(
    mapper,
    reducer,
    { out: "texts_by_distinct_connections" }
);
// ------------- Count Top Chapters by # Connections X # Distinct Connections -----------
// For every row of texts_by_connections that has a row with the same _id in
// texts_by_distinct_connections, save the product of the two counts to
// texts_by_multiplied_connections under that _id. Rows without a match are
// skipped.
var texts = db.texts_by_connections.find();
texts.forEach(function(t) {
    // Locals are declared with `var` so nothing leaks into the shell's globals.
    var dt = db.texts_by_distinct_connections.findOne({_id: t._id});
    if (!dt) { return; }

    var count = {
        _id: t._id,
        count: t.value.count * dt.value.count
    };
    db.texts_by_multiplied_connections.save(count);
});
// ----------- Count Top Chapters by Activity ---------------
// Map step over a history document (`this`): emits activity points keyed by
// the ref with its last ":" segment removed.
//   "add link"  -> 1 point for each of new.refs[0] and new.refs[1]
//   "add text"  -> 8 points for an English "Sefaria Community Translation",
//                  2 for any other English version, 1 for other languages
//   "edit text" -> 1 point
// Any other rev_type emits nothing.
var mapper = function () {
    // Emits `points` for `ref`; a falsy ref is ignored.
    var award = function(ref, points) {
        if (!ref) { return; }
        var key = ref;
        if (ref.indexOf(":") > 0) {
            key = ref.slice(0, ref.lastIndexOf(":"));
        }
        emit(key, points);
    };

    var kind = this.rev_type;

    if (kind == "add link") {
        award(this.new.refs[0], 1);
        award(this.new.refs[1], 1);
        return;
    }

    if (kind == "add text") {
        var points = 1;
        if (this.language == "en") {
            points = this.versionTitle === "Sefaria Community Translation" ? 8 : 2;
        }
        award(this.ref, points);
        return;
    }

    if (kind == "edit text") {
        award(this.ref, 1);
    }
};
// Reduce step: returns the sum of all point values emitted for `key`.
var reducer = function(key, values) {
    return values.reduce(function(total, value) {
        return total + value;
    }, 0);
};
// Runs the activity map/reduce over history documents whose `date` is later
// than `days` days before now, writing to "texts_by_activity_<days>".
var countActivity = function(days) {
    var since = new Date();
    since.setDate(since.getDate() - days);

    var options = {
        out: "texts_by_activity_" + days,
        query: {date: {$gt: since}}
    };
    db.history.mapReduce(mapper, reducer, options);
};
// Score activity over the trailing 7-, 14- and 30-day windows, in that order.
[7, 14, 30].forEach(function(days) {
    countActivity(days);
});