generated from greenelab/lab-website-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgoogle-scholar.py
61 lines (49 loc) · 1.68 KB
/
google-scholar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
from serpapi import GoogleSearch
from util import *
def main(entry):
    """
    receives single list entry from google-scholar data file
    returns list of sources to cite

    entry: mapping that must provide a non-empty "gsid" key, which is sent
        to the api as "author_id". all fields of entry are copied onto every
        returned source, overriding same-named fields built from the api.

    returns: list of dicts, one per article in the api response, each with
        "id", "title", "authors", "publisher", "date" and "link" keys (plus
        the fields copied from entry).

    raises: Exception if the GOOGLE_SCHOLAR_API_KEY env var is unset or
        empty, or if entry has no "gsid" value.
    """

    # get api key (serp api key to access google scholar)
    api_key = os.environ.get("GOOGLE_SCHOLAR_API_KEY", "")
    if not api_key:
        raise Exception('No "GOOGLE_SCHOLAR_API_KEY" env var')

    # serp api properties
    params = {
        "engine": "google_scholar_author",
        "api_key": api_key,
        "num": 100,  # max allowed
    }

    # get id from entry
    _id = get_safe(entry, "gsid", "")
    if not _id:
        raise Exception('No "gsid" key')

    # query api
    # results are memoized with a one-day expiry; only a single request is
    # made, so at most one page ("num" articles) is returned
    @log_cache
    @cache.memoize(name=__file__, expire=1 * (60 * 60 * 24))
    def query(_id):
        # build a fresh dict per request rather than mutating shared params
        request = {**params, "author_id": _id}
        return get_safe(GoogleSearch(request).get_dict(), "articles", [])

    response = query(_id)

    # list of sources to return
    sources = []

    # go through response and format sources
    for work in response:
        # f-string so that a numeric year does not break the concatenation
        year = get_safe(work, "year", "")
        date = f"{year}-01-01" if year else ""

        # split comma-separated author string, dropping blank names so that
        # a work without authors yields [] instead of [""]
        raw_authors = get_safe(work, "authors", "") or ""
        authors = [
            name.strip() for name in str(raw_authors).split(",") if name.strip()
        ]

        # create source
        source = {
            "id": get_safe(work, "citation_id", ""),
            # api does not provide Manubot-citeable id, so keep citation details
            "title": get_safe(work, "title", ""),
            "authors": authors,
            "publisher": get_safe(work, "publication", ""),
            "date": date,
            "link": get_safe(work, "link", ""),
        }

        # copy fields from entry to source
        source.update(entry)

        # add source to list
        sources.append(source)

    return sources