1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
#!/usr/bin/env python3
#
# check_patches_in_archives.py
#
# Download and check attachments in the archives, to see if they are
# actually patches. We do this asynchronously in a separate script
# so we don't block the archives unnecessarily.
#
import os
import sys
import requests
import magic
import logging
# Set up for accessing django.
# The directory two levels above this script's own directory is appended to
# sys.path so that the "pgcommitfest" package can be imported (presumably
# that directory is the project root -- confirm against the repository
# layout).
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), '../../'))
# setdefault() keeps any DJANGO_SETTINGS_MODULE that is already present in
# the environment; the project settings module is only the fallback.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pgcommitfest.settings")
# django.setup() has to run before the model import that follows, which is
# why the remaining imports come after this point rather than at the top.
import django
django.setup()
from django.db import connection
from django.conf import settings
from pgcommitfest.commitfest.models import MailThreadAttachment
def _archives_url(path):
    """Return the full archives URL for the server-relative *path*.

    The scheme is https only when settings.ARCHIVES_PORT is 443; the port
    is always spelled out explicitly in the resulting URL.
    """
    scheme = "https" if settings.ARCHIVES_PORT == 443 else "http"
    return "{0}://{1}:{2}{3}".format(
        scheme,
        settings.ARCHIVES_SERVER,
        settings.ARCHIVES_PORT,
        path,
    )


def main():
    """Flag every not-yet-scanned mail attachment as patch / not patch.

    Each MailThreadAttachment whose ``ispatch`` is still NULL is downloaded
    from the archives server, its MIME type is detected with libmagic, and
    ``ispatch`` is saved as True only for ``text/x-diff`` content.

    An attachment that cannot be fetched or identified is logged and left
    unscanned, so a later run picks it up again. Passing ``--debug`` on the
    command line raises the log level from INFO to DEBUG.
    """
    debug = "--debug" in sys.argv

    # Logging always done to stdout, but we can turn on/off how much.
    # %(message)s is the fully interpolated text; the raw %(msg)s attribute
    # would print the un-substituted template for calls that pass arguments.
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=logging.DEBUG if debug else logging.INFO,
                        stream=sys.stdout)

    mag = magic.open(magic.MIME)
    mag.load()

    logging.debug("Updating attachment metadata from archives")

    try:
        # Try to fetch/scan all attachments that haven't already been scanned.
        # If they have already been scanned, we don't bother.
        # We will hit the archives without delay when doing this, but that
        # should generally not be a problem because it's not going to be
        # downloading a lot...
        for a in MailThreadAttachment.objects.filter(ispatch__isnull=True):
            url = "/message-id/attachment/%s/attach" % a.attachmentid
            logging.debug("Checking attachment %s", a.attachmentid)

            try:
                resp = requests.get(
                    _archives_url(url),
                    headers={
                        'Host': settings.ARCHIVES_HOST,
                    },
                    timeout=settings.ARCHIVES_TIMEOUT,
                )
            except requests.RequestException as e:
                # A timeout or connection failure on one attachment must not
                # abort the whole batch; it stays unscanned for the next run.
                logging.error("Failed to get %s: %s", url, e)
                continue

            if resp.status_code != 200:
                logging.error("Failed to get %s: %s", url, resp.status_code)
                continue

            # Attempt to identify the file using magic information
            mtype = mag.buffer(resp.content)
            logging.debug("Detected MIME type is %s", mtype)
            if mtype is None:
                # NOTE(review): the libmagic binding appears to return None
                # when detection fails; skip rather than guess a verdict.
                logging.error("Could not detect MIME type of attachment %s",
                              a.attachmentid)
                continue

            # We don't support gzipped or tarred patches or anything like
            # that at this point - just plain patches.
            a.ispatch = mtype.startswith('text/x-diff')
            logging.info("Attachment %s is patch: %s", a.id, a.ispatch)
            a.save()
    finally:
        # Release the database connection even if the scan was interrupted.
        connection.close()

    logging.debug("Done.")


if __name__ == "__main__":
    main()
|