summaryrefslogtreecommitdiff
path: root/tools/commitfest/check_patches_in_archives.py
blob: 148de2bfcdfe3e1e16db69de87089d5860d06810 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
#
# check_patches_in_archives.py
#
# Download and check attachments in the archives, to see if they are
# actually patches. We do this asynchronously in a separate script
# so we don't block the archives unnecessarily.
#

import os
import sys
import requests
import magic
import logging

# Set up for accessing django.
# This script is run standalone, so the Django environment is bootstrapped
# by hand here before any project code is imported.

# Make the directory two levels above this script importable. The location
# is derived from sys.argv[0], i.e. from how the script was invoked
# (presumably this is the directory holding the pgcommitfest package --
# confirm against the repository layout).
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), '../../'))
# setdefault() only applies when the environment does not already name a
# settings module, so an externally provided value wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pgcommitfest.settings")
import django
django.setup()

# These imports intentionally come after django.setup(): Django needs to be
# configured before settings-dependent modules and models are loaded.
from django.db import connection
from django.conf import settings

from pgcommitfest.commitfest.models import MailThreadAttachment

if __name__ == "__main__":
    # Script entry point: fetch every attachment that has not been classified
    # yet from the archives, sniff its MIME type, and record on the
    # MailThreadAttachment row whether it is a plain patch.
    #
    # Command line: pass --debug to get DEBUG-level log output.
    debug = "--debug" in sys.argv

    # Logging always goes to stdout; --debug only controls how verbose it is.
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=logging.DEBUG if debug else logging.INFO,
                        stream=sys.stdout)

    mag = magic.open(magic.MIME)
    mag.load()

    logging.debug("Updating attachment metadata from archives")

    # The scheme/server/port part of the URL is the same for every
    # attachment, so build it once. https is used only when the configured
    # port is 443. The Host header is sent separately because the server we
    # connect to (ARCHIVES_SERVER) may differ from the public hostname.
    scheme = "https" if settings.ARCHIVES_PORT == 443 else "http"
    baseurl = "{0}://{1}:{2}".format(scheme,
                                     settings.ARCHIVES_SERVER,
                                     settings.ARCHIVES_PORT)
    headers = {
        'Host': settings.ARCHIVES_HOST,
    }

    # Try to fetch/scan all attachments that haven't already been scanned.
    # If they have already been scanned, we don't bother.
    # We will hit the archives without delay when doing this, but that
    # should generally not be a problem because it's not going to be
    # downloading a lot...
    for a in MailThreadAttachment.objects.filter(ispatch__isnull=True):
        url = "/message-id/attachment/%s/attach" % a.attachmentid
        logging.debug("Checking attachment %s", a.attachmentid)

        # A timeout or connection failure for one attachment must not abort
        # the whole run; the row stays unscanned and is retried next time.
        try:
            resp = requests.get(
                baseurl + url,
                headers=headers,
                timeout=settings.ARCHIVES_TIMEOUT,
            )
        except requests.exceptions.RequestException as e:
            logging.error("Failed to get %s: %s", url, e)
            continue

        if resp.status_code != 200:
            logging.error("Failed to get %s: %s", url, resp.status_code)
            continue

        # Attempt to identify the file using magic information
        mtype = mag.buffer(resp.content)
        logging.debug("Detected MIME type is %s", mtype)

        # NOTE(review): the libmagic bindings appear to return None when
        # detection fails -- confirm. Leave the row unscanned in that case
        # rather than crashing on None or guessing a verdict.
        if mtype is None:
            logging.error("Could not detect MIME type of attachment %s",
                          a.attachmentid)
            continue

        # We don't support gzipped or tarred patches or anything like
        # that at this point - just plain patches.
        a.ispatch = mtype.startswith('text/x-diff')
        logging.info("Attachment %s is patch: %s", a.id, a.ispatch)
        a.save()

    connection.close()
    logging.debug("Done.")