Case From Email
Case From Email
Case From Email
config
import imaplib
import io
import json
import base64
import hashlib
import re
import email
import emoji
import urllib.parse
import traceback
import ioc_finder
import thehive4py.api, thehive4py.models, thehive4py.query
def connect_to_IMAP_server(wsl):
    """Open an authenticated IMAP-over-SSL connection to the configured mailbox.

    Host, port and credentials are read from the module-level ``config``
    dictionary (keys ``imapHost``, ``imapPort``, ``imapUser``,
    ``imapPassword``; ``imapFolder`` is only used in the status message).

    Args:
        wsl: Object exposing ``emit_info``, used to report the successful
            connection in addition to the module logger.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection.

    Raises:
        Whatever ``imaplib`` raises when connecting or logging in fails; the
        exception is propagated unchanged.
    """
    # Create the connection to the IMAP server using host and port
    connection = imaplib.IMAP4_SSL(config['imapHost'], config['imapPort'])
    try:
        # Log in using username and password
        connection.login(config['imapUser'], config['imapPassword'])
    except Exception:
        # Do not leave the TLS socket open when authentication fails
        connection.shutdown()
        raise
    # Build the status message once and send it to both sinks
    message = 'Connected to email {0} server {1}:{2}/{3}'.format(
        config['imapUser'],
        config['imapHost'],
        config['imapPort'],
        config['imapFolder'],
    )
    log.info(message)
    wsl.emit_info(message)
    return connection
# Use the ioc-finder module to extract observables from a string buffer and add to
# the list only if they are not whitelisted
def search_observables(buffer, wsl):
    """Extract the non-whitelisted observables contained in a text buffer.

    Email addresses, IPv4 addresses, domain names and URLs are extracted with
    ``ioc_finder``. Every extracted value is checked with ``is_whitelisted``
    and reported (either as skipped or as found) to both the module logger
    and ``wsl``.

    Args:
        buffer: Text to scan for observables.
        wsl: Object exposing ``emit_info``, used to report each observable.

    Returns:
        A list of ``{'type': ..., 'value': ...}`` dictionaries, one per
        non-whitelisted observable, where ``type`` is one of ``'mail'``,
        ``'ip'``, ``'domain'`` or ``'url'``. Observables are grouped by type
        in that order.
    """
    # Run every extractor up front; the tuple order is the reporting order
    extracted = (
        ('mail', ioc_finder.parse_email_addresses(buffer)),
        ('ip', ioc_finder.parse_ipv4_addresses(buffer)),
        ('domain', ioc_finder.parse_domain_names(buffer)),
        # Option to parse URLs without a scheme (e.g. without https://)
        ('url', ioc_finder.parse_urls(buffer, parse_urls_without_scheme=False)),
    )

    observables = []
    for data_type, values in extracted:
        for value in values:
            whitelisted = is_whitelisted(data_type, value)
            if whitelisted:
                template = "Skipped whitelisted observable {0}: {1}"
            else:
                template = "Found observable {0}: {1}"
            message = template.format(data_type, value)
            log.info(message)
            wsl.emit_info(message)
            if not whitelisted:
                observables.append({'type': data_type, 'value': value})
    return observables
# Use the mail UID of the selected email to fetch only that email from the mailbox
def obtain_eml(connection, mail_uid, wsl):
    """Fetch the selected unread email and extract the EML attached to it.

    The configured folder is selected and searched for unread messages. The
    requested email is processed only if its id is among them, which also
    covers the case of several analysts selecting the same email.

    Args:
        connection: IMAP connection exposing ``select``, ``search`` and
            ``fetch``.
        mail_uid: Identifier of the email to process, as a string. It is
            compared against the ids returned by ``search(None, '(UNSEEN)')``.
        wsl: Object exposing ``emit_info`` / ``emit_error``, used to report
            progress in addition to the module logger.

    Returns:
        A tuple ``(internal_msg, external_from_field)``: the attached message
        (``None`` if no suitable attachment part was found) and the sender
        address of the outer email. ``None`` is returned when the email is no
        longer unread or when it cannot be fetched.
    """
    # Read all the unseen emails from this folder
    connection.select(config['imapFolder'])
    typ, dat = connection.search(None, '(UNSEEN)')
    # dat[0] contains the space-separated ids of all the unread emails
    if mail_uid.encode() not in dat[0].split():
        # Handle multiple analysts that select the same email from more than
        # one tab
        log.error("The email with UID {} has already been analyzed. Please refresh the page and retry.".format(mail_uid))
        wsl.emit_error("The email with UID {} has already been analyzed. Please refresh the page and retry.".format(mail_uid))
        return

    typ, dat = connection.fetch(mail_uid.encode(), '(RFC822)')
    if typ != 'OK':
        log.error(dat[-1])
        wsl.emit_error(dat[-1])
        # Without a successful fetch there is no message body to parse
        return
    message = dat[0][1]
    # The fetch operation flags the message as seen by default
    log.info("Message {0} flagged as read".format(mail_uid))
    wsl.emit_info("Message {0} flagged as read".format(mail_uid))

    # Obtain the From field of the external email that will be used to send
    # the verdict to the user
    msg = email.message_from_bytes(message)
    # Decode and join *all* the RFC 2047 chunks of the header: with an encoded
    # display name the address lives in a later chunk than the first one
    decoded_from = str(email.header.make_header(email.header.decode_header(msg['From'])))
    external_from_field = email.utils.parseaddr(decoded_from)[1]

    # Walk the multipart structure of the email (now only the EML part is
    # needed) and stop at the first attachment part: any part after it may
    # belong to the internal email, so it must not be considered.
    # Both message/rfc822 and application/octet-stream types are considered
    # due to differences in how the attachment is handled by different mail
    # clients
    internal_msg = None
    for part in msg.walk():
        mimetype = part.get_content_type()
        if mimetype == 'application/octet-stream':
            eml_payload = part.get_payload(decode=1)
            internal_msg = email.message_from_bytes(eml_payload)
            break
        if mimetype == 'message/rfc822':
            eml_payload = part.get_payload(decode=0)[0]
            try:
                # Some clients attach the EML base64-encoded as a whole
                internal_msg = email.message_from_string(base64.b64decode(str(eml_payload)).decode())
            except Exception:
                # Best effort: otherwise use the already parsed sub-message
                internal_msg = eml_payload
            break

    # NOTE(review): the visible source computes both values but its return
    # statement is not visible; returning them as a tuple is presumed to be
    # the intended contract - confirm against the caller.
    return internal_msg, external_from_field
decoded_elements_subj.append(decode_elem[0].decode(decode_elem[1]))
else:
if(isinstance(decode_elem[0], str)):
decoded_elements_subj.append(str(decode_elem[0]))
else:
decoded_elements_subj.append(decode_elem[0].decode())
subject_field = ''.join(decoded_elements_subj)
# Search the observables in the values of all the selected header fields
# Since a field may appear more than one time (e.g. Received:), the lists
# need to be initialized and then extended
i = 0
while i < len(header_fields.keys()):
if header_fields.keys()[i] in header_fields_list:
if not observables_header.get(header_fields.keys()[i]):
observables_header[header_fields.keys()[i]] = []
observables_header[header_fields.keys()
[i]].extend(search_observables(header_fields.values()[i], wsl))
i+=1
# Create a tuple containing the eml file and the name it should have as an
# observable
filename = subject_field + ".eml"
inmem_file = io.BytesIO()
gen = email.generator.BytesGenerator(inmem_file)
gen.flatten(internal_msg)
eml_file_tuple = (inmem_file, filename)
# Workaround to prevent HTML tags from appearing inside the URLs (splits on
# < or >)
for observable_body in observables_body:
if observable_body['type'] == "url":
observable_body['value'] = observable_body['value'].replace(">",
"<").split("<")[0]
# Add attachments
for attachment in attachments:
observable = thehive4py.models.CaseObservable(
dataType='file',
data = attachment,
ioc = False,
tags = ['email', 'email_attachment'],
message = 'Found as email attachment'
)
response = api_thehive.create_case_observable(new_id, observable)
if response.status_code == 201:
log.info('Added observable file {0} to case
{1}'.format(attachment[1], new_case_id))
wsl.emit_info('Added observable file {0} to case
{1}'.format(attachment[1], new_case_id))
else:
log.debug('Cannot add observable: file {0} - {1}
({2})'.format(attachment[1], response.status_code, response.text))
else:
log.error('Cannot create case: {0} ({1})'.format(response.status_code,
response.text))
wsl.emit_error('Cannot create case: {0}
({1})'.format(response.status_code, response.text))
return
# Return the id of the just created case on which to run the analysis
return new_case
global config
global whitelist
global log
global api_thehive
# Logging configuration
try:
with open('logging_conf.json') as log_conf:
log_conf_dict = json.load(log_conf)
logging.config.dictConfig(log_conf_dict)
except Exception as e:
print("[ERROR]_[list_emails]: Error while trying to open the file
'logging_conf.json'. It cannot be read or it is not valid:
{}".format(traceback.format_exc()))
return
log = logging.getLogger(__name__)
try:
with open('configuration.json') as conf_file:
conf_dict = json.load(conf_file)
# IMAP configuration
config['imapHost'] = conf_dict['imap']['host']
config['imapPort'] = int(conf_dict['imap']['port'])
config['imapUser'] = conf_dict['imap']['user']
config['imapPassword'] = conf_dict['imap']['password']
config['imapFolder'] = conf_dict['imap']['folder']
# TheHive configuration
config['thehiveURL'] = conf_dict['thehive']['url']
config['thehiveApiKey'] = conf_dict['thehive']['apikey']
except Exception as e:
log.error("Error while trying to open the file 'configuration.json':
{}".format(traceback.format_exc()))
wsl.emit_error("Error while trying to open the file
'configuration.json'")
return
# The domains in the last three lists are used to create three
# lists of regular expressions that serve to whitelist subdomains, URLs and email
# addresses based on those domains
whitelist['regexDomainsInSubdomains'] = [r'^(.+\.|)
{0}$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInSubdomains']]
whitelist['regexDomainsInURLs'] = [r'^(http|https):\/\/([^\/]
+\.|){0}(\/.*|\?.*|\#.*|)$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInURLs']]
whitelist['regexDomainsInEmails'] = [r'^.+@(.+\.|)
{0}$'.format(domain.replace(r'.', r'\.')) for domain in
whitelist_dict['domainsInEmails']]
except Exception as e:
log.error("Error while trying to open the file 'whitelist.json':
{}".format(traceback.format_exc()))
wsl.emit_error("Error while trying to open the file 'whitelist.json'")
return