import logging
import requests
import threading
from time import sleep
from random import choice
from bs4 import BeautifulSoup
from unidecode import unidecode
from urllib.parse import urlparse
from crosslinked.logger import Log
from datetime import datetime, timedelta

from urllib3 import disable_warnings, exceptions
disable_warnings(exceptions.InsecureRequestWarning)
logging.getLogger("urllib3").setLevel(logging.WARNING)
csv = logging.getLogger("cLinked_csv")
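

# Timer runs on a background thread and flips `running` to False once `timeout`
# seconds have elapsed, giving the search loop a soft deadline it can poll.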
class Timer(threading.Thread):
    def __init__(self, timeout):
        threading.Thread.__init__(self)
        self.start_time = None
        self.running = None
        self.timeout = timeout

    def run(self):
        self.running = True
        self.start_time = datetime.now()
        logging.debug("Thread Timer: Started")
        while self.running:
            if (datetime.now() - self.start_time) > timedelta(seconds=self.timeout):
                self.stop()
            sleep(0.05)

    def stop(self):
        logging.debug("Thread Timer: Stopped")
        self.running = False
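

# CrossLinked queries Google or Bing for linkedin.com/in results matching the
# target organization, then parses names and titles from the result snippets.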
class CrossLinked:
    def __init__(self, search_engine, target, timeout, conn_timeout=3, proxies=[], jitter=0):
        self.results = []
        self.url = {
            "google": 'https://www.google.com/search?q=site:linkedin.com/in+"{}"&num=100&start={}',
            "bing": 'http://www.bing.com/search?q="{}"+site:linkedin.com/in&first={}',
        }
        self.runtime = datetime.now().strftime("%m-%d-%Y %H:%M:%S")
        self.search_engine = search_engine
        self.conn_timeout = conn_timeout
        self.timeout = timeout
        self.proxies = proxies
        self.target = target
        self.jitter = jitter
    def search(self):
        search_timer = Timer(self.timeout)
        search_timer.start()

        while search_timer.running:
            try:
                # Use the current result count as the pagination offset.
                url = self.url[self.search_engine].format(self.target, len(self.results))
                resp = web_request(url, self.conn_timeout, self.proxies)
                http_code = get_statuscode(resp)

                if http_code != 200:
                    Log.info("{:<3} {} ({})".format(len(self.results), url, http_code))
                    Log.warn("Non-200 response, exiting search ({})".format(http_code))
                    break

                self.page_parser(resp)
                Log.info("{:<3} {} ({})".format(len(self.results), url, http_code))
                sleep(self.jitter)
            except KeyboardInterrupt:
                Log.warn("Key event detected, exiting search...")
                break

        search_timer.stop()
        return self.results

    def page_parser(self, resp):
        # Feed every anchor tag on the results page through the results handler.
        for link in extract_links(resp):
            try:
                self.results_handler(link)
            except Exception as e:
                logging.debug("Parser error: {}".format(e))

    def results_handler(self, link):
        url = str(link.get("href")).lower()

        # Keep only LinkedIn profile links.
        if not extract_subdomain(url).endswith("linkedin.com"):
            return False
        elif "linkedin.com/in" not in url:
            return False

        # Snippet format assumption: "First Last - Job Title | LinkedIn". The
        # partition below is a best-effort reconstruction of the name/title parsing.
        text = unidecode(link.text.split("|")[0].split("...")[0]).strip()
        name, _, title = text.partition(" - ")
        d = {"name": name.strip(), "title": title.strip(), "url": url, "text": text}
        self.results.append(d)

        # Search results are logged to names.csv, but names.txt is not generated
        # until the end to prevent duplicates.
        logging.debug("name: {:25} RawTxt: {}".format(d["name"], d["text"]))
        csv.info('"{}","{}","{}","{}","{}","{}",'.format(
            self.runtime, self.search_engine, d["name"], d["title"], d["url"], d["text"],
        ))


def get_statuscode(resp):
    try:
        return resp.status_code
    except AttributeError:
        # web_request() returns None on failure; report 0 so callers bail out.
        return 0


def get_proxy(proxies):
    # Choose a random proxy for this request, or none for a direct connection.
    tmp = choice(proxies) if proxies else False
    return {"http": tmp, "https": tmp} if tmp else {}


def get_agent():
    # Return a random, modern desktop User-Agent string.
    return choice(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 12.5; rv:104.0) Gecko/20100101 Firefox/104.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
        ]
    )
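

# web_request() is referenced in CrossLinked.search() but is not defined in this
# file. A minimal sketch, assuming the `requests` library plus the get_agent()
# and get_proxy() helpers above; verify=False matches the InsecureRequestWarning
# suppression at the top of the module. Returns None on any failure.
def web_request(url, timeout=3, proxies=[]):
    try:
        headers = {"User-Agent": get_agent()}
        return requests.get(url, headers=headers, timeout=timeout,
                            proxies=get_proxy(proxies), verify=False)
    except requests.exceptions.ConnectionError:
        Log.warn("Connection error requesting: {}".format(url))
    except Exception as e:
        logging.debug("Web request error: {}".format(e))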


def extract_links(resp):
    links = []
    soup = BeautifulSoup(resp.content, "lxml")
    for link in soup.findAll("a"):
        links.append(link)
    return links


def extract_subdomain(url):
    return urlparse(url).netloc
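

# Example usage, with hypothetical target values: search Google for LinkedIn
# profiles at "Acme Corp" for up to 20 seconds, pausing 1 second per request.
if __name__ == "__main__":
    for r in CrossLinked("google", "Acme Corp", timeout=20, jitter=1).search():
        print("{} - {}".format(r["name"], r["title"]))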