Appendices A–D
Appendices A–D
APPENDICES
Appendix A
Project Gantt Chart
45
Appendix B
Relevant Source Code
47
App.py
# NOTE(review): the import statements this script relies on (pandas as `pd`,
# plus the Flask names used further down) are not shown in this excerpt.

# Load the phishing dataset and discard its 'Index' column in one step.
data = pd.read_csv("phishing.csv").drop(columns=["Index"])

# Split the dataset into independent features (X) and the dependent
# target column (y).
X = data.drop(columns=["class"])
y = data["class"]
# Flask needs the importing module's name to locate templates and static
# files; the built-in for that is `__name__` (double underscores).
# `_name_` is an undefined name and raises NameError at import time.
app = Flask(__name__)


@app.route("/")
def index():
    """Serve the landing page at ``/``.

    Renders ``index.html`` with ``xx=-1``.
    NOTE(review): -1 looks like a "no prediction yet" sentinel, because the
    page script in this appendix only shows a result for values in [0, 1];
    confirm against the full template.
    """
    return render_template("index.html", xx=-1)
Features
import ipaddress
import re
import urllib.request
from bs4 import BeautifulSoup
import socket
48
import requests
from googlesearch import search
import whois
from datetime import date, datetime
import time
from dateutil.parser import parse as date_parse
# Alternation of known URL-shortening services (feature 3). Raw strings keep
# the `\.` escapes from being read as (invalid) string escape sequences.
_SHORTENING_SERVICES = re.compile(
    r'bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|'
    r'yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|'
    r'short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|'
    r'doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|'
    r'db\.tt|qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|'
    r'q\.gs|is\.gd|po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|'
    r'x\.co|prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|'
    r'v\.gd|tr\.im|link\.zip\.net'
)

# Upper bound, in seconds, for each outbound HTTP request so that an
# unresponsive site cannot stall the caller indefinitely.
_REQUEST_TIMEOUT = 10


def generate_data_set(url):
    """Build the list of phishing-detection feature values for ``url``.

    Features are appended in this order:

    1. UsingIP  -- -1 if the URL's host is an IP address literal, else 1.
    2. LongURL  -- 1 if the URL is shorter than 54 characters, 0 if it is
       54 to 75 characters long, -1 if longer.
    3. ShortURL -- -1 if the URL mentions a known shortening service, else 1.

    The page fetch and the page-rank lookup are best effort: on failure the
    placeholders ``response = ""``, ``soup = -999`` and ``global_rank = -1``
    are used instead of raising.

    Args:
        url: The URL to analyse, as a string.

    Returns:
        The list of feature values, in the order given above.
    """
    # Function-scope import: only the host-extraction step below needs it.
    from urllib.parse import urlparse

    data_set = []

    # Host part of the URL. Scheme-less input such as "example.com/x" is
    # parsed as a network location rather than as a path.
    try:
        hostname = urlparse(url if "://" in url else "//" + url).hostname or ""
    except ValueError:  # e.g. unbalanced IPv6 brackets
        hostname = ""
    domain = hostname[4:] if hostname.startswith("www.") else hostname

    # NOTE(review): `response`, `soup`, `rank_checker_response` and
    # `global_rank` are not used by the three features below; presumably
    # they feed feature checks that are not part of this excerpt.
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')
    except Exception:
        response = ""
        soup = -999

    try:
        rank_checker_response = requests.post(
            "https://www.checkpagerank.net/index.php",
            {"name": domain},
            timeout=_REQUEST_TIMEOUT,
        )
        rank_page = rank_checker_response.text
    except requests.RequestException:
        rank_checker_response = None
        rank_page = ""
    rank_matches = re.findall(r"Global Rank: ([0-9]+)", rank_page)
    global_rank = int(rank_matches[0]) if rank_matches else -1

    # 1.UsingIP
    # Test the host, not the whole URL: a string such as "http://1.2.3.4/"
    # is never a valid IP address, so checking `url` could not detect one.
    try:
        ipaddress.ip_address(hostname)
        data_set.append(-1)
    except ValueError:
        data_set.append(1)

    # 2.LongURL
    url_length = len(url)
    if url_length < 54:
        data_set.append(1)
    elif url_length <= 75:
        data_set.append(0)
    else:
        data_set.append(-1)

    # 3.ShortURL
    # NOTE(review): this is a substring search over the whole URL, so a
    # short entry such as "t.co" also matches inside longer host names;
    # consider matching on the host only.
    if _SHORTENING_SERVICES.search(url):
        data_set.append(-1)
    else:
        data_set.append(1)

    return data_set
Gradientboostclassifier
#importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
# scikit-learn pieces used below; the visible import block only brings in
# `sklearn.metrics`.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

#Loading Data
data = pd.read_csv("phishing.csv")

# Notebook-style inspection of the raw data. As bare expressions these only
# display output in an interactive session (data.info() prints by itself).
data.head()
data.columns
data.info()
data.nunique()

# Drop the 'Index' column before modelling.
data = data.drop(['Index'],axis = 1)

#description of dataset
data.describe().T

# Independent features (X) and dependent target (y).
X = data.drop(["class"],axis =1)
y = data["class"]

#Splitting the dataset into train and test sets: 80-20 split
# The split itself was missing, leaving X_train/X_test/y_train/y_test
# undefined. The seed is fixed so that results are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Result containers. NOTE(review): nothing in this excerpt fills them;
# presumably they are populated by evaluation code that is not shown.
ML_Model = []
accuracy = []
f1_score = []
recall = []
precision = []

# Train the gradient boosting classifier on the training split.
gbc = GradientBoostingClassifier(max_depth=4,learning_rate=0.7)
gbc.fit(X_train,y_train)

#predicting the target value from the model for the samples
y_train_gbc = gbc.predict(X_train)
y_test_gbc = gbc.predict(X_test)
HTML
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="description" content="This website is developed to identify the safety of a url.">
    <meta name="keywords" content="phishing url,phishing,cyber security,machine learning,classifier,python">
    <meta name="author" content="VJCET">
    <!-- NOTE(review): no <title>, stylesheet or jQuery include is visible in
         this excerpt. The inline script below calls $(), so jQuery must be
         loaded before it; confirm against the full template. -->
</head>
<body>
    <div class="main-body">
        <!-- Input form: posts the URL to the /predict endpoint. -->
        <div class="form" id="form1">
            <br><br><br>
            <form action="/predict" method="post">
                <input type="text" class="form__input" name="url" id="url" placeholder="Type a URL" required />
                <button class="button" id="bxt" role="button">Click here to check</button>
            </form>
            <br>
            <br>
        </div>
        <!-- Result panel: hidden on load, revealed by the script below. -->
        <div class="hi" id="form2">
            <br>
            <!-- The href value is quoted so a URL containing spaces or '>'
                 cannot break out of the attribute. -->
            <h6 class="right"><a href="{{ url }}" target="_blank" rel="noopener noreferrer">{{ url }}</a></h6>
            <br>
            <h3 id="prediction"></h3>
            <!-- Both buttons return to the app's root page. A relative '/'
                 works on whatever host serves the app, unlike a hard-coded
                 http://127.0.0.1:5000. -->
            <button class="button2" id="button2" role="button" onclick="window.location.replace('/')"> Continue ?</button>
            <button class="button1" id="button1" role="button" onclick="window.location.replace('/')">Continue?</button>
        </div>
    </div>
    <!-- NOTE(review): the package version in this URL was garbled in the
         source; 5.1.3 is the release this SRI hash is believed to belong
         to. Verify before deploying. -->
    <script
        src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.min.js"
        integrity="sha384-QJHtvGhmr9XOIpI6YVutG+2QOK9T+ZnN4kzFN1RtK3zEFEIsxhlmWl5/YESvpZ13"
        crossorigin="anonymous"></script>
    <script>
        // The result panel stays hidden until there is a prediction to show.
        $('#form2').hide();

        // `xx` is filled in by the server-side template. Values in [0, 1]
        // are treated as a prediction score; anything else shows no result.
        const x = Number('{{xx}}');

        // Format a percentage with at most two decimals, so binary
        // floating-point artefacts (0.57 * 100 -> 56.99999999999999) are
        // not shown to the user.
        function formatPercent(value) {
            return parseFloat(value.toFixed(2)).toString();
        }

        // Show `message`, reveal the given button, and swap the input form
        // for the result panel after a one-second delay.
        function showResult(message, buttonId) {
            document.getElementById("prediction").textContent = message;
            document.getElementById(buttonId).style.display = "block";
            $('#form1').hide();
            setTimeout(function () {
                $('#form2').show();
            }, 1000);
        }

        if (x >= 0.50 && x <= 1) {
            showResult("Website is " + formatPercent(x * 100) + "% safe to use...", "button1");
        } else if (x >= 0 && x < 0.50) {
            // For the "unsafe" message, report the complement of the score.
            showResult("Website is " + formatPercent(100 - x * 100) + "% unsafe to use...", "button2");
        }
    </script>
</body>
</html>
54
Appendix C
User’s Guide
55
C. User's Guide
3. After assessing the URL, the system will show “This Website is
Safe to use” or “This Website is not safe to use”. Check the
“Features table” to see which features the URL matches and the
percentage likelihood of it being classified as a phishing site.
Appendix D
Evaluation Tool
59
D. Evaluation Tool
60
61
62
63
64
65
66