Scam Detection

The document contains the code for a Website Scam Analyzer, including HTML, CSS, and JavaScript for the front-end interface, and Python scripts for back-end analysis. The front-end allows users to input a URL for analysis, displaying results such as risk levels, SSL certificate information, and AI recommendations. The back-end processes the URL, evaluates potential risks, and generates assessments based on various factors, including SSL validity and sentiment analysis of the website content.

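Before the individual files, a minimal sketch of the round trip described above: the front-end POSTs a URL to the Flask /analyze endpoint and renders the JSON assessment that comes back. This client is illustrative only and is not one of the uploaded files; it assumes the app is running locally on port 8000 (as in the run script near the end of the document), and the field names are taken from analyzer.py and main.js below.

# Illustrative client for the /analyze endpoint (assumes the app runs on localhost:8000).
import requests

resp = requests.post(
    "http://localhost:8000/analyze",
    json={"url": "https://example.com"},
    timeout=60,
)
resp.raise_for_status()
report = resp.json()

print(report["overall_assessment"])   # e.g. "Low risk detected. Website appears to be legitimate."
print(report["ssl_certificate"])      # is_valid, issuer, expires, trust_score
print(report["risk_factors"])         # list of matched scam patterns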

style.css:
:root {
  --primary-blue: #2196F3;
  --warning-red: #FF5252;
  --safe-green: #4CAF50;
  --light-grey: #F5F5F5;
  --dark-grey: #333333;
}

body {
  font-family: "SF Pro Display", "Roboto", -apple-system, sans-serif;
  background-color: #f8f9fa; /* Updated background color */
  color: var(--dark-grey);
}

.container {
  padding: 16px;
  max-width: 1000px; /* Added max-width */
}

.shield-icon {
  color: var(--primary-blue);
}

.btn-primary {
  background-color: var(--primary-blue);
  border-color: var(--primary-blue);
}

.btn-primary:hover {
  background-color: #1976D2;
  border-color: #1976D2;
}

.badge {
  font-size: 1rem; /* Updated font size */
  padding: 0.5rem 1rem; /* Updated padding */
}

.badge.risk-high {
  background-color: var(--warning-red);
}

.badge.risk-medium {
  background-color: #FFA726;
}

.badge.risk-low {
  background-color: var(--safe-green);
}

.card {
  border-radius: 8px;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.list-group-item {
  border: none;
  padding: 1rem; /* Updated padding */
  border-left: none; /* Added border styles */
  border-right: none;
}

#loadingIndicator {
  margin: 2rem 0;
}

.spinner-border {
  width: 3rem;
  height: 3rem;
}

@media (max-width: 768px) {
  .container {
    padding: 8px;
  }

  h1 {
    font-size: 1.75rem;
  }
}

.shield-icon {
  color: #0d6efd;
}

.risk-badge {
  font-size: 1rem;
}

.risk-badge.HIGH {
  background-color: #dc3545;
}

.risk-badge.MEDIUM {
  background-color: #ffc107;
}

.risk-badge.LOW {
  background-color: #198754;
}

.loading-overlay {
  position: fixed;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background: rgba(255, 255, 255, 0.8);
  display: flex;
  justify-content: center;
  align-items: center;
  z-index: 1000;
}

.accordion-button:not(.collapsed) {
  background-color: #e7f1ff;
}

.threat-category {
  padding: 0.5rem;
  margin-bottom: 0.5rem;
  border-radius: 0.25rem;
}

.threat-category.CRITICAL {
  background-color: #dc3545;
  color: white;
}

.threat-category.HIGH {
  background-color: #ffc107;
}

.threat-category.MEDIUM {
  background-color: #0dcaf0;
}

.threat-category.LOW {
  background-color: #198754;
  color: white;
}

#urlInput {
  font-size: 1.1rem;
  padding: 0.75rem;
}

#analyzeBtn {
  padding: 0.75rem 2rem;
  font-size: 1.1rem;
}

#riskBadge.badge-high {
  background-color: #dc3545;
  color: white;
}

#riskBadge.badge-medium {
  background-color: #ffc107;
  color: black;
}

#riskBadge.badge-low {
  background-color: #28a745;
  color: white;
}

.alert {
  font-size: 1.1rem;
}

main.js:
document.addEventListener('DOMContentLoaded', () => {
  const urlInput = document.getElementById('urlInput');
  const analyzeBtn = document.getElementById('analyzeBtn');
  const loadingIndicator = document.getElementById('loadingIndicator');
  const results = document.getElementById('results');
  const riskBadge = document.getElementById('riskBadge');
  const overallAssessment = document.getElementById('overallAssessment');
  const riskFactors = document.getElementById('riskFactors');
  const safeIndicators = document.getElementById('safeIndicators');
  let riskChart = null;

  function getBadgeClass(riskLevel) {
    switch (riskLevel?.toUpperCase()) {
      case 'HIGH':
        return 'bg-danger';
      case 'MEDIUM':
        return 'bg-warning text-dark';
      case 'LOW':
        return 'bg-success';
      default:
        return 'bg-secondary';
    }
  }

  function getAlertClass(riskLevel) {
    switch (riskLevel?.toUpperCase()) {
      case 'HIGH':
        return 'alert-danger';
      case 'MEDIUM':
        return 'alert-warning';
      case 'LOW':
        return 'alert-success';
      default:
        return 'alert-info';
    }
  }

  analyzeBtn.addEventListener('click', async () => {
    const url = urlInput.value.trim();
    if (!url) {
      alert('Please enter a valid URL');
      return;
    }

    try {
      loadingIndicator.classList.remove('d-none');
      results.classList.add('d-none');
      analyzeBtn.disabled = true;
      riskFactors.innerHTML = '';
      safeIndicators.innerHTML = '';

      const response = await fetch('/analyze', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ url }),
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const data = await response.json();

      // Update risk badge
      riskBadge.textContent = data.risk_level;
      riskBadge.className = `badge ${getBadgeClass(data.risk_level)}`;

      // Update overall assessment
      overallAssessment.textContent = data.overall_assessment;
      overallAssessment.className = `alert ${getAlertClass(data.risk_level)}`;

      // Update risk factors
      data.risk_factors?.forEach(factor => {
        const li = document.createElement('li');
        li.className = 'list-group-item text-danger';
        li.innerHTML = `<i class="fas fa-exclamation-triangle me-2"></i>${factor}`;
        riskFactors.appendChild(li);
      });

      // Update safe indicators
      data.safe_indicators?.forEach(indicator => {
        const li = document.createElement('li');
        li.className = 'list-group-item text-success';
        li.innerHTML = `<i class="fas fa-check-circle me-2"></i>${indicator}`;
        safeIndicators.appendChild(li);
      });

      // Update SSL info
      if (data.ssl_certificate) {
        const sslInfo = document.getElementById('sslInfo');
        sslInfo.innerHTML = `
          <div class="row">
            <div class="col-md-6">
              <p><strong>Status:</strong> ${data.ssl_certificate.is_valid ? '<span class="text-success">Valid</span>' : '<span class="text-danger">Invalid</span>'}</p>
              <p><strong>Issuer:</strong> ${data.ssl_certificate.issuer}</p>
            </div>
            <div class="col-md-6">
              <p><strong>Expires:</strong> ${data.ssl_certificate.expires}</p>
              <p><strong>Trust Score:</strong> ${data.ssl_certificate.trust_score}%</p>
            </div>
          </div>
        `;
      }

      // Update trust factor
      if (data.trust_factor !== undefined) {
        const trustFactor = document.getElementById('trustFactor');
        trustFactor.style.width = `${data.trust_factor}%`;
        trustFactor.textContent = `${Math.round(data.trust_factor)}%`;
        trustFactor.className = `progress-bar ${getBadgeClass(data.risk_level)}`;
      }

      // Update AI recommendations (guard against the field being absent in the response)
      const aiRecommendations = document.getElementById('aiRecommendations');
      aiRecommendations.innerHTML = (data.ai_recommendations || [])
        .map(rec => `<li class="list-group-item">${rec}</li>`)
        .join('');

      // Create/Update risk chart
      if (!data.error && document.getElementById('riskChart')) {
        try {
          const ctx = document.getElementById('riskChart').getContext('2d');
          const riskScore = data.risk_level === 'HIGH' ? 0.8 :
            data.risk_level === 'MEDIUM' ? 0.5 : 0.2;

          if (riskChart) {
            riskChart.destroy();
          }

          riskChart = new Chart(ctx, {
            type: 'pie',
            data: {
              labels: ['Risk Level', 'Safe Level'],
              datasets: [{
                data: [riskScore, 1 - riskScore],
                backgroundColor: [
                  data.risk_level === 'HIGH' ? '#dc3545' :
                    data.risk_level === 'MEDIUM' ? '#ffc107' : '#28a745',
                  '#e9ecef'
                ]
              }]
            },
            options: {
              responsive: true,
              plugins: {
                legend: {
                  position: 'bottom'
                },
                title: {
                  display: true,
                  text: 'Risk Assessment'
                }
              }
            }
          });
        } catch (chartError) {
          console.error('Error creating chart:', chartError);
        }
      }

      results.classList.remove('d-none');
    } catch (error) {
      console.error('Error:', error);
      overallAssessment.textContent = `Error: ${error.message}`;
      overallAssessment.className = 'alert alert-danger';
      results.classList.remove('d-none');
    } finally {
      loadingIndicator.classList.add('d-none');
      analyzeBtn.disabled = false;
    }
  });
});
index.html:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Website Scam Analyzer</title>
  <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
  <link href="/static/css/style.css" rel="stylesheet">
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
  <div class="container my-5">
    <div class="text-center mb-4">
      <h1>Website Scam Analyzer</h1>
      <p class="lead">Enter any website URL to analyze it for potential scams and security threats</p>
    </div>

    <div class="row justify-content-center">
      <div class="col-md-8">
        <div class="input-group mb-4">
          <input type="url" class="form-control" id="urlInput" placeholder="Enter website URL (e.g., https://example.com)" required>
          <button class="btn btn-primary" id="analyzeBtn">Analyze</button>
        </div>

        <div id="loadingIndicator" class="text-center d-none">
          <div class="spinner-border text-primary" role="status">
            <span class="visually-hidden">Loading...</span>
          </div>
          <p>Analyzing website...</p>
        </div>

        <div id="results" class="card d-none">
          <div class="card-header d-flex justify-content-between align-items-center">
            <h5 class="mb-0">Analysis Results</h5>
            <span id="riskBadge" class="badge"></span>
          </div>
          <div class="card-body">
            <div class="row mb-4">
              <div class="col-md-6">
                <canvas id="riskChart" width="200" height="200"></canvas>
              </div>
            </div>
            <div class="alert" id="overallAssessment" role="alert"></div>

            <div class="row">
              <div class="col-md-6">
                <h6>Risk Factors</h6>
                <ul id="riskFactors" class="list-group list-group-flush"></ul>
              </div>
              <div class="col-md-6">
                <h6>Safe Indicators</h6>
                <ul id="safeIndicators" class="list-group list-group-flush"></ul>
              </div>
            </div>
            <div class="mt-4">
              <h6>SSL Certificate Information</h6>
              <div id="sslInfo" class="card p-3"></div>
            </div>
            <div class="mt-4">
              <h6>AI Recommendations</h6>
              <ul id="aiRecommendations" class="list-group"></ul>
            </div>
            <div class="mt-4">
              <h6>Trust Metrics</h6>
              <div class="row">
                <div class="col-md-6">
                  <div class="card p-3">
                    <h6>Trust Factor</h6>
                    <div class="progress">
                      <div id="trustFactor" class="progress-bar" role="progressbar"></div>
                    </div>
                  </div>
                </div>
                <div class="col-md-6">
                  <div class="card p-3">
                    <h6>Technical Details</h6>
                    <div id="technicalDetails"></div>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>

  <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
  <script src="/static/js/main.js"></script>
</body>
</html>

analyzer.py:
import urllib.parse
import logging
from bs4 import BeautifulSoup
from .threat_analyzer import DetailedThreatAnalyzer, analyze_text_sentiment, check_ssl_certificate

# Configure logging
logger = logging.getLogger(__name__)


def analyze_website(url, content):
    try:
        threat_analyzer = DetailedThreatAnalyzer()
        parsed_url = urllib.parse.urlparse(url)
        domain = parsed_url.netloc.lower()
        soup = BeautifulSoup(content, 'html.parser')
        ssl_info = check_ssl_certificate(url)

        risk_factors = []
        safe_indicators = []
        confidence_score = 0

        if parsed_url.scheme == 'https':
            safe_indicators.append("Uses secure HTTPS protocol")
        else:
            risk_factors.append("No HTTPS security - connection not encrypted")
            confidence_score += 20

        text_content = soup.get_text()
        sentiment_risks = analyze_text_sentiment(text_content)
        risk_factors.extend(sentiment_risks)
        confidence_score += len(sentiment_risks) * 10

        trust_factor = ssl_info['trust_score']
        assessment = generate_assessment(confidence_score)
        # Risk level mirrors the thresholds used in generate_assessment;
        # the front-end (main.js) reads this field to style the badge and chart.
        risk_level = "HIGH" if confidence_score > 70 else "MEDIUM" if confidence_score > 40 else "LOW"

        return {
            "risk_level": risk_level,
            "confidence_score": confidence_score,
            "risk_factors": risk_factors,
            "safe_indicators": safe_indicators,
            "overall_assessment": assessment,
            "ssl_certificate": ssl_info,
            "trust_factor": trust_factor,
        }
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return {
            "risk_level": "ERROR",
            "confidence_score": 0,
            "risk_factors": ["Technical Error: Unable to complete analysis"],
        }


def generate_assessment(confidence_score):
    if confidence_score > 70:
        return "High risk detected! Exercise extreme caution."
    elif confidence_score > 40:
        return "Medium risk level. Proceed with caution."
    else:
        return "Low risk detected. Website appears to be legitimate."
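The analyzer can also be exercised outside Flask. Below is a minimal, illustrative driver (not part of the project files), assuming the utils package above is importable and that the target host is reachable, since check_ssl_certificate opens a real TLS connection on port 443:

# Illustrative stand-alone driver for analyze_website (assumes utils/ is on the import path).
import requests
from utils.analyzer import analyze_website

url = "https://example.com"            # any reachable HTTPS site
html = requests.get(url, timeout=15).text
report = analyze_website(url, html)

print(report["overall_assessment"])
for factor in report["risk_factors"]:
    print("risk:", factor)
for indicator in report["safe_indicators"]:
    print("ok:  ", indicator)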

common.py:
class DetailedThreatAnalyzer:
    pass


class ThreatCategory:
    pass


class ThreatSeverity:
    pass


class ThreatDetail:
    pass

ml_classifier.py:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import re
from bs4 import BeautifulSoup
import tldextract
import urllib.parse


class ScamDetectionClassifier:
    def __init__(self):
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        self.scaler = StandardScaler()
        self._initialize_classifier()

    def _initialize_classifier(self):
        """Initialize classifier with diverse training data."""
        # Features: [https, ssl_valid, domain_age, form_count, external_links,
        #            suspicious_keywords, input_fields]
        X = np.array([
            # Legitimate sites (varying security levels)
            [1, 1, 1, 0.2, 0.9, 0.1, 0.3],    # Major tech company
            [1, 1, 0.8, 0.4, 0.7, 0.2, 0.4],  # E-commerce
            [1, 0, 0.6, 0.3, 0.5, 0.3, 0.5],  # Blog/News
            [0, 0, 0.4, 0.1, 0.3, 0.4, 0.2],  # Small business

            # Suspicious sites
            [0, 0, 0.1, 0.8, 0.2, 0.9, 0.8],  # Phishing
            [1, 0, 0.2, 0.7, 0.3, 0.8, 0.7],  # Scam with HTTPS
            [0, 0, 0.1, 0.9, 0.1, 0.9, 0.9],  # Fake login
            [1, 1, 0.3, 0.6, 0.4, 0.7, 0.6]   # Sophisticated scam
        ])

        y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

        self.scaler.fit(X)
        X_scaled = self.scaler.transform(X)
        self.classifier.fit(X_scaled, y)

    def extract_features(self, url, content, ssl_info):
        """Extract detailed features from website."""
        features = np.zeros(7)

        # HTTPS (0-1)
        features[0] = 1 if url.startswith('https') else 0
        # SSL Valid (0-1)
        features[1] = 1 if ssl_info.get('is_valid', False) else 0

        # Domain Reputation (0-1)
        # tldextract returns the suffix without a leading dot (e.g. "tk")
        domain = tldextract.extract(url)
        suspicious_tlds = {'tk', 'ml', 'ga', 'cf', 'gq', 'xyz', 'top', 'pw'}
        features[2] = 0.2 if domain.suffix in suspicious_tlds else 0.8

        soup = BeautifulSoup(content, 'html.parser')

        # Form Density (0-1)
        forms = soup.find_all('form')
        total_content_length = len(content)
        features[3] = min(1.0, len(forms) / 3.0)

        # External Links Ratio (0-1)
        links = soup.find_all('a', href=True)
        external_links = sum(1 for link in links if not link['href'].startswith(('#', '/', 'tel:', 'mailto:')))
        features[4] = 1.0 - (external_links / max(len(links), 1)) if links else 0.5

        # Suspicious Keyword Density (0-1)
        text = soup.get_text().lower()
        suspicious_keywords = [
            'urgent', 'verify', 'account', 'banking', 'password',
            'credit card', 'login', 'winner', 'prize', 'free',
            'investment', 'bitcoin', 'cryptocurrency', 'limited time'
        ]
        keyword_count = sum(1 for keyword in suspicious_keywords if keyword in text)
        features[5] = min(1.0, keyword_count / len(suspicious_keywords))

        # Input Field Density (0-1)
        input_fields = soup.find_all('input')
        sensitive_types = ['password', 'text', 'tel', 'email', 'number']
        sensitive_inputs = sum(1 for field in input_fields if field.get('type', '') in sensitive_types)
        features[6] = min(1.0, sensitive_inputs / 5.0)

        return self.scaler.transform([features])

    def predict_risk(self, url, content, ssl_info):
        """Predict risk level with detailed analysis."""
        features = self.extract_features(url, content, ssl_info)
        prediction = self.classifier.predict(features)[0]
        probabilities = self.classifier.predict_proba(features)[0]

        # Calculate trust score
        feature_weights = [0.25, 0.20, 0.15, 0.10, 0.10, 0.15, 0.05]
        raw_features = features[0]
        trust_score = 100 * (1 - sum(w * f for w, f in zip(feature_weights, raw_features)))

        # Adjust for SSL and HTTPS
        if not ssl_info.get('is_valid', False):
            trust_score *= 0.7
        if not url.startswith('https'):
            trust_score *= 0.8

        return {
            'is_scam': bool(prediction),
            'confidence_score': float(probabilities[1]),
            'trust_score': max(0, min(100, trust_score)),
            'risk_probability': float(probabilities[1]),
            'feature_scores': {
                'ssl_security': float(features[0][0:2].mean()),
                'domain_reputation': float(features[0][2]),
                'form_safety': float(1 - features[0][3]),
                'link_trustworthiness': float(features[0][4]),
                'content_safety': float(1 - features[0][5]),
                'input_safety': float(1 - features[0][6])
            }
        }
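ScamDetectionClassifier is not referenced by analyzer.py above, so the following is a short, hypothetical driver showing how its predict_risk output could be consumed. It assumes the module lives at utils/ml_classifier.py alongside the other utilities; the ssl_info dict simply mirrors the shape returned by check_ssl_certificate in threat_analyzer.py.

# Illustrative only: exercising ScamDetectionClassifier directly on a toy page.
from utils.ml_classifier import ScamDetectionClassifier

html = """
<html><body>
  <form><input type="password"><input type="email"></form>
  <p>URGENT: verify your account to claim your prize!</p>
</body></html>
"""
# Same shape as the dict returned by check_ssl_certificate()
ssl_info = {"is_valid": False, "issuer": "Unknown", "expires": "Unknown", "trust_score": 0}

clf = ScamDetectionClassifier()
result = clf.predict_risk("http://login-example.tk/verify", html, ssl_info)

print("is_scam:", result["is_scam"])
print("trust_score:", round(result["trust_score"], 1))
print("feature_scores:", result["feature_scores"])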

threat_analyzer.py:
import os
import re
import json
import logging
import urllib.parse
from bs4 import BeautifulSoup
from textblob import TextBlob
import nltk
from .common import DetailedThreatAnalyzer, ThreatCategory, ThreatSeverity, ThreatDetail

# Download required NLTK data
try:
    nltk.data.find('tokenizers/punkt')
    import textblob.download_corpora
    textblob.download_corpora.download_all()
except LookupError:
    nltk.download('punkt')
    import textblob.download_corpora
    textblob.download_corpora.download_all()

# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def analyze_text_sentiment(text):
    risk_factors = []
    scam_patterns = [
        (r'urgent|immediate action', "Urgency tactics detected"),
        (r'verify.*account|confirm.*identity', "Identity verification request"),
        (r'password|credit card|bank account', "Requests sensitive information"),
        (r'winner|lottery|prize|inheritance', "Promises unrealistic rewards"),
        (r'cryptocurrency|bitcoin|investment', "Potential investment scam"),
    ]
    text_lower = text.lower()
    for pattern, message in scam_patterns:
        if re.search(pattern, text_lower):
            risk_factors.append(message)
    return risk_factors


def check_ssl_certificate(url):
    try:
        import ssl
        import socket
        from datetime import datetime

        hostname = urllib.parse.urlparse(url).netloc
        context = ssl.create_default_context()
        with socket.create_connection((hostname, 443)) as sock:
            with context.wrap_socket(sock, server_hostname=hostname) as ssock:
                cert = ssock.getpeercert()
                not_after = datetime.strptime(cert['notAfter'], '%b %d %H:%M:%S %Y %Z')
                is_valid = datetime.now() < not_after
                issuer = dict(x[0] for x in cert['issuer'])
                return {
                    "is_valid": is_valid,
                    "issuer": issuer.get('organizationName', 'Unknown'),
                    "expires": not_after.strftime('%Y-%m-%d'),
                    "trust_score": 100 if is_valid else 0
                }
    except Exception:
        return {
            "is_valid": False,
            "issuer": "Certificate Error",
            "expires": "Unknown",
            "trust_score": 0
        }
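For reference, a quick, illustrative check of the pattern matcher on scam-like copy (again not part of the repository; it assumes the utils package is importable):

from utils.threat_analyzer import analyze_text_sentiment

sample = "URGENT: verify your account now to claim your lottery prize in Bitcoin."
print(analyze_text_sentiment(sample))
# Four of the five patterns fire: urgency, verification request,
# unrealistic rewards, and the cryptocurrency/investment pattern.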

app.py:
import os
import logging
from flask import Flask, render_template, request, jsonify
from urllib.parse import urlparse
import requests
from utils.analyzer import analyze_website

# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

app = Flask(__name__)
app.secret_key = os.environ.get("SESSION_SECRET")


def is_valid_url(url):
    try:
        result = urlparse(url)
        return all([result.scheme, result.netloc])
    except Exception:
        return False


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/analyze', methods=['POST'])
def analyze():
    data = request.get_json()
    url = data.get('url')

    if not url:
        return jsonify({'error': 'URL is required'}), 400

    if not is_valid_url(url):
        return jsonify({'error': 'Invalid URL format'}), 400

    try:
        # Set headers to mimic a browser
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        # Fetch website content with improved error handling
        try:
            response = requests.get(url, timeout=15, verify=True, headers=headers, allow_redirects=True)
            response.raise_for_status()
            content = response.text
        except requests.exceptions.SSLError:
            # Try without SSL verification if SSL fails
            response = requests.get(url, timeout=15, verify=False, headers=headers, allow_redirects=True)
            content = response.text

        # Analyze the fetched content
        analysis_result = analyze_website(url, content)
        return jsonify(analysis_result)

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching website: {str(e)}")
        return jsonify({'error': 'Failed to fetch website content'}), 400
    except Exception as e:
        logger.error(f"Error analyzing website: {str(e)}")
        return jsonify({'error': 'Failed to analyze website'}), 500

main.py:
from app import app

if __name__ == "__main__":
    app.run(debug=True, port=8000)
Directory Structure (inferred from the imports and asset paths above):

app.py
main.py
static/
    css/style.css
    js/main.js
templates/
    index.html
utils/
    __init__.py
    analyzer.py
    common.py
    ml_classifier.py
    threat_analyzer.py
