Scam Detection
css:
/* Design tokens shared across the page. */
:root {
  --primary-blue: #2196F3;
  --warning-red: #FF5252;
  --safe-green: #4CAF50;
  --light-grey: #F5F5F5;
  --dark-grey: #333333;
}

body {
  font-family: "SF Pro Display", "Roboto", -apple-system, sans-serif;
  background-color: #f8f9fa; /* Updated background color */
  color: var(--dark-grey);
}

.container {
  padding: 16px;
  max-width: 1000px; /* Added max-width */
}

/* FIX(review): .shield-icon was declared twice (first with
   var(--primary-blue), later with #0d6efd). The later rule won in the
   cascade, so the merged rule keeps #0d6efd to preserve the rendered
   color. Consider switching to var(--primary-blue) deliberately. */
.shield-icon {
  color: #0d6efd;
}

.btn-primary {
  background-color: var(--primary-blue);
  border-color: var(--primary-blue);
}

.btn-primary:hover {
  background-color: #1976D2;
  border-color: #1976D2;
}

/* NOTE(review): three parallel risk-color systems exist below
   (.badge.risk-*, .risk-badge.*, #riskBadge.badge-*) with different
   palettes. All are kept because markup/JS may target any of them —
   consolidate once the template's class usage is confirmed. */
.badge {
  font-size: 1rem; /* Updated font size */
  padding: 0.5rem 1rem; /* Updated padding */
}

.badge.risk-high {
  background-color: var(--warning-red);
}

.badge.risk-medium {
  background-color: #FFA726;
}

.badge.risk-low {
  background-color: var(--safe-green);
}

.card {
  border-radius: 8px;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.list-group-item {
  border: none;
  padding: 1rem; /* Updated padding */
  border-left: none; /* Added border styles */
  border-right: none;
}

#loadingIndicator {
  margin: 2rem 0;
}

.spinner-border {
  width: 3rem;
  height: 3rem;
}

.risk-badge {
  font-size: 1rem;
}

.risk-badge.HIGH {
  background-color: #dc3545;
}

.risk-badge.MEDIUM {
  background-color: #ffc107;
}

.risk-badge.LOW {
  background-color: #198754;
}

/* Full-viewport translucent overlay shown while an analysis request is
   in flight. */
.loading-overlay {
  position: fixed;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background: rgba(255, 255, 255, 0.8);
  display: flex;
  justify-content: center;
  align-items: center;
  z-index: 1000;
}

.accordion-button:not(.collapsed) {
  background-color: #e7f1ff;
}

.threat-category {
  padding: 0.5rem;
  margin-bottom: 0.5rem;
  border-radius: 0.25rem;
}

.threat-category.CRITICAL {
  background-color: #dc3545;
  color: white;
}

.threat-category.HIGH {
  background-color: #ffc107;
}

.threat-category.MEDIUM {
  background-color: #0dcaf0;
}

.threat-category.LOW {
  background-color: #198754;
  color: white;
}

#urlInput {
  font-size: 1.1rem;
  padding: 0.75rem;
}

#analyzeBtn {
  padding: 0.75rem 2rem;
  font-size: 1.1rem;
}

#riskBadge.badge-high {
  background-color: #dc3545;
  color: white;
}

#riskBadge.badge-medium {
  background-color: #ffc107;
  color: black;
}

#riskBadge.badge-low {
  background-color: #28a745;
  color: white;
}

.alert {
  font-size: 1.1rem;
}
main.js:
// Front-end controller for the scam-analyzer page: wires up the URL form,
// POSTs to /analyze, and renders the returned assessment.
document.addEventListener('DOMContentLoaded', () => {
  const urlInput = document.getElementById('urlInput');
  const analyzeBtn = document.getElementById('analyzeBtn');
  const loadingIndicator = document.getElementById('loadingIndicator');
  const results = document.getElementById('results');
  const riskBadge = document.getElementById('riskBadge');
  const overallAssessment = document.getElementById('overallAssessment');
  const riskFactors = document.getElementById('riskFactors');
  const safeIndicators = document.getElementById('safeIndicators');
  let riskChart = null;

  // Map a risk level string to the Bootstrap badge class for #riskBadge.
  function getBadgeClass(riskLevel) {
    switch (riskLevel?.toUpperCase()) {
      case 'HIGH':
        return 'bg-danger';
      case 'MEDIUM':
        return 'bg-warning text-dark';
      case 'LOW':
        return 'bg-success';
      default:
        return 'bg-secondary';
    }
  }

  // Map a risk level string to the Bootstrap alert class for the summary box.
  function getAlertClass(riskLevel) {
    switch (riskLevel?.toUpperCase()) {
      case 'HIGH':
        return 'alert-danger';
      case 'MEDIUM':
        return 'alert-warning';
      case 'LOW':
        return 'alert-success';
      default:
        return 'alert-info';
    }
  }

  // Render an array of strings as list-group items. Uses textContent
  // (not innerHTML) so server-supplied text cannot inject markup.
  // SECURITY FIX(review): the original inserted recommendations via
  // innerHTML template strings — an XSS vector for untrusted content.
  function renderListItems(listEl, items) {
    listEl.innerHTML = '';
    (items || []).forEach((text) => {
      const li = document.createElement('li');
      li.className = 'list-group-item';
      li.textContent = text;
      listEl.appendChild(li);
    });
  }

  async function analyzeUrl() {
    const url = urlInput.value.trim();
    if (!url) {
      return;
    }
    try {
      loadingIndicator.classList.remove('d-none');
      results.classList.add('d-none');
      analyzeBtn.disabled = true;
      riskFactors.innerHTML = '';
      safeIndicators.innerHTML = '';

      // BUG FIX(review): the original referenced `response` and `data`
      // without ever issuing a request — the fetch below was missing.
      const response = await fetch('/analyze', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ url }),
      });
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const data = await response.json();

      // NOTE(review): response schema inferred from element IDs and the
      // server code — confirm field names (risk_level, overall_assessment,
      // risk_factors, safe_indicators, ai_recommendations) against the
      // /analyze handler before relying on them.
      riskBadge.textContent = data.risk_level || 'UNKNOWN';
      riskBadge.className = `badge ${getBadgeClass(data.risk_level)}`;
      overallAssessment.textContent = data.overall_assessment || '';
      overallAssessment.className = `alert ${getAlertClass(data.risk_level)}`;
      renderListItems(riskFactors, data.risk_factors);
      renderListItems(safeIndicators, data.safe_indicators);
      renderListItems(document.getElementById('aiRecommendations'), data.ai_recommendations);

      // Dispose of any chart from a previous analysis before showing results.
      if (riskChart) {
        riskChart.destroy();
        riskChart = null;
      }
      results.classList.remove('d-none');
    } catch (error) {
      console.error('Error:', error);
      overallAssessment.textContent = `Error: ${error.message}`;
      overallAssessment.className = 'alert alert-danger';
      results.classList.remove('d-none');
    } finally {
      loadingIndicator.classList.add('d-none');
      analyzeBtn.disabled = false;
    }
  }

  // BUG FIX(review): nothing triggered the analysis in the original —
  // the try/catch floated loose inside the DOMContentLoaded handler.
  analyzeBtn.addEventListener('click', analyzeUrl);
  urlInput.addEventListener('keydown', (event) => {
    if (event.key === 'Enter') {
      analyzeUrl();
    }
  });
});
templates/index.html:
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Website Scam Analyzer</title>
  <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
  <link href="/static/css/style.css" rel="stylesheet">
  <script src="https://cdn.jsdelivr.net/npm/chart.js" defer></script>
</head>
<body>
  <main class="container my-5">
    <div class="text-center mb-4">
      <h1>Website Scam Analyzer</h1>
      <p class="lead">Enter any website URL to analyze it for potential scams and security threats</p>
    </div>

    <!-- FIX(review): the controls and containers main.js queries
         (urlInput, analyzeBtn, loadingIndicator, results, riskBadge,
         overallAssessment) were missing from the original markup, and the
         original had four unbalanced closing divs plus a split script tag. -->
    <div class="input-group mb-4">
      <label class="visually-hidden" for="urlInput">Website URL</label>
      <input class="form-control" id="urlInput" name="url" type="url" placeholder="https://example.com" autocomplete="url">
      <button class="btn btn-primary" id="analyzeBtn" type="button">Analyze</button>
    </div>

    <div id="loadingIndicator" class="text-center d-none">
      <div class="spinner-border text-primary" role="status">
        <span class="visually-hidden">Analyzing…</span>
      </div>
    </div>

    <div id="results" class="d-none" aria-live="polite">
      <div class="text-center mb-3">
        <span id="riskBadge" class="badge bg-secondary">UNKNOWN</span>
      </div>
      <div id="overallAssessment" class="alert alert-info" role="status"></div>

      <div class="row">
        <div class="col-md-6">
          <h2 class="h6">Risk Factors</h2>
          <ul id="riskFactors" class="list-group list-group-flush"></ul>
        </div>
        <div class="col-md-6">
          <h2 class="h6">Safe Indicators</h2>
          <ul id="safeIndicators" class="list-group list-group-flush"></ul>
        </div>
      </div>

      <div class="mt-4">
        <h2 class="h6">SSL Certificate Information</h2>
        <div id="sslInfo" class="card p-3"></div>
      </div>

      <div class="mt-4">
        <h2 class="h6">AI Recommendations</h2>
        <ul id="aiRecommendations" class="list-group"></ul>
      </div>

      <div class="mt-4">
        <h2 class="h6">Trust Metrics</h2>
        <div class="row">
          <div class="col-md-6">
            <div class="card p-3">
              <h3 class="h6">Trust Factor</h3>
              <div class="progress">
                <div id="trustFactor" class="progress-bar" role="progressbar"></div>
              </div>
            </div>
          </div>
          <div class="col-md-6">
            <div class="card p-3">
              <h3 class="h6">Technical Details</h3>
              <div id="technicalDetails"></div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </main>
  <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
  <script src="/static/js/main.js"></script>
</body>
</html>
analyzer.py:
import logging
import urllib.parse

from bs4 import BeautifulSoup

# FIX(review): the multi-name import was wrapped onto a second line
# without parentheses in the original, which is a syntax error.
from .threat_analyzer import (
    DetailedThreatAnalyzer,
    analyze_text_sentiment,
    check_ssl_certificate,
)
# Configure logging
logger = logging.getLogger(__name__)
def generate_assessment(confidence_score):
    """Translate a 0-100 confidence score into a human-readable risk summary.

    Args:
        confidence_score: Numeric scam-confidence value; >70 is high risk,
            >40 medium, anything else low.

    Returns:
        A one-sentence assessment string.
    """
    if confidence_score > 70:
        return "High risk detected! Exercise extreme caution."
    if confidence_score > 40:
        return "Medium risk level. Proceed with caution."
    return "Low risk detected. Website appears to be legitimate."
common.py:
class DetailedThreatAnalyzer:
    """Empty placeholder. NOTE(review): threat_analyzer.py imports this name
    from .common; no behavior is defined here — confirm where (or whether)
    the real implementation lives."""
    pass
class ThreatCategory:
    """Empty placeholder for a threat-category type; no members defined."""
    pass
class ThreatSeverity:
    """Empty placeholder for a threat-severity type; no members defined."""
    pass
class ThreatDetail:
    """Empty placeholder for a per-threat detail record; no fields defined."""
    pass
ml_classifier.py:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import re
from bs4 import BeautifulSoup
import tldextract
import urllib.parse
class ScamDetectionClassifier:
    """Random-forest model that scores a website feature vector for scam likelihood."""

    def __init__(self):
        # Fixed seed + 100 trees keeps predictions reproducible across runs.
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        self.scaler = StandardScaler()
        self._initialize_classifier()

    def _initialize_classifier(self):
        """Initialize classifier with diverse training data.

        FIX(review): in the original, the feature-name comment wrapped onto a
        second, uncommented line ("suspicious_keywords, input_fields]"),
        which was a syntax error.
        """
        # Features: [https, ssl_valid, domain_age, form_count, external_links,
        #            suspicious_keywords, input_fields]
        X = np.array([
            # Legitimate sites (varying security levels)
            [1, 1, 1, 0.2, 0.9, 0.1, 0.3],    # Major tech company
            [1, 1, 0.8, 0.4, 0.7, 0.2, 0.4],  # E-commerce
            [1, 0, 0.6, 0.3, 0.5, 0.3, 0.5],  # Blog/News
            [0, 0, 0.4, 0.1, 0.3, 0.4, 0.2],  # Small business
            # Suspicious sites
            [0, 0, 0.1, 0.8, 0.2, 0.9, 0.8],  # Phishing
            [1, 0, 0.2, 0.7, 0.3, 0.8, 0.7],  # Scam with HTTPS
            [0, 0, 0.1, 0.9, 0.1, 0.9, 0.9],  # Fake login
            [1, 1, 0.3, 0.6, 0.4, 0.7, 0.6],  # Sophisticated scam
        ])
        # Labels aligned with the rows above: 0 = legitimate, 1 = scam.
        y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        # Fit the scaler on the seed data, then train on the scaled features.
        self.scaler.fit(X)
        X_scaled = self.scaler.transform(X)
        self.classifier.fit(X_scaled, y)
# NOTE(review): the lines below are fragments of at least two methods —
# a feature extractor (sets features[0..1], returns the scaled vector) and
# a prediction/report builder (returns the result dict) — whose `def`
# headers are missing from this chunk. `url`, `ssl_info`, `features`,
# `prediction`, `probabilities`, and `trust_score` are all undefined here.
# Restore the enclosing method definitions before shipping.
# HTTPS (0-1)
features[0] = 1 if url.startswith('https') else 0
# SSL Valid (0-1)
features[1] = 1 if ssl_info.get('is_valid', False) else 0
return self.scaler.transform([features])
# Result payload: boolean verdict, probability-derived scores, and
# per-feature safety scores (1 - x inverts "risk" features into "safety").
return {
'is_scam': bool(prediction),
'confidence_score': float(probabilities[1]),
'trust_score': max(0, min(100, trust_score)),
'risk_probability': float(probabilities[1]),
'feature_scores': {
'ssl_security': float(features[0][0:2].mean()),
'domain_reputation': float(features[0][2]),
'form_safety': float(1 - features[0][3]),
'link_trustworthiness': float(features[0][4]),
'content_safety': float(1 - features[0][5]),
'input_safety': float(1 - features[0][6])
}
}
threat_analyzer.py:
import os
import re
import json
import logging
import urllib.parse
from bs4 import BeautifulSoup
from textblob import TextBlob
import nltk
from .common import DetailedThreatAnalyzer, ThreatCategory, ThreatSeverity, ThreatDetail
# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def analyze_text_sentiment(text):
    """Scan page text for common scam phrasing.

    Args:
        text: Raw page text; matching is case-insensitive.

    Returns:
        A list of human-readable risk messages, one per matched pattern,
        in the order the patterns are declared.
    """
    # Regex pattern -> message shown to the user when the pattern matches.
    indicators = {
        r'urgent|immediate action': "Urgency tactics detected",
        r'verify.*account|confirm.*identity': "Identity verification request",
        r'password|credit card|bank account': "Requests sensitive information",
        r'winner|lottery|prize|inheritance': "Promises unrealistic rewards",
        r'cryptocurrency|bitcoin|investment': "Potential investment scam",
    }
    lowered = text.lower()
    return [
        message
        for pattern, message in indicators.items()
        if re.search(pattern, lowered)
    ]
def check_ssl_certificate(url):
    """Fetch and validate the TLS certificate of the given URL's host.

    Args:
        url: Absolute URL; its host (and optional port) are extracted for
            the TLS handshake.

    Returns:
        Dict with keys "is_valid" (bool), "issuer" (str), "expires"
        ("YYYY-MM-DD" str), and "trust_score" (100 or 0). Any failure —
        bad URL, connection error, handshake error — yields the
        "Certificate Error" fallback dict rather than raising.
    """
    try:
        import ssl
        import socket
        from datetime import datetime

        parsed = urllib.parse.urlparse(url)
        # FIX(review): the original used .netloc, which keeps a ":port"
        # suffix — that broke both the TCP connect and SNI for URLs with an
        # explicit port. .hostname strips it; honor a non-default port.
        hostname = parsed.hostname
        port = parsed.port or 443
        context = ssl.create_default_context()
        # FIX(review): added a timeout so an unresponsive host cannot hang
        # the request handler indefinitely.
        with socket.create_connection((hostname, port), timeout=10) as sock:
            with context.wrap_socket(sock, server_hostname=hostname) as ssock:
                cert = ssock.getpeercert()
        # Certificate notAfter is expressed in GMT; compare in UTC rather
        # than local time to avoid off-by-timezone validity errors.
        not_after = datetime.strptime(cert['notAfter'], '%b %d %H:%M:%S %Y %Z')
        is_valid = datetime.utcnow() < not_after
        issuer = dict(x[0] for x in cert['issuer'])
        return {
            "is_valid": is_valid,
            "issuer": issuer.get('organizationName', 'Unknown'),
            "expires": not_after.strftime('%Y-%m-%d'),
            "trust_score": 100 if is_valid else 0
        }
    except Exception:
        # Best-effort by design: callers get a zero-trust record instead of
        # an exception for any unreachable/invalid host.
        return {
            "is_valid": False,
            "issuer": "Certificate Error",
            "expires": "Unknown",
            "trust_score": 0
        }
app.py:
import os
import logging
from flask import Flask, render_template, request, jsonify
from urllib.parse import urlparse
import requests
from utils.analyzer import analyze_website
# Configure logging
# NOTE(review): DEBUG is very verbose for production — consider driving the
# level from an environment variable.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
# Flask application instance used by the route decorators below.
app = Flask(__name__)
# NOTE(review): secret_key becomes None when SESSION_SECRET is unset, which
# silently disables sessions — confirm the deployment sets this variable.
app.secret_key = os.environ.get("SESSION_SECRET")
def is_valid_url(url):
    """Return True when url parses with both a scheme and a network location.

    Args:
        url: Candidate URL string (non-string inputs are treated as invalid).

    Returns:
        bool: True for e.g. "https://example.com"; False for scheme-less or
        malformed input.
    """
    try:
        result = urlparse(url)
        return all([result.scheme, result.netloc])
    except Exception:
        # FIX(review): was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; Exception still covers urlparse's
        # ValueError and type errors from non-string input.
        return False
@app.route('/')
def index():
    """Render the single-page analyzer UI (templates/index.html)."""
    return render_template('index.html')
@app.route('/analyze', methods=['POST'])
def analyze():
    """Analyze a submitted URL for scam indicators.

    Expects JSON {"url": "<http(s) URL>"}. Returns the analysis result as
    JSON on success, or {"error": ...} with status 400 (bad input / fetch
    failure) or 500 (analysis failure).
    """
    data = request.get_json()
    # Guard against a missing/non-JSON body (get_json may return None).
    url = data.get('url') if data else None
    if not url:
        return jsonify({'error': 'URL is required'}), 400
    if not is_valid_url(url):
        return jsonify({'error': 'Invalid URL format'}), 400
    try:
        # Set headers to mimic a browser so sites that block default HTTP
        # clients still respond.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # FIX(review): the original try-block built the headers but never
        # performed the request nor returned a success response, so the
        # except clauses were unreachable and the view returned None.
        # NOTE(review): analyze_website's exact signature is not visible in
        # this chunk — confirm the argument order (url, html) it expects.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        result = analyze_website(url, response.text)
        return jsonify(result)
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching website: {str(e)}")
        return jsonify({'error': 'Failed to fetch website content'}), 400
    except Exception as e:
        logger.error(f"Error analyzing website: {str(e)}")
        return jsonify({'error': 'Failed to analyze website'}), 500
main.py:
from app import app
# Development entry point: runs Flask's built-in server on port 8000.
# NOTE(review): debug=True enables the interactive debugger — never ship
# this to production; use a WSGI server (gunicorn/uwsgi) instead.
if __name__ == "__main__":
    app.run(debug=True, port=8000)
Directory Structure: