0% found this document useful (0 votes)
22 views2 pages

Step 2

Uploaded by

sam.sepiol999
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
22 views2 pages

Step 2

Uploaded by

sam.sepiol999
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

import requests

from bs4 import BeautifulSoup


import pandas as pd

# Base URL without the page number; the page index is appended after "p=".
base_url = (
    'https://www.mr-bricolage.mu/Trianon/maison/decoration/'
    'decoration-des-murs-et-plafonds/papiers-peints-et-panoramiques.html'
    '?discount=1&p='
)

# Seconds to wait for the server before giving up on a page request.
REQUEST_TIMEOUT = 30


def clean_price(raw):
    """Return *raw* without surrounding whitespace, the '₨ ' prefix, or commas."""
    return raw.strip().replace('₨ ', '').replace(',', '')


def parse_product(product):
    """Build a Name/Discount/Price record from one product ``<li>`` element.

    A placeholder string is stored for any field whose tag is not found.
    """
    name_tag = product.find('h2', class_='product-details__name')
    discount_tag = product.find(
        'span', class_='product-details__discount-percentage'
    )
    price_tag = product.find('span', class_='price-wrapper')
    return {
        'Name': (
            name_tag.text.strip() if name_tag is not None else 'No Name Found'
        ),
        'Discount': (
            discount_tag.text.strip()
            if discount_tag is not None
            else 'No Discount'
        ),
        'Price': (
            clean_price(price_tag.text)
            if price_tag is not None
            else 'No Price Found'
        ),
    }


def fetch_products(page_num):
    """Download listing page *page_num* and return its parsed product records.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(base_url + str(page_num), timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    items = soup.find_all('li', class_='item product product-item')
    return [parse_product(item) for item in items]


def take_new_products(records, seen, page_num):
    """Return the records of one page up to the first already-seen product.

    A product is identified by its name and price. Identifiers of the
    returned records are added to *seen* (mutated in place).

    NOTE: a duplicate only ends processing of the current page; the caller
    stops the whole scrape only when a page contributes no new product.
    """
    fresh = []
    for record in records:
        product_id = f"{record['Name']}-{record['Price']}"
        if product_id in seen:
            print(f"Duplicate product detected on page {page_num}. Stopping.")
            break
        seen.add(product_id)
        fresh.append(record)
    return fresh


def scrape_all(first_page=1, last_page=6):
    """Collect product records from pages *first_page*..*last_page* inclusive.

    Stops early at the first page that yields no new product.
    """
    all_products = []
    seen_products = set()  # identifiers of products collected so far
    for page_num in range(first_page, last_page + 1):
        current_page_products = take_new_products(
            fetch_products(page_num), seen_products, page_num
        )
        if not current_page_products:
            print(f"No new products on page {page_num}. Stopping.")
            break
        all_products.extend(current_page_products)
        print(f"Page {page_num} processed successfully.")
    return all_products


def main():
    """Scrape every page and save the collected records to an Excel file."""
    df = pd.DataFrame(scrape_all())
    df.to_excel('all_products.xlsx', index=False)
    print("Data has been saved to all_products.xlsx")


if __name__ == '__main__':
    main()

You might also like