Dropdown list scraping
Dropdown list scraping
import re
from w3lib.html import remove_tags
class LuckypetSpiderSpider(scrapy.Spider):
    """Spider for the dog-shop food & treats listing on luckypet.com.au."""

    # Name under which Scrapy registers this spider.
    name = "luckypet_spider"
    # Only hosts listed here are considered on-site for followed links.
    allowed_domains = ["www.luckypet.com.au"]
    # Entry page. The host has to match allowed_domains, so the URL points
    # directly at the shop instead of going through a link-rewriting host.
    start_urls = ["https://www.luckypet.com.au/dog-shop/food-treats.html"]
title_Link = product.css('a.thumbnail-image::attr(href)').get()
Stock_Status = product.css('span.badge::text').get()
class LuckypetSpiderSpider(scrapy.Spider):
    """Spider for the dog-shop food & treats listing on luckypet.com.au.

    A Selenium-driven Chrome session is created alongside the spider and
    kept on ``self.driver``.
    """

    # Name under which Scrapy registers this spider.
    name = "luckypet_spider"
    # Only hosts listed here are considered on-site for followed links.
    allowed_domains = ["www.luckypet.com.au"]
    # Entry page. The host has to match allowed_domains, so the URL points
    # directly at the shop instead of going through a link-rewriting host.
    start_urls = ["https://www.luckypet.com.au/dog-shop/food-treats.html"]

    # Per-spider overrides: one request at a time with a randomised delay
    # of about three seconds, cookies kept between requests.
    custom_settings = {
        'DOWNLOAD_DELAY': 3,
        'RANDOMIZE_DOWNLOAD_DELAY': True,
        'COOKIES_ENABLED': True,
        'CONCURRENT_REQUESTS': 1,
    }

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the Chrome session.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            **kwargs: Keyword arguments (for example ``-a`` spider
                arguments) forwarded to ``scrapy.Spider``.
        """
        # Scrapy builds spiders via cls(*args, **kwargs); forwarding keeps
        # spider arguments working and runs the base-class initialisation.
        super().__init__(*args, **kwargs)

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')

        # The path is split into adjacent raw literals only to keep the
        # line short; the resulting string is the original path unchanged.
        # NOTE(review): machine-specific location, and the raw string keeps
        # the doubled backslashes as written - confirm this is intended.
        service = Service(
            r"C:\\Users\\Hp\\.wdm\\drivers\\chromedriver\\win64"
            r"\\chromedriver-win64\\chromedriver.exe"
        )
        self.driver = webdriver.Chrome(service=service, options=chrome_options)
        self.driver.set_window_size(1920, 1080)
if Product_Title is None:
continue
title_Link = product.css('a.thumbnail-image::attr(href)').get()
Image_url = product.css('img.product-image::attr(data-src)').get()
if Image_url:
Image_url = response.urljoin(Image_url)
Stock_Status = product.css('span.badge::text').get()
item = {
'Product_Title': Product_Title,
'Title_Link': title_Link,
'Image_url': Image_url,
'Stock_Status': Stock_Status
}
yield scrapy.Request(
url=title_Link,
callback=self.parse_product_details,
meta={'item': item}
)
for _ in range(max_attempts):
try:
price_element = WebDriverWait(self.driver, 5).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "span.txt-
pur.large-price.font-weight-bold"))
)
current_price = price_element.text.strip()
# If price has changed from initial price, return the new price
if current_price != initial_price:
return current_price
time.sleep(wait_time)
except:
time.sleep(wait_time)
continue
return None
self.driver.get(response.url)
time.sleep(3) # Wait for initial page load
try:
# Get initial price
initial_price_element = WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "span.txt-
pur.large-price.font-weight-bold"))
)
initial_price = initial_price_element.text.strip()
# Scroll to dropdown
self.driver.execute_script("arguments[0].scrollIntoView(true);",
dropdown)
time.sleep(1)
select.value = '{option_data["value"]}';
var event = new Event('change', {{ bubbles: true }});
select.dispatchEvent(event);
"""
self.driver.execute_script(select_script)
if new_price:
print(f"Successfully scraped - Size: {option_data['text']},
Price: {new_price}")
except Exception as e:
print(f"Error processing option {option_data['text']}:
{str(e)}")
continue
except Exception as e:
print(f"No variations found or error: {str(e)}")
# Process as single product
try:
price_element = WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "span.txt-
pur.large-price.font-weight-bold"))
)
price = price_element.text.strip()
item['Regular_Price'] = price
except:
item['Regular_Price'] = "Not Available"