0% found this document useful (0 votes)
5 views2 pages

To Collect Bowling Data

The document is a Python script that uses Selenium to scrape bowling statistics for cricket players from a website. It initializes a Chrome webdriver, navigates to the bowling sections for specified players and formats, extracts relevant data, and stores it in a Pandas DataFrame. Finally, it saves the collected data to a CSV file on the user's desktop if any data was successfully scraped.

Uploaded by

995aarvee
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
5 views2 pages

To Collect Bowling Data

The document is a Python script that uses Selenium to scrape bowling statistics for cricket players from a website. It initializes a Chrome webdriver, navigates to the bowling sections for specified players and formats, extracts relevant data, and stores it in a Pandas DataFrame. Finally, it saves the collected data to a CSV file on the user's desktop if any data was successfully scraped.

Uploaded by

995aarvee
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

import os
import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Launch a Chrome browser session (chromedriver must be discoverable on PATH).
driver = webdriver.Chrome()

# Accumulator for every scraped table; per-player/per-format rows are
# concatenated onto this frame as the loops below run.
final_data = pd.DataFrame()

# Players whose bowling figures will be collected (replace with real targets).
players = ["RG Sharma", "Virat Kohli", "Jasprit Bumrah"]

# Match-format tab ids exposed on the cricmetric stats page.
formats = ["Test", "ODI", "T20I", "TWENTY20"]

def navigate_to_bowling_section(player, fmt):
    """Open the cricmetric stats page for *player* and activate its Bowling tab.

    Parameters
    ----------
    player : str
        Player name as shown on cricmetric (e.g. ``"RG Sharma"``).
    fmt : str
        Format tab id prefix: ``"Test"``, ``"ODI"``, ``"T20I"`` or
        ``"TWENTY20"``.

    Returns
    -------
    bool
        ``True`` once the format's bowling table is visible, ``False`` on
        any navigation or timeout failure (the error is printed, not raised).
    """
    # quote_plus encodes spaces as '+' (same as the old manual replace) and
    # additionally handles any other URL-unsafe characters in the name.
    url = (
        "http://www.cricmetric.com/playerstats.py"
        f"?player={quote_plus(player)}&role=all&format=all"
        f"&groupby=year#{fmt}-Bowling"
    )
    driver.get(url)
    try:
        # Wait for the Bowling tab to become clickable, then click it.
        bowling_tab = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, f'//*[@id="{fmt}-Bowling-tab"]'))
        )
        bowling_tab.click()

        # Wait until the bowling stats table itself has rendered.
        WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located(
                (By.XPATH, f'//*[@id="{fmt}-Bowling"]/div/table')
            )
        )
        print(f"Successfully navigated to Bowling section for {player} in {fmt}")
        return True
    except Exception as e:  # broad on purpose: any Selenium failure skips this format
        print(f"Error navigating to Bowling section for {player} in {fmt}: {e}")
        return False

# Iterate over each player and format, scraping the bowling table for each.
for player in players:
    for fmt in formats:
        try:
            if navigate_to_bowling_section(player, fmt):
                # Grab the rendered table's visible text.
                table = driver.find_element(
                    By.XPATH, f'//*[@id="{fmt}-Bowling"]/div/table'
                ).text

                # First line is the header row; the rest are data rows.
                lines = table.split('\n')
                headers = lines[0].split()  # whitespace-delimited columns

                # Keep only the columns of interest, preserving on-page order.
                required_columns = ["Year", "Innings", "Overs", "Runs",
                                    "Wickets", "Econ", "Avg", "SR", "5W",
                                    "BBI", "4s", "6s", "Dot%"]
                filtered_headers = [h for h in headers if h in required_columns]
                # Resolve each wanted column's position ONCE, instead of
                # calling headers.index() per header per row.
                col_idx = [headers.index(h) for h in filtered_headers]
                # Highest index we will read; -1 when no columns matched.
                max_idx = max(col_idx, default=-1)

                data = []
                for line in lines[1:]:
                    row_data = line.split()
                    # Skip footer/short rows instead of letting an
                    # IndexError abort the whole player/format (the old
                    # behaviour under the broad except below).
                    if len(row_data) <= max_idx:
                        continue
                    data.append([row_data[i] for i in col_idx])

                # Build the per-format frame and tag rows with provenance.
                df = pd.DataFrame(data, columns=filtered_headers)
                df['Player'] = player
                df['Format'] = fmt
                df['Section'] = "Bowling"

                # Accumulate into the run-wide frame.
                final_data = pd.concat([final_data, df], ignore_index=True)

                print(f"Successfully collected bowling data for {player} in {fmt}")

        except Exception as e:
            print(f"Error processing {player} for {fmt} Bowling: {e}")
            continue

# Release the browser session.
driver.quit()

# Build the output path on the user's Desktop. expanduser('~') resolves via
# USERPROFILE on Windows and HOME on POSIX, unlike the original
# os.environ['USERPROFILE'] which raises KeyError off Windows. The original
# nested os.path.join(os.path.join(x), 'Desktop') was also redundant: a
# single-argument join is a no-op.
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
csv_file_path = os.path.join(desktop_path, 'cricmetric_player_bowling_stats.csv')

# Debugging: quick look at what was collected.
print(final_data.head())

# Persist only if at least one row was scraped.
if not final_data.empty:
    final_data.to_csv(csv_file_path, index=False)
    print(f"Bowling data collection complete. Saved to '{csv_file_path}'.")
else:
    print("No bowling data collected. Please check your scraping process.")

You might also like