0% found this document useful (0 votes)
4 views · 6 pages

Employee Count Google Scraping Python Script (Serpapi)

The document is a Python script that uses the SerpAPI to fetch employee counts for companies listed in an Excel file. It reads company names from 'companies.xlsx', performs Google searches to find employee counts, and updates the results in a new Excel file called 'companies_with_employee_count.xlsx'. The script includes error handling for API requests and checks if the employee count column exists before populating it.

Uploaded by

sarva842003
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views · 6 pages

Employee Count Google Scraping Python Script (Serpapi)

The document is a Python script that uses the SerpAPI to fetch employee counts for companies listed in an Excel file. It reads company names from 'companies.xlsx', performs Google searches to find employee counts, and updates the results in a new Excel file called 'companies_with_employee_count.xlsx'. The script includes error handling for API requests and checks if the employee count column exists before populating it.

Uploaded by

sarva842003
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/6

import pandas as pd

import requests
import pandas as pd
import requests

# SerpAPI credentials
# NOTE(review): this is a hard-coded secret committed with the script; it
# should be rotated and loaded from the environment or a secrets store.
API_KEY = "339b9728ed3613fd6402587fe532b5b0e4652b321774919216e2e0d118e8bb97"

# Function to perform Google search using SerpAPI and extract employee count
def _parse_employee_count(snippet):
    """Return the employee count mentioned in a lower-cased snippet, or None.

    The number directly in front of a word starting with "employee" wins
    (thousands separators and a trailing "+" are tolerated, so "1,200" and
    "500+" are understood).  If no such pair exists, the first token made up
    only of decimal digits is used.
    """
    tokens = snippet.split()

    # Preferred form: "<number> employee(s)".
    for previous, current in zip(tokens, tokens[1:]):
        if current.startswith("employee"):
            candidate = previous.rstrip("+").replace(",", "")
            if candidate.isdecimal():
                return int(candidate)

    # Fallback: first bare number anywhere in the snippet.
    # isdecimal() (not isdigit()) so that int() can never raise on the token.
    for token in tokens:
        if token.isdecimal():
            return int(token)

    return None


def get_employee_count(company_name):
    """Search Google through SerpAPI and return an employee count.

    Args:
        company_name: Name of the company; it is combined with
            "Chennai employee count" to form the search query.

    Returns:
        The first employee count found in the snippets of the organic
        results as an ``int``, or ``None`` when the request fails or no
        snippet yields a number.
    """
    params = {
        "q": f"{company_name} Chennai employee count",
        "location": "Chennai, Tamil Nadu, India",
        "hl": "en",
        "gl": "in",
        "api_key": API_KEY,
    }

    try:
        # A timeout keeps one stalled request from hanging the whole batch.
        response = requests.get(
            "https://serpapi.com/search", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error searching for {company_name}: {e}")
        return None

    organic_results = data.get("organic_results") if isinstance(data, dict) else None

    for result in organic_results or []:
        # "snippet" may be missing or explicitly null in a result.
        snippet = (result.get("snippet") or "").lower()
        if "employee" not in snippet:
            continue
        count = _parse_employee_count(snippet)
        if count is not None:
            return count

    return None

# Read the Excel file
input_file = "companies.xlsx"
output_file = "companies_with_employee_count.xlsx"

df = pd.read_excel(input_file)

# Check if the 'Employee Count' column exists, if not, add it
if "Employee Count" not in df.columns:
    df["Employee Count"] = None

# Iterate through the companies and fill in every missing employee count
for index, row in df.iterrows():
    company_name = row["Company Name"]

    # A blank cell is read as NaN; searching for "nan ..." would only waste
    # an API call, so such rows are left untouched.
    if pd.isna(company_name) or not str(company_name).strip():
        continue

    # Skip rows whose employee count is already filled
    if not pd.isna(row["Employee Count"]):
        continue

    employee_count = get_employee_count(company_name)
    df.at[index, "Employee Count"] = employee_count
    print(f"Processed {company_name}: {employee_count} employees")

# Save the updated DataFrame to a new Excel file
df.to_excel(output_file, index=False)
print(f"Data saved to {output_file}")

# Function to perform Google search using SerpAPI and extract employee count
def _parse_employee_count(snippet):
    """Return the employee count mentioned in a lower-cased snippet, or None.

    The number directly in front of a word starting with "employee" wins
    (thousands separators and a trailing "+" are tolerated, so "1,200" and
    "500+" are understood).  If no such pair exists, the first token made up
    only of decimal digits is used.
    """
    tokens = snippet.split()

    # Preferred form: "<number> employee(s)".
    for previous, current in zip(tokens, tokens[1:]):
        if current.startswith("employee"):
            candidate = previous.rstrip("+").replace(",", "")
            if candidate.isdecimal():
                return int(candidate)

    # Fallback: first bare number anywhere in the snippet.
    # isdecimal() (not isdigit()) so that int() can never raise on the token.
    for token in tokens:
        if token.isdecimal():
            return int(token)

    return None


def get_employee_count(company_name):
    """Search Google through SerpAPI and return an employee count.

    Args:
        company_name: Name of the company; it is combined with
            "Chennai employee count" to form the search query.

    Returns:
        The first employee count found in the snippets of the organic
        results as an ``int``, or ``None`` when the request fails or no
        snippet yields a number.
    """
    params = {
        "q": f"{company_name} Chennai employee count",
        "location": "Chennai, Tamil Nadu, India",
        "hl": "en",
        "gl": "in",
        "api_key": API_KEY,
    }

    try:
        # A timeout keeps one stalled request from hanging the whole batch.
        response = requests.get(
            "https://serpapi.com/search", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error searching for {company_name}: {e}")
        return None

    organic_results = data.get("organic_results") if isinstance(data, dict) else None

    for result in organic_results or []:
        # "snippet" may be missing or explicitly null in a result.
        snippet = (result.get("snippet") or "").lower()
        if "employee" not in snippet:
            continue
        count = _parse_employee_count(snippet)
        if count is not None:
            return count

    return None

# Read the Excel file
input_file = "companies.xlsx"
output_file = "companies_with_employee_count.xlsx"

df = pd.read_excel(input_file)

# Check if the 'Employee Count' column exists, if not, add it
if "Employee Count" not in df.columns:
    df["Employee Count"] = None

# Iterate through the companies and fill in every missing employee count
for index, row in df.iterrows():
    company_name = row["Company Name"]

    # A blank cell is read as NaN; searching for "nan ..." would only waste
    # an API call, so such rows are left untouched.
    if pd.isna(company_name) or not str(company_name).strip():
        continue

    # Skip rows whose employee count is already filled
    if not pd.isna(row["Employee Count"]):
        continue

    employee_count = get_employee_count(company_name)
    df.at[index, "Employee Count"] = employee_count
    print(f"Processed {company_name}: {employee_count} employees")

# Save the updated DataFrame to a new Excel file
df.to_excel(output_file, index=False)
print(f"Data saved to {output_file}")
# Function to perform Google search using SerpAPI and extract employee count
def _parse_employee_count(snippet):
    """Return the employee count mentioned in a lower-cased snippet, or None.

    The number directly in front of a word starting with "employee" wins
    (thousands separators and a trailing "+" are tolerated, so "1,200" and
    "500+" are understood).  If no such pair exists, the first token made up
    only of decimal digits is used.
    """
    tokens = snippet.split()

    # Preferred form: "<number> employee(s)".
    for previous, current in zip(tokens, tokens[1:]):
        if current.startswith("employee"):
            candidate = previous.rstrip("+").replace(",", "")
            if candidate.isdecimal():
                return int(candidate)

    # Fallback: first bare number anywhere in the snippet.
    # isdecimal() (not isdigit()) so that int() can never raise on the token.
    for token in tokens:
        if token.isdecimal():
            return int(token)

    return None


def get_employee_count(company_name):
    """Search Google through SerpAPI and return an employee count.

    Args:
        company_name: Name of the company; it is combined with
            "Chennai employee count" to form the search query.

    Returns:
        The first employee count found in the snippets of the organic
        results as an ``int``, or ``None`` when the request fails or no
        snippet yields a number.
    """
    params = {
        "q": f"{company_name} Chennai employee count",
        "location": "Chennai, Tamil Nadu, India",
        "hl": "en",
        "gl": "in",
        "api_key": API_KEY,
    }

    try:
        # A timeout keeps one stalled request from hanging the whole batch.
        response = requests.get(
            "https://serpapi.com/search", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error searching for {company_name}: {e}")
        return None

    organic_results = data.get("organic_results") if isinstance(data, dict) else None

    for result in organic_results or []:
        # "snippet" may be missing or explicitly null in a result.
        snippet = (result.get("snippet") or "").lower()
        if "employee" not in snippet:
            continue
        count = _parse_employee_count(snippet)
        if count is not None:
            return count

    return None

# Read the Excel file
input_file = "companies.xlsx"
output_file = "companies_with_employee_count.xlsx"

df = pd.read_excel(input_file)

# Check if the 'Employee Count' column exists, if not, add it
if "Employee Count" not in df.columns:
    df["Employee Count"] = None

# Iterate through the companies and fill in every missing employee count
for index, row in df.iterrows():
    company_name = row["Company Name"]

    # A blank cell is read as NaN; searching for "nan ..." would only waste
    # an API call, so such rows are left untouched.
    if pd.isna(company_name) or not str(company_name).strip():
        continue

    # Skip rows whose employee count is already filled
    if not pd.isna(row["Employee Count"]):
        continue

    employee_count = get_employee_count(company_name)
    df.at[index, "Employee Count"] = employee_count
    print(f"Processed {company_name}: {employee_count} employees")

# Save the updated DataFrame to a new Excel file
df.to_excel(output_file, index=False)
print(f"Data saved to {output_file}")

You might also like