import pandas as pd
import requests
import pandas as pd
import requests
# SerpAPI credentials
# NOTE(review): this is a live-looking secret committed in source. Prefer
# loading it from the environment or a secrets store, and rotate this key.
API_KEY = "339b9728ed3613fd6402587fe532b5b0e4652b321774919216e2e0d118e8bb97"
# Helper: pull the first plausible employee count out of one result snippet
def _extract_employee_count(snippet):
    """Return the employee count mentioned in *snippet*, or None.

    This is a heuristic. A number written directly before "employee(s)"
    (e.g. "1,200 employees", "500+ employees") is preferred; otherwise the
    first standalone integer in the snippet is used. Thousands separators
    are accepted (including Indian grouping such as "1,00,000"), while
    decimals ("1.5") and suffixed figures ("500k") are ignored.

    Args:
        snippet: Lower-cased snippet text of a single search result.

    Returns:
        The count as an int, or None if the snippet does not mention
        employees or contains no usable number.
    """
    import re  # function-scope import: the file's import block lacks `re`

    if "employee" not in snippet:
        return None

    # A standalone integer: not glued to a word, not part of a decimal.
    number = r"(?<![\w.,])(\d+(?:,\d+)*)(?![.,]\d|\w)"
    match = re.search(number + r"\+?\s+employees?\b", snippet)
    if match is None:
        # Fall back to the first number anywhere in the snippet.
        match = re.search(number, snippet)
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))


# Function to perform Google search using SerpAPI and extract employee count
def get_employee_count(company_name):
    """Search Google via SerpAPI and return the company's employee count.

    The query is "<company_name> Chennai employee count", localised to
    Chennai, Tamil Nadu, India. The organic results are scanned in order
    and the first snippet that yields a number wins.

    Args:
        company_name: Name of the company to look up.

    Returns:
        The employee count as an int, or None if the request failed or no
        result snippet contained a usable number. Network and JSON errors
        are printed rather than raised, so a batch run keeps going.
    """
    query = f"{company_name} Chennai employee count"
    params = {
        "q": query,
        "location": "Chennai, Tamil Nadu, India",
        "hl": "en",
        "gl": "in",
        "api_key": API_KEY,
    }
    try:
        # Call SerpAPI directly: the previous URL routed the request (and
        # the API key) through an unrelated proxy host.
        response = requests.get(
            "https://serpapi.com/search", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error searching for {company_name}: {e}")
        return None

    # Extract employee count from search results
    results = data.get("organic_results") if isinstance(data, dict) else None
    for result in results or []:
        snippet = result.get("snippet") if isinstance(result, dict) else None
        if not isinstance(snippet, str):
            continue
        count = _extract_employee_count(snippet.lower())
        if count is not None:
            return count
    return None
# Read the Excel file
input_file = "companies.xlsx"
output_file = "companies_with_employee_count.xlsx"
df = pd.read_excel(input_file)

# Check if the 'Employee Count' column exists, if not, add it
if "Employee Count" not in df.columns:
    df["Employee Count"] = None

# Iterate through the companies and update employee count
for index, row in df.iterrows():
    company_name = row["Company Name"]
    # Skip rows without a company name; otherwise the search query would
    # be built from the literal text "nan".
    if pd.isna(company_name) or not str(company_name).strip():
        continue
    # Only look up rows whose employee count is still empty
    if pd.isna(row["Employee Count"]):
        employee_count = get_employee_count(company_name)
        df.at[index, "Employee Count"] = employee_count
        print(f"Processed {company_name}: {employee_count} employees")

# Save the updated DataFrame to a new Excel file
df.to_excel(output_file, index=False)
print(f"Data saved to {output_file}")
# NOTE(review): this definition repeats the get_employee_count defined
# earlier in this file (apparent paste duplication). Being later, it replaces
# the earlier one for all code that runs after this point.

# Helper: pull the first plausible employee count out of one result snippet
def _extract_employee_count(snippet):
    """Return the employee count mentioned in *snippet*, or None.

    This is a heuristic. A number written directly before "employee(s)"
    (e.g. "1,200 employees", "500+ employees") is preferred; otherwise the
    first standalone integer in the snippet is used. Thousands separators
    are accepted (including Indian grouping such as "1,00,000"), while
    decimals ("1.5") and suffixed figures ("500k") are ignored.

    Args:
        snippet: Lower-cased snippet text of a single search result.

    Returns:
        The count as an int, or None if the snippet does not mention
        employees or contains no usable number.
    """
    import re  # function-scope import: the file's import block lacks `re`

    if "employee" not in snippet:
        return None

    # A standalone integer: not glued to a word, not part of a decimal.
    number = r"(?<![\w.,])(\d+(?:,\d+)*)(?![.,]\d|\w)"
    match = re.search(number + r"\+?\s+employees?\b", snippet)
    if match is None:
        # Fall back to the first number anywhere in the snippet.
        match = re.search(number, snippet)
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))


# Function to perform Google search using SerpAPI and extract employee count
def get_employee_count(company_name):
    """Search Google via SerpAPI and return the company's employee count.

    The query is "<company_name> Chennai employee count", localised to
    Chennai, Tamil Nadu, India. The organic results are scanned in order
    and the first snippet that yields a number wins.

    Args:
        company_name: Name of the company to look up.

    Returns:
        The employee count as an int, or None if the request failed or no
        result snippet contained a usable number. Network and JSON errors
        are printed rather than raised, so a batch run keeps going.
    """
    query = f"{company_name} Chennai employee count"
    params = {
        "q": query,
        "location": "Chennai, Tamil Nadu, India",
        "hl": "en",
        "gl": "in",
        "api_key": API_KEY,
    }
    try:
        # Call SerpAPI directly: the previous URL routed the request (and
        # the API key) through an unrelated proxy host.
        response = requests.get(
            "https://serpapi.com/search", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error searching for {company_name}: {e}")
        return None

    # Extract employee count from search results
    results = data.get("organic_results") if isinstance(data, dict) else None
    for result in results or []:
        snippet = result.get("snippet") if isinstance(result, dict) else None
        if not isinstance(snippet, str):
            continue
        count = _extract_employee_count(snippet.lower())
        if count is not None:
            return count
    return None
# NOTE(review): this pass repeats the processing block found earlier in this
# file (apparent paste duplication). It re-reads input_file, not output_file,
# so counts found by an earlier pass are not reused, and it overwrites
# output_file when it finishes.

# Read the Excel file
input_file = "companies.xlsx"
output_file = "companies_with_employee_count.xlsx"
df = pd.read_excel(input_file)

# Check if the 'Employee Count' column exists, if not, add it
if "Employee Count" not in df.columns:
    df["Employee Count"] = None

# Iterate through the companies and update employee count
for index, row in df.iterrows():
    company_name = row["Company Name"]
    # Skip rows without a company name; otherwise the search query would
    # be built from the literal text "nan".
    if pd.isna(company_name) or not str(company_name).strip():
        continue
    # Only look up rows whose employee count is still empty
    if pd.isna(row["Employee Count"]):
        employee_count = get_employee_count(company_name)
        df.at[index, "Employee Count"] = employee_count
        print(f"Processed {company_name}: {employee_count} employees")

# Save the updated DataFrame to a new Excel file
df.to_excel(output_file, index=False)
print(f"Data saved to {output_file}")
# NOTE(review): this definition repeats the get_employee_count defined
# earlier in this file (apparent paste duplication). Being the last one, it
# is the definition in effect for all code that runs after this point.

# Helper: pull the first plausible employee count out of one result snippet
def _extract_employee_count(snippet):
    """Return the employee count mentioned in *snippet*, or None.

    This is a heuristic. A number written directly before "employee(s)"
    (e.g. "1,200 employees", "500+ employees") is preferred; otherwise the
    first standalone integer in the snippet is used. Thousands separators
    are accepted (including Indian grouping such as "1,00,000"), while
    decimals ("1.5") and suffixed figures ("500k") are ignored.

    Args:
        snippet: Lower-cased snippet text of a single search result.

    Returns:
        The count as an int, or None if the snippet does not mention
        employees or contains no usable number.
    """
    import re  # function-scope import: the file's import block lacks `re`

    if "employee" not in snippet:
        return None

    # A standalone integer: not glued to a word, not part of a decimal.
    number = r"(?<![\w.,])(\d+(?:,\d+)*)(?![.,]\d|\w)"
    match = re.search(number + r"\+?\s+employees?\b", snippet)
    if match is None:
        # Fall back to the first number anywhere in the snippet.
        match = re.search(number, snippet)
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))


# Function to perform Google search using SerpAPI and extract employee count
def get_employee_count(company_name):
    """Search Google via SerpAPI and return the company's employee count.

    The query is "<company_name> Chennai employee count", localised to
    Chennai, Tamil Nadu, India. The organic results are scanned in order
    and the first snippet that yields a number wins.

    Args:
        company_name: Name of the company to look up.

    Returns:
        The employee count as an int, or None if the request failed or no
        result snippet contained a usable number. Network and JSON errors
        are printed rather than raised, so a batch run keeps going.
    """
    query = f"{company_name} Chennai employee count"
    params = {
        "q": query,
        "location": "Chennai, Tamil Nadu, India",
        "hl": "en",
        "gl": "in",
        "api_key": API_KEY,
    }
    try:
        # Call SerpAPI directly: the previous URL routed the request (and
        # the API key) through an unrelated proxy host.
        response = requests.get(
            "https://serpapi.com/search", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error searching for {company_name}: {e}")
        return None

    # Extract employee count from search results
    results = data.get("organic_results") if isinstance(data, dict) else None
    for result in results or []:
        snippet = result.get("snippet") if isinstance(result, dict) else None
        if not isinstance(snippet, str):
            continue
        count = _extract_employee_count(snippet.lower())
        if count is not None:
            return count
    return None
# NOTE(review): this pass repeats the processing block found earlier in this
# file (apparent paste duplication). It re-reads input_file, not output_file,
# so counts found by an earlier pass are not reused, and it overwrites
# output_file when it finishes.

# Read the Excel file
input_file = "companies.xlsx"
output_file = "companies_with_employee_count.xlsx"
df = pd.read_excel(input_file)

# Check if the 'Employee Count' column exists, if not, add it
if "Employee Count" not in df.columns:
    df["Employee Count"] = None

# Iterate through the companies and update employee count
for index, row in df.iterrows():
    company_name = row["Company Name"]
    # Skip rows without a company name; otherwise the search query would
    # be built from the literal text "nan".
    if pd.isna(company_name) or not str(company_name).strip():
        continue
    # Only look up rows whose employee count is still empty
    if pd.isna(row["Employee Count"]):
        employee_count = get_employee_count(company_name)
        df.at[index, "Employee Count"] = employee_count
        print(f"Processed {company_name}: {employee_count} employees")

# Save the updated DataFrame to a new Excel file
df.to_excel(output_file, index=False)
print(f"Data saved to {output_file}")