0% found this document useful (0 votes)
7 views9 pages

Combined

The document contains Python code for data manipulation using pandas, including importing a CSV file, renaming columns, handling null values, and splitting data into new columns. It also includes SQL queries for managing employee data, such as selecting, inserting, updating, and deleting records. Additionally, the document features code for creating various data visualizations using matplotlib, including pie charts, box plots, scatter plots, bar charts, and histograms.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
7 views9 pages

Combined

The document contains Python code for data manipulation using pandas, including importing a CSV file, renaming columns, handling null values, and splitting data into new columns. It also includes SQL queries for managing employee data, such as selecting, inserting, updating, and deleting records. Additionally, the document features code for creating various data visualizations using matplotlib, including pie charts, box plots, scatter plots, bar charts, and histograms.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 9

# Remove every non-digit character from the phone_number column.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace treats
# the pattern as a literal string by default, so '[^0-9]' would match nothing.
df["phone_number"].str.replace(r'[^0-9]', '', regex=True)
# ## Importing pandas

# In[1]:

import pandas as pd

# ## Import CSV

# In[2]:

# Load the source table; the path is relative to the working directory.
df = pd.read_csv('Book1.csv')

# In[3]:

df

# ## Rename columns

# In[4]:

# Replace the terse source headers with descriptive names. Each key/value
# pair sits on its own line so no string literal can be broken by wrapping.
df = df.rename(columns={
    'Mat': 'matches',
    'NO': 'Not_Outs',
    'HS': 'Highest_Inns_Score',
    'BF': 'Balls_faced',
    'SR': 'Batting_Strike_Rate',
})

# In[5]:

df

# ## Check for null values

# In[6]:

# Per column: is at least one value missing?
df.isna().any()

# In[7]:

# Rows whose Balls_faced entry is missing.
df.loc[df['Balls_faced'].isna()]

# In[8]:

# Missing Balls_faced values become 0.
df['Balls_faced'] = df['Balls_faced'].fillna(value=0)
# In[9]:

# Missing Batting_Strike_Rate values become 0 as well.
df['Batting_Strike_Rate'] = df['Batting_Strike_Rate'].fillna(value=0)

# In[10]:

# Look at a single player's row.
df.loc[df['Player'].eq('ED Weekes (WI)')]

# ## Drop duplicates

# In[11]:

# True for every row that repeats an earlier row in all columns.
df.duplicated()

# In[12]:

# Rows whose Player value already appeared in an earlier row.
df.loc[df['Player'].duplicated()]

# In[13]:

# All rows recorded for three specific players.
players_to_inspect = ['GS Sobers (WI)', 'JB Hobbs (ENG)', 'Younis Khan (PAK)']
df.loc[df['Player'].isin(players_to_inspect)]

# In[14]:

# Keep the first occurrence of each fully identical row.
df = df.drop_duplicates(keep='first')

# In[15]:

# Remove the row with index label 10.
# NOTE(review): hard-coded label, presumably a leftover duplicate player
# entry that is not an exact row copy; confirm against Book1.csv.
df = df.drop(index=10)

# ## Split up span into start and end date

# In[16]:

# Preview: each Span string broken into a list at '-'.
df['Span'].str.split('-')

# In[17]:

# Split once, then take the first piece as the start year and the second as
# the end year (both still strings at this point).
span_parts = df['Span'].str.split('-')
df['Rookie_Year'] = span_parts.str.get(0)
df['Retirment_Year'] = span_parts.str.get(1)
# In[18]:

df

# ## Drop the span column

# In[19]:

# Span is now redundant with the two year columns.
df = df.drop(columns='Span')

# In[20]:

df.head(5)

# ## Split the country from the player

# In[21]:

# Preview: Player values look like 'ED Weekes (WI)', so splitting at '('
# yields ['ED Weekes ', 'WI)'].
df['Player'].str.split(pat='(')

# In[22]:

# Split once and reuse both halves below.
player_parts = df['Player'].str.split(pat='(')
df['Country'] = player_parts.str[1]

# In[23]:

# Drop the closing parenthesis (and anything after it) from the country.
df['Country'] = df['Country'].str.split(pat=')').str[0]

# In[24]:

# Keep only the name. strip() removes the space that preceded '(' so the
# stored name is 'ED Weekes' rather than 'ED Weekes ', which would silently
# break later equality filters on Player.
df['Player'] = player_parts.str[0].str.strip()

# In[25]:

df['Player']

# In[26]:

df.head()

# ## Change datatypes
# In[27]:

df.dtypes

# In[28]:

# Preview: Highest_Inns_Score broken into pieces at '*'.
df['Highest_Inns_Score'].str.split('*')

# In[29]:

# Keep only the text before the first '*'.
df['Highest_Inns_Score'] = df['Highest_Inns_Score'].str.split('*').str.get(0)

# In[30]:

df.head(5)

# In[31]:

# The cleaned strings can now be converted to integers.
df['Highest_Inns_Score'] = df['Highest_Inns_Score'].astype(int)

# In[32]:

df.dtypes

# ## for rookie and final year

# In[33]:

# Convert both year columns to integers in a single astype call.
year_columns = ['Rookie_Year', 'Retirment_Year']
df = df.astype({column: 'int' for column in year_columns})

# In[34]:

df.dtypes

# In[35]:

# Keep only the text before the first '*' in matches.
df['matches'] = df['matches'].str.split('*').str.get(0)

# In[36]:
df['matches'] = df['matches'].astype(int)

# ## Balls faced

# In[37]:

df

# In[38]:

# Keep only the text before the first '+'.
# NOTE(review): non-string entries (e.g. the integer 0 written by the earlier
# fillna) come back as NaN from the .str accessor; confirm that the rows
# dropped below are exactly those entries.
df['Balls_faced'] = df['Balls_faced'].str.split('+').str.get(0)

# In[39]:

# Remove three rows by index label before the integer conversion.
# NOTE(review): the labels are hard-coded for this particular Book1.csv.
df = df.drop(index=57)

# In[40]:

df = df.drop(index=8)

# In[41]:

df = df.drop(index=16)

# In[42]:

df['Balls_faced'] = df['Balls_faced'].astype(int)

# In[43]:

df['Batting_Strike_Rate'] = df['Batting_Strike_Rate'].astype(float)

# In[44]:

df.dtypes

# ## Career length

# In[45]:
# Final year minus first year, per row.
df['Career_Length'] = df['Retirment_Year'].sub(df['Rookie_Year'])

# In[46]:

df

# ## Average career length

# In[47]:

df['Career_Length'].mean()

# ## Avg batting strike rate and played over 10 years

# In[48]:

# Mean strike rate restricted to rows with Career_Length strictly above 10.
long_career = df['Career_Length'].gt(10)
df.loc[long_career, 'Batting_Strike_Rate'].mean()

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.

SELECT * FROM databaseprac.employees;

# Adding attribute: tag each row 'New' when hired on or after 2019-01-01,
# otherwise 'Standard'.
SELECT
    *,
    IF(hire_date >= '2019-01-01', 'New', 'Standard') AS employee_type
FROM databaseprac.employees;

# Changing null values: empty or NULL titles are shown as 'No title'.
SELECT
    first_name,
    last_name,
    IFNULL(NULLIF(title, ''), 'No title') AS title
FROM databaseprac.employees;

# Removing outliers: the title 'Honorable' is shown as NULL.
SELECT
    first_name,
    last_name,
    NULLIF(title, 'Honorable') AS title
FROM databaseprac.employees;

# Change type of attributes: age returned as a character string.
SELECT
    first_name,
    last_name,
    CAST(age AS CHAR) AS age
FROM databaseprac.employees;

# Removing duplicates: each first name listed once.
SELECT DISTINCT
    first_name
FROM databaseprac.employees;

# Inserting elements: add one employee row.
INSERT INTO databaseprac.employees
    (id, first_name, last_name, title, age, wage, hire_date)
VALUES
    (6, 'Ahmad', 'Elhaj', 'Dr.', 36, 12, '2022-01-02');

# Safe-update mode rejects UPDATE/DELETE statements whose WHERE clause does
# not use a key column (presumably the case for first_name and title here;
# verify against the table definition). Remember the session's current
# setting so it can be restored once the statements below have run.
SET @prev_sql_safe_updates = @@SQL_SAFE_UPDATES;
SET SQL_SAFE_UPDATES = 0;

# Delete elements: remove every row whose first name is 'Ahmad'.
DELETE FROM databaseprac.employees WHERE `first_name` = 'Ahmad';

# Update the table: give blank or NULL titles the placeholder 'NO TITLE'.
# The WHERE clause limits the write to the rows that actually change.
UPDATE databaseprac.employees
SET `title` = 'NO TITLE'
WHERE `title` IS NULL OR `title` = '';

# Put safe-update mode back the way it was found.
SET SQL_SAFE_UPDATES = @prev_sql_safe_updates;

SELECT * FROM databaseprac.employees;
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.

..

##PieChart

import matplotlib.pyplot as plt

# Sample data: one wedge per label
labels = ['A', 'B', 'C', 'D']
sizes = [15, 30, 45, 10]

# Create pie chart on the current Axes; each wedge is annotated with its
# percentage share to one decimal place.
ax = plt.gca()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.set_title('Pie Chart')
plt.show()

##Box Plot

# Sample data: one list of observations per category
categories = ['A', 'B', 'C', 'D']
data = [
    [10, 15, 20, 25, 30],
    [15, 20, 25, 30, 35],
    [20, 25, 30, 35, 40],
    [25, 30, 35, 40, 45],
]

# Create box plot
# Tick labels are set with plt.xticks instead of boxplot(labels=...): that
# keyword was deprecated in Matplotlib 3.9 (renamed tick_labels), whereas
# xticks works on both older and newer releases. Boxes are drawn at positions
# 1..N by default, so the labels go at those same positions.
plt.boxplot(data)
plt.xticks(range(1, len(categories) + 1), categories)
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Box Plot for Category Across Numeric')
plt.show()

##Scatter Plot

# Sample data: paired x/y coordinates
x = [1, 2, 3, 4, 5]
y = [2, 3, 5, 7, 11]

# Create scatter plot on the current Axes
ax = plt.gca()
ax.scatter(x, y)
ax.set_xlabel('X-axis label')
ax.set_ylabel('Y-axis label')
ax.set_title('Scatter Plot')
plt.show()

##Bar Chart

# Sample data: one bar height per category
categories = ['A', 'B', 'C', 'D']
values = [23, 45, 56, 78]

# Create bar chart on the current Axes
ax = plt.gca()
ax.bar(categories, values)
ax.set_xlabel('Categories')
ax.set_ylabel('Values')
ax.set_title('Bar Chart')
plt.show()

##Histogram

# Sample data: raw observations to be binned
data = [1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9]

# Create histogram on the current Axes; black edges separate adjacent bars
ax = plt.gca()
ax.hist(data, edgecolor='black')
ax.set_xlabel('Values')
ax.set_ylabel('Frequency')
ax.set_title('Histogram')
plt.show()

You might also like