# Combined notes
# Note: replace('[^0-9]','') -- as a regular expression this removes every non-digit character
# ## Importing pandas
# In[1]:
import pandas as pd
# ## Import CSV
# In[2]:
# Load the raw table; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer='Book1.csv')
# In[3]:
# Bare expression: a notebook cell displays the freshly loaded frame.
df
# ## Rename columns
# In[4]:
# Replace the abbreviated headers with descriptive column names.
# The call is written as one bracketed expression so that the assignment and
# every string literal stay syntactically intact across lines.
df = df.rename(columns={
    'Mat': 'matches',
    'NO': 'Not_Outs',
    'HS': 'Highest_Inns_Score',
    'BF': 'Balls_faced',
    'SR': 'Batting_Strike_Rate',
})
# In[5]:
# Bare expression: displays the frame with its new headers in a notebook.
df
# In[6]:
# One boolean per column: True where the column has at least one missing value.
df.isna().any()
# In[7]:
# Show the rows whose 'Balls_faced' entry is missing.
df[df['Balls_faced'].isna()]
# In[8]:
# Missing ball counts become 0.
df['Balls_faced'] = df['Balls_faced'].fillna(value=0)
# In[9]:
# Missing strike rates become 0 as well.
df['Batting_Strike_Rate'] = df['Batting_Strike_Rate'].fillna(value=0)
# In[10]:
# ## Drop duplicates
# In[11]:
# Flag rows that repeat an earlier row in every column.
df.duplicated(keep='first')
# In[12]:
# Show rows whose 'Player' value already appeared further up.
df[df['Player'].duplicated()]
# In[13]:
# In[14]:
# Keep the first occurrence of each fully identical row.
df = df.drop_duplicates(keep='first')
# In[15]:
# Row label 10 is removed by hand.
# NOTE(review): presumably a repeated player that is not an exact duplicate row - confirm.
df = df.drop(index=10)
# In[16]:
# Preview: break 'Span' into its pieces around the '-' separator.
df['Span'].str.split('-')
# In[17]:
# First piece -> 'Rookie_Year', second piece -> 'Retirment_Year'
# (the column name is spelled this way throughout the script).
df['Rookie_Year'] = df['Span'].str.split('-').str.get(0)
df['Retirment_Year'] = df['Span'].str.split('-').str.get(1)
# In[18]:
df
# In[19]:
# 'Span' is no longer needed once both parts have their own columns.
df = df.drop(columns='Span')
# In[20]:
df.head(5)
# In[21]:
# Preview: break 'Player' at the opening parenthesis.
df['Player'].str.split('(')
# In[22]:
# Everything after the first '(' is the start of the 'Country' value.
df['Country'] = df['Player'].str.split('(').str.get(1)
# In[23]:
# Cut the value off at the closing parenthesis.
df['Country'] = df['Country'].str.split(')').str.get(0)
# In[24]:
# In[25]:
df['Player']
# In[26]:
df.head(5)
# ## Change datatypes
# In[27]:
df.dtypes
# In[28]:
# Preview: break the highest-score strings at the '*' character.
df['Highest_Inns_Score'].str.split(pat = '*')
# In[29]:
# Keep only the text before any '*', so the integer cast in In[31] does not
# raise ValueError on a value that still carries the marker.
# NOTE(review): assumes '*' is the only non-numeric character in this column - confirm.
df['Highest_Inns_Score'] = df['Highest_Inns_Score'].str.split(pat='*').str[0]
# In[30]:
df.head()
# In[31]:
df['Highest_Inns_Score'] = df['Highest_Inns_Score'].astype('int')
# In[32]:
df.dtypes
# In[33]:
# Both year columns were produced by string splitting; cast them to integers.
df = df.astype({'Rookie_Year':'int','Retirment_Year':'int'})
# In[34]:
df.dtypes
# In[35]:
# In[36]:
df['matches'] = df['matches'].astype('int')
# ## Balls faced
# In[37]:
df
# In[38]:
# In[39]:
# Rows 57, 8 and 16 are removed by label, one cell at a time.
# NOTE(review): presumably these rows hold values that block the casts below - confirm.
df = df.drop(index=57)
# In[40]:
df = df.drop(index=8)
# In[41]:
df = df.drop(index=16)
# In[42]:
# Ball counts become integers.
df = df.astype({'Balls_faced': 'int'})
# In[43]:
# Strike rates become floating-point numbers.
df = df.astype({'Batting_Strike_Rate': 'float'})
# In[44]:
df.dtypes
# ## Career length
# In[45]:
# Career length = 'Retirment_Year' minus 'Rookie_Year'.
df['Career_Length'] = df['Retirment_Year'].sub(df['Rookie_Year'])
# In[46]:
df
# In[47]:
# Mean career length over all remaining rows.
df['Career_Length'].mean()
# In[48]:
# Mean strike rate restricted to rows with a career length above 10.
df.loc[df['Career_Length'].gt(10), 'Batting_Strike_Rate'].mean()
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
#Adding attribute
# Return every employee column plus a derived employee_type label:
# 'New' when hire_date is on or after 2019-01-01, otherwise 'Standard'
# (a NULL hire_date also yields 'Standard').
SELECT
    emp.*,
    IF(emp.hire_date >= '2019-01-01', 'New', 'Standard') AS employee_type
FROM databaseprac.employees AS emp;
#removing outliers
# List names and titles, returning NULL in place of the title 'Honorable';
# every other title is passed through unchanged.
SELECT
    emp.first_name,
    emp.last_name,
    CASE WHEN emp.title = 'Honorable' THEN NULL ELSE emp.title END AS title
FROM databaseprac.employees AS emp;
#Removing duplicates
# Return each first name once, however many employees share it.
SELECT emp.first_name
FROM databaseprac.employees AS emp
GROUP BY emp.first_name;
#Inserting elements
# Add one employee row, naming each target column next to its value.
INSERT INTO databaseprac.employees
SET id = 6,
    first_name = 'Ahmad',
    last_name = 'Elhaj',
    title = 'Dr.',
    age = 36,
    wage = 12,
    hire_date = '2022-01-02';
#Delete elements
# Remember the session's current safe-update setting so it can be restored.
SET @prev_sql_safe_updates = @@SQL_SAFE_UPDATES;
# Safe-update mode rejects a DELETE whose WHERE clause does not use a key
# (and has no LIMIT), so it is switched off for this one statement only.
SET SQL_SAFE_UPDATES = 0;
# Removes every employee whose first name is 'Ahmad'.
DELETE FROM databaseprac.employees WHERE `first_name`='Ahmad';
# Put the protection back so later statements in the session are guarded again.
SET SQL_SAFE_UPDATES = @prev_sql_safe_updates;
..
##PieChart
# Sample data: four category labels and one numeric size per label.
labels = list('ABCD')
sizes = [15, 30, 45, 10]
##Box Plot
# Sample data: four categories, each with five values spaced 5 apart;
# consecutive categories start 5 higher than the previous one.
categories = list('ABCD')
data = [list(range(first, first + 25, 5)) for first in (10, 15, 20, 25)]
##Scatter Plot
# Sample data: x runs from 1 to 5, y holds the paired values.
x = list(range(1, 6))
y = [
    2,
    3,
    5,
    7,
    11,
]
##Bar Chart
# Sample data: one value per category label.
categories = list('ABCD')
values = [
    23,
    45,
    56,
    78,
]
##Histogram
# Sample data: 17 observations between 1 and 9, written as runs of repeated values.
data = [1] * 2 + [2] + [3] * 3 + [4] * 2 + [5] * 4 + [6] * 2 + [7, 8, 9]
# Create histogram
# NOTE(review): `plt` is not imported anywhere in the visible file -
# presumably matplotlib.pyplot; confirm and import it before running.
# Draw the bars with a black outline.
plt.hist(x=data, edgecolor='black')
# Title and axis captions are independent of each other.
plt.title('Histogram')
plt.xlabel('Values')
plt.ylabel('Frequency')
# Render the finished figure.
plt.show()