457 Labs
457 Labs
Integers
#Integers are numbers without decimals
type(1)
print(1)
floats
#floats are numbers with decimals
type (4.5)
print (4.5)
4.5
Variable Definitions
age= 19
print (age)
19
string
##strings contain a sequence of characters
color = "blue"
print (color)
type(color)
age= 25
print ("my age is: ", age)
print("A students")
blue
my age is: 25
A students
String indexing
# This is string
# String M o h d
# Index 0 1 2 3
"Mohd"
'Mohd'
my_string="Mohd"
my_string[0]
'M'
my_string[1]
'o'
my_string[2]
'h'
my_string[3]
'd'
my_string[4]
----------------------------------------------------------------------
-----
IndexError Traceback (most recent call
last)
Cell In[17], line 1
----> 1 my_string[4]
my_string="Mohd"
my_string[-1]
my_string[-2]
my_string[-3]
my_string[-4]
String Slicing
# string_variable [start:stop:step]
# here we use only start and stop: string_variable[start:stop]
Mohadhasanali="Mohadhasanali"
Mohadhasanali[0:5]
Mohadhasanali[5:10]
Mohadhasanali[10:13]
f-strings
# To define an f-string, just add an f before the opening single or double quote.
# Within the string, surround the variables or expressions with curly braces {}.
# Their values replace the braces in the string when we run the program.
first_name="Mohamed"
favorite_language="python"
print(f"Hi, I'm {first_name}. I'm learning {favorite_language}.")
value = 20
print (f"{value} multiplied by 3 is:{value * 3}")
Booleans
# True and False
# Python's boolean constants are spelled with a capital first letter.
# Lowercase `true` / `false` are ordinary names that are not defined anywhere
# in this notebook as shown, so the next two calls raise NameError.
type(true)
type(false)
# The capitalised constants are valid and are instances of bool.
type(True)
type(False)
lists
# A list is an ordered, mutable sequence written with square brackets.
#list of numbers
[1, 2, 3, 4, 5]
#float
[3.4, 2.4, 2.6, 3.5]
#letters
# Bind the list to a name so it can be modified below; a bare list literal
# is evaluated and discarded, which left `letters` undefined.
letters = ["a", "b", "c", "d"]
# Lists are mutable: assigning to an index replaces that element in place.
letters[0] = "mohd"
letters[1] = "hasan"
letters[2] = "ali"
# Only the first three items were replaced; the last one is still "d".
letters
Tuples
print (1, 2, 3, 4, 5)
print ("a", "b", "c", "d")
print (3.4, 2.4, 2.6, 3.5)
# Tuples are indexed like strings and lists.
my_tuple=(1, 2, 3, 4, 5)
# Positive indices count from the left starting at 0; this tuple has five
# elements, so the valid positive indices are 0 through 4.
my_tuple[0]
my_tuple[1]
my_tuple[2]
my_tuple[3]
my_tuple[4]
# Index 6 is past the end of the 5-element tuple: raises IndexError.
my_tuple[6]
# Negative indices count from the right: -1 is the last element and -5 is
# the first.
my_tuple[-1]
my_tuple[-2]
my_tuple[-3]
my_tuple[-4]
my_tuple[-5]
# -6 is one step before the first element: raises IndexError as well.
my_tuple[-6]
Tuple length
my_tuple=(1, 2, 3, 4, 100, 50, 60)
len(my_tuple)
Nested Tuples
# A tuple can contain other containers; this one holds a list and a tuple.
my_tuple=([1, 2, 3], (4, 5, 6))
# The outer tuple has only two elements (indices 0 and 1), so index 2
# raises IndexError.
my_tuple[2]
# Index 1 is the inner tuple (4, 5, 6).
my_tuple[1]
# Index 0 is the inner list [1, 2, 3].
my_tuple[0]
Tuple assignment
# Tuple assignment: the values on the right are unpacked, position by
# position, into the names on the left.
(a, b) = (1, 2)
a

# Bind each name separately, then swap them in one statement. The right-hand
# side is evaluated first, so no temporary variable is needed.
a = 1
b = 2
(a, b) = (b, a)
a
Dictionaries
{"a":1, "b":2, "c":3}
my_dict={"a":1, "b":2, "c":3}
print(my_dict)
{'a': 1, 'b': 2, 'c': 3}
mc = pd.read_excel("mc.xlsx")
mc.head()
1 2014-03-08 38 11 ... 5 0
3 2014-02-10 26 11 ... 6 0
[5 rows x 29 columns]
mc.tail()
[5 rows x 29 columns]
mc.head(10)
1 2014-03-08 38 11 ... 5 0
3 2014-02-10 26 11 ... 6 0
7 2013-05-08 32 76 ... 8 0
8 2013-06-06 19 14 ... 9 0
9 2014-03-13 68 28 ... 20 1
mc.columns
64960
mc.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 29 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 2240 non-null int64
1 Year_Birth 2240 non-null int64
2 Education 2240 non-null object
3 Marital_Status 2240 non-null object
4 Income 2216 non-null float64
5 Kidhome 2240 non-null int64
6 Teenhome 2240 non-null int64
7 Dt_Customer 2240 non-null datetime64[ns]
8 Recency 2240 non-null int64
9 MntWines 2240 non-null int64
10 MntFruits 2240 non-null int64
11 MntMeatProducts 2240 non-null int64
12 MntFishProducts 2240 non-null int64
13 MntSweetProducts 2240 non-null int64
14 MntGoldProds 2240 non-null int64
15 NumDealsPurchases 2240 non-null int64
16 NumWebPurchases 2240 non-null int64
17 NumCatalogPurchases 2240 non-null int64
18 NumStorePurchases 2240 non-null int64
19 NumWebVisitsMonth 2240 non-null int64
20 AcceptedCmp3 2240 non-null int64
21 AcceptedCmp4 2240 non-null int64
22 AcceptedCmp5 2240 non-null int64
23 AcceptedCmp1 2240 non-null int64
24 AcceptedCmp2 2240 non-null int64
25 Complain 2240 non-null int64
26 Z_CostContact 2240 non-null int64
27 Z_Revenue 2240 non-null int64
28 Response 2240 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(25), object(2)
memory usage: 507.6+ KB
mc.Marital_Status.value_counts()
Marital_Status
Married 864
Together 580
Single 480
Divorced 232
Widow 77
Alone 3
Absurd 2
YOLO 2
Name: count, dtype: int64
# Plot how many customers fall into each marital status on a 15x10 figure.
# NOTE(review): plt.hist split the 8 categories into 10 equal-width bins
# (see the zero-count bins in the output that follows), so the bars do not
# map one-to-one onto categories. A bar chart of
# mc.Marital_Status.value_counts() may be a clearer view; confirm.
plt.figure(figsize = (15,10))
plt.hist(mc.Marital_Status)
(array([480., 580., 864., 0., 232., 77., 0., 3., 2., 2.]),
array([0. , 0.7, 1.4, 2.1, 2.8, 3.5, 4.2, 4.9, 5.6, 6.3, 7. ]),
<BarContainer object of 10 artists>)
mc.Education.value_counts()
Education
Graduation 1127
PhD 486
Master 370
2n Cycle 203
Basic 54
Name: count, dtype: int64
# Plot how many customers fall into each education level on a 15x10 figure.
# NOTE(review): Education has 5 categories, and plt.hist bins into 10
# equal-width bins by default, so empty gaps between bars are likely. A bar
# chart of mc.Education.value_counts() may be a clearer view; confirm.
plt.figure(figsize=(15,10))
plt.hist(mc.Education)
mc.isnull().sum()
ID 0
Year_Birth 0
Education 0
Marital_Status 0
Income 24
Kidhome 0
Teenhome 0
Dt_Customer 0
Recency 0
MntWines 0
MntFruits 0
MntMeatProducts 0
MntFishProducts 0
MntSweetProducts 0
MntGoldProds 0
NumDealsPurchases 0
NumWebPurchases 0
NumCatalogPurchases 0
NumStorePurchases 0
NumWebVisitsMonth 0
AcceptedCmp3 0
AcceptedCmp4 0
AcceptedCmp5 0
AcceptedCmp1 0
AcceptedCmp2 0
Complain 0
Z_CostContact 0
Z_Revenue 0
Response 0
dtype: int64
mc["Income"].median()
51381.5
m=mc["Income"].median()
m
51381.5
# Replace the missing Income values with the median computed above (m).
# The filled column is assigned back to the frame instead of calling
# fillna(inplace=True) on the mc["Income"] selection: pandas warns that the
# chained in-place form operates on an intermediate object and will stop
# working in pandas 3.0.
mc["Income"] = mc["Income"].fillna(value=m)
mc.isnull().sum()
ID 0
Year_Birth 0
Education 0
Marital_Status 0
Income 0
Kidhome 0
Teenhome 0
Dt_Customer 0
Recency 0
MntWines 0
MntFruits 0
MntMeatProducts 0
MntFishProducts 0
MntSweetProducts 0
MntGoldProds 0
NumDealsPurchases 0
NumWebPurchases 0
NumCatalogPurchases 0
NumStorePurchases 0
NumWebVisitsMonth 0
AcceptedCmp3 0
AcceptedCmp4 0
AcceptedCmp5 0
AcceptedCmp1 0
AcceptedCmp2 0
Complain 0
Z_CostContact 0
Z_Revenue 0
Response 0
dtype: int64
1975
666666.0
1730.0
52237.97544642857
#BOX PLOT
plt.figure(figsize=(10, 6))
sns.boxplot(x="Income",data=mc, color='lightblue')
<Axes: xlabel='Income'>
# Distribution of income: histogram with a kernel-density curve on top.
# sns.distplot is deprecated and emitted a UserWarning here; histplot is its
# axes-level replacement. kde=True adds the density curve and
# stat="density" keeps the y-axis normalised the way distplot drew it.
sns.histplot(mc.Income, kde=True, stat="density")
52237.97544642857
# Total number of marketing campaigns (1-5) that each customer accepted.
# The sum is wrapped in parentheses so it can span two lines. Without them
# the second line is a separate statement whose result is discarded, and
# AcceptedCmp4 / AcceptedCmp5 are silently left out of the total.
AcceptCmp = (
    mc["AcceptedCmp1"] + mc["AcceptedCmp2"] + mc["AcceptedCmp3"]
    + mc["AcceptedCmp4"] + mc["AcceptedCmp5"]
)
# Store the total as a new column on the frame.
mc["AcceptCmp"] = AcceptCmp
mc.head(5)
1 0 0 0 0 3
2 0 0 0 0 3
3 0 0 0 0 3
4 0 0 0 0 3
[5 rows x 30 columns]
plt.hist(mc.AcceptCmp)
AcceptCmp
0 1777
1 325
2 83
3 44
4 11
Name: count, dtype: int64