Introduction To Data Science
Introduction To Data Science
Step1:Start
Step 2: Get the knowledge of input. Here we need 3
variables; a and b will be the user input and c will hold
the result.
Step 3: Declare a, b, c variables.
Step 4: Take input for a and b variable from the user.
Step 5: Know the problem and find the solution using
operators, data structures and logic
We need to multiply a and b variables so we use *
operator and assign the result to c.
That is c <- a * b
Step 6: Check how to give output, Here we need to print
the output. So write print c
Step 7: End
Example 1: Write an algorithm to find the maximum of all the elements present in the array.
Follow the algorithm approach as below:
List Comprehension
newlist = [expression for item in iterable if condition == True]
fruits = ["apple", "banana", "cherry", "kiwi", "mango"]
newlist = []
for x in fruits:
if "a" in x:
newlist.append(x)
print(newlist)
print(newlist)
thislist = ["orange", "mango", "kiwi", "pineapple", "banana"]
thislist.sort()
print(thislist)
set3 = set1.union(set2)
print(set3)
set1 = {"a", "b" , "c"}
set2 = {1, 2, 3}
set1.update(set2)
print(set1)
x = {"apple", "banana", "cherry"}
y = {"google", "microsoft", "apple"}
x.intersection_update(y)
print(x)
x = {"apple", "banana", "cherry"}
y = {"google", "microsoft", "apple"}
z = x.intersection(y)
print(z)
x = {"apple", "banana", "cherry"}
y = {"google", "microsoft", "apple"}
x.symmetric_difference_update(y)
print(x)
x = {"apple", "banana", "cherry"}
y = {"google", "microsoft", "apple"}
z = x.symmetric_difference(y)
print(z)
Python Dictionaries
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
print(len(thisdict))
String, int, boolean, and list data types:
thisdict = {
"brand": "Ford",
"electric": False,
"year": 1964,
"colors": ["red", "white", "blue"]
}
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
print(type(thisdict))
Accessing Items
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
x = thisdict["model"]
x = thisdict.keys()
x = thisdict.values()
x = thisdict.items()
Check if "model" is present in the dictionary:
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
if "model" in thisdict:
print("Yes, 'model' is one of the keys in the thisdict dictionary")
Change the "year" to 2018:
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
thisdict["year"] = 2018
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
Adding items
thisdict.update({"year": 2020})
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
thisdict["color"] = "red"
print(thisdict)
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
thisdict.update({"color": "red"})
The pop() method removes the item with the specified key name:
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
thisdict.pop("model")
print(thisdict)
The popitem() method removes the last inserted item (in versions before 3.7, a random item is
removed instead):
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
thisdict.popitem()
print(thisdict)
The del keyword removes the item with the specified key name:
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
del thisdict["model"]
print(thisdict)
The del keyword can also delete the dictionary completely:
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
del thisdict
print(thisdict) #this will cause an error because "thisdict" no longer exists.
Print all key names in the dictionary, one by one:
for x in thisdict:
print(x)
for x in thisdict.values():
print(x)
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
mydict = thisdict.copy()
print(mydict)
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
mydict = dict(thisdict)
print(mydict)
Nested Dictionaries
Create a dictionary that contain three dictionaries:
myfamily = {
"child1" : {
"name" : "Emil",
"year" : 2004
},
"child2" : {
"name" : "Tobias",
"year" : 2007
},
"child3" : {
"name" : "Linus",
"year" : 2011
}
}
child1 = {
"name" : "Emil",
"year" : 2004
}
child2 = {
"name" : "Tobias",
"year" : 2007
}
child3 = {
"name" : "Linus",
"year" : 2011
}
myfamily = {
"child1" : child1,
"child2" : child2,
"child3" : child3
}
A lambda function is a small anonymous function.
A lambda function can take any number of arguments, but can only have one expression.
The power of lambda is better shown when you use them as an anonymous function inside
another function.
def myfunc(n):
return lambda a : a * n
mydoubler = myfunc(2)
print(mydoubler(11))
Python Arrays
cars = ["Ford", "Volvo", "BMW"]
x = cars[0]
x = len(cars)
for x in cars:
print(x)
cars.append("Honda")
Delete the second element of the cars array:
cars.pop(1)
Delete the element that has the value "Volvo":
cars.remove("Volvo")
NumPy Creating Arrays
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
print(arr)
print(type(arr))
print(np.__version__)
print(type(arr))
Create a 0-D array with value 42
arr = np.array(42)
Create a 1-D array containing the values 1,2,3,4,5:
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
print(arr)
Create a 2-D array containing two arrays with the values 1,2,3 and 4,5,6:
import numpy as np
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr)
arr = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])
import numpy as np
arr = np.array([1, 2, 3, 4])
print(arr[0])
print(arr[1])
print(arr[2] + arr[3])
arr = np.array([[1,2,3,4,5], [6,7,8,9,10]])
print('2nd element on 1st row: ', arr[0, 1])
arr = np.array([[1,2,3,4,5], [6,7,8,9,10]])
print('5th element on 2nd row: ', arr[1, 4])
arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr[0, 1, 2])
arr = np.array([[1,2,3,4,5], [6,7,8,9,10]])
print('Last element from 2nd dim: ', arr[1, -1])
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[1:5])
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[4:])
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[:4])
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[-3:-1])
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[1:5:2])
arr = np.array([1, 2, 3, 4, 5, 6, 7])
print(arr[::2])
From the second element, slice elements from index 1 to index 4 (not included):
arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
print(arr[1, 1:4])
From both elements, return index 2:
arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
print(arr[0:2, 2])
From both elements, slice index 1 to index 4 (not included), this will return a 2-D array
arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
print(arr[0:2, 1:4])
import pandas as pd
mydataset = {
'cars': ["BMW", "Volvo", "Ford"],
'passings': [3, 7, 2]
}
myvar = pandas.DataFrame(mydataset)
print(myvar)
print(pd.__version__)
A Pandas Series is like a column in a table.
a = [1, 7, 2]
myvar = pd.Series(a)
print(myvar)
import pandas as pd
data = {
"calories": [420, 380, 390],
"duration": [50, 40, 45]
}
#load data into a DataFrame object:
df = pd.DataFrame(data)
print(df)
print(df.loc[0])
Pandas use loc attribute to return one or more rows
# importing pandas package
import pandas as pd
# making data frame from csv file
df = pd.read_csv("nba.csv")
# printing the first 10 rows of the dataframe
df[:10]
# Applying aggregation across all the columns
# sum and min will be found for each
# numeric type column in df dataframe
df.aggregate(['sum', 'min'])
# importing pandas package
import pandas as pd
# making data frame from csv file
df = pd.read_csv("nba.csv")
# We are going to find aggregation for these columns
df.aggregate({"Number":['sum', 'min'],
"Age":['max', 'min'],
"Weight":['min', 'sum'],
"Salary":['sum']})
# importing pandas as pd
import pandas as pd
# Creating the Series
sr = pd.Series(['New York', 'Chicago', 'Toronto', 'Lisbon', 'Rio'])
# Create the Index
index_ = ['City 1', 'City 2', 'City 3', 'City 4', 'City 5']
# set the index
sr.index = index_
# Print the series
print(sr)
# change 'Rio' to 'Montreal'
# we have used a lambda function
result = sr.apply(lambda x : 'Montreal' if x =='Rio' else x )
# Print the result
print(result)