# DSC Lab Programs
# pgm 1
import numpy as np

# Two 3x3 integer matrices written as nested lists (one inner list per row).
matrix1 = [[3, 4, 2], [5, 1, 8], [3, 1, 9]]
matrix2 = [[3, 7, 5], [2, 9, 8], [1, 5, 8]]
# pgm 2
import numpy as np
# NOTE(review): only this assignment of the program is visible here; what `n`
# is used for cannot be determined from this file.
n = 8
# pgm 3
import numpy as np
# pgm 4
import numpy as np

# 3x3 integer matrix; pgm 5 below reads this same array.
array = np.array([[3, 7, 1], [10, 3, 2], [5, 6, 7]])
# Show the matrix followed by one blank line.
print(array, end="\n\n")

# pgm 5
import numpy as np

# Indices that would sort each row (argsort works along the last axis by default).
print(array.argsort())
# Reverse the order of the rows (top-to-bottom flip).
reversedArray = array[::-1, ...]
print(reversedArray)
# pgm 6
import numpy as np

# Arrays of different lengths whose means are computed independently.
# Named `arrays` rather than `list` so the built-in `list` type stays usable.
arrays = [
    np.array([3, 2, 8, 9]),
    np.array([4, 12, 34, 25, 78]),
    np.array([23, 12, 67]),
]
# One mean per array, in the same order as `arrays`.
result = [np.mean(values) for values in arrays]
print(result)
# pgm 7
import numpy as np

# An array built from an empty list holds no elements.
the_array = np.array([])
# The array is empty exactly when its total element count is zero.
is_empty = np.size(the_array) == 0
print(is_empty)
# pgm 8
import numpy as np
# NOTE(review): `array1` and `array2` are not defined anywhere in the visible
# source; they must be created before these statements can run. Presumably
# they are NumPy arrays, in which case each operator below is applied
# element-wise — confirm.
# Difference: array1 minus array2.
print(array1 - array2)
print("-" * 20)
# Product of the two operands.
print(array1 * array2)
print("-" * 20)
# Quotient; note the operand order is array2 divided by array1.
print(array2 / array1)
print("-" * 40)
# Power: array1 raised to array2.
print(array1 ** array2)
print("-" * 40)
# pgm 9
#NumPy Aggregate and Statistical Functions
import numpy as np
# pgm 10
# How to print a NumPy array without scientific notation in Python? Suppress
# scientific notation.
import numpy as np

# Print floats in fixed-point form ('{:f}') and switch off scientific notation
# for every array printed after this call.
np.set_printoptions(
    formatter=dict(float_kind='{:f}'.format),
    suppress=True,
)
# pgm 11
# Sum of all elements
import numpy as np
# NOTE(review): `arr` is not defined in the visible source. Assuming it is a
# NumPy array, the reshape below needs it to hold exactly 12 elements.
newarr = arr.reshape(4, 3)
# sum() is called without an axis, so despite the variable name this is one
# total over every element, not a per-column result.
column_sums = newarr[:, :].sum()
print(column_sums)
# pgm 12
# How do you find the mean across a column in Python? Calculate mean values across a
# column.
import numpy as np
# NOTE(review): no array is created in this program. The only `the_array`
# assigned in the visible file is the empty 1-D array of pgm 7, which has no
# axis 1 — a 2-D array presumably has to be defined here; confirm.
# For a 2-D NumPy array, axis=1 yields one mean per row.
mean_array = the_array.mean(axis=1)
print(mean_array)
# pgm 13
# How do you convert a one-dimensional array to a two-dimensional array in Python?
# 4 rows with 2 cols.
import numpy as np
# NOTE(review): `arr` is not defined in the visible source. Assuming it is a
# NumPy array, both reshapes below need it to hold exactly 8 elements.
# View the data as 4 rows by 2 columns.
newarr = arr.reshape(4, 2)
print(newarr)
# View the same data as 2 rows by 4 columns.
newarr = arr.reshape(2, 4)
print(newarr)
# NOTE(review): the "# pgm 14" label and the loop that fills the table printed
# below appear to be missing from this file — confirm against the lab sheet.
from math import e, factorial

import numpy as np

# Array-aware factorial: applies math.factorial to every entry of its input.
fac = np.vectorize(factorial)

if __name__ == "__main__":
    print("Actual:", e ** 3)  # Using e from the standard library
    print("N (terms)\tMaclaurin\tError")
# pgm 15
# NOTE(review): no array is created in this program. The only `the_array`
# assigned in the visible file is the empty 1-D array of pgm 7, for which the
# axis-1 call below is invalid — a 2-D array presumably belongs here; confirm.
# Product over every element (no axis given).
prod = np.prod(the_array)
print(prod)
# Product along axis 0.
prod = np.prod(the_array, 0)
print(prod)
# Product along axis 1.
prod = np.prod(the_array, 1)
print(prod)
# Same call as the first one: product over every element.
prod = np.prod(the_array)
print(prod)
# pgm 1
# import pandas as pd
import pandas as pd

# Plain Python list of words (strings), one entry per line.
lst = [
    'Good',
    'Morning',
    'Have',
    'a',
    'nice',
    'day',
    'Welcome',
]
# pgm 2
# Demonstrates creating a DataFrame from a dict of ndarrays / lists.
# NOTE(review): the original comment ended with "By default addresses.", which
# looks truncated — confirm the intended wording.
import pandas as pd
# NOTE(review): `data` is not defined in the visible source; it must be created
# before the DataFrame can be built.
# Create DataFrame
df = pd.DataFrame(data)
# pgm 3
# Import pandas package
import pandas as pd
# pgm 4
# pgm 5
# NOTE(review): `first` is not defined anywhere in the visible source; the
# statements that create it appear to be missing.
print(first)
# pgm 6
# pgm 7
import pandas as pd
# pgm 8
# Pandas Series in order to find null values in a series.
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np

# Dictionary of lists: one list of scores per column, with np.nan marking a
# missing entry. Named `scores` rather than `dict` so the built-in `dict` type
# is not shadowed.
scores = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}
# pgm 9
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np

# Dictionary of lists: one list of scores per column, with np.nan marking a
# missing entry. Named `scores` rather than `dict` so the built-in `dict` type
# is not shadowed.
scores = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}
# pgm 10
# Now we drop rows with at least one NaN value (null value)
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np

# Dictionary of lists: one list of scores per column, with np.nan marking a
# missing entry. Named `scores` rather than `dict` so the built-in `dict` type
# is not shadowed.
scores = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, np.nan, 45, 56],
    'Third Score': [52, 40, 80, 98],
    'Fourth Score': [np.nan, np.nan, np.nan, 65],
}
# pgm 11
# In order to iterate over rows, we can use three functions: iteritems(), iterrows(),
# itertuples().
# These three functions help in iterating over rows.
# importing pandas as pd
import pandas as pd

# Dictionary of lists: each key is a column name, each list that column's
# values. Named `student_data` rather than `dict` so the built-in `dict` type
# is not shadowed.
student_data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}
# The original printed `df` without ever creating it in this program; build
# the frame from the dictionary above so the program runs on its own.
df = pd.DataFrame(student_data)
print(df)
# pgm 12
# Now we apply the iterrows() function in order to get each element of the rows.
# NOTE(review): the iterrows() loop announced above is not present in the
# visible source; only the input data is defined here.
# importing pandas as pd
import pandas as pd

# Dictionary of lists: each key is a column name, each list that column's
# values. Named `student_data` rather than `dict` so the built-in `dict` type
# is not shadowed.
student_data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}
# pgm 13
# importing pandas as pd
import pandas as pd

# Dictionary of lists: each key is a column name, each list that column's
# values. Named `student_data` rather than `dict` so the built-in `dict` type
# is not shadowed.
student_data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}
# The original printed `df` without ever creating it in this program; build
# the frame from the dictionary above so the program runs on its own.
df = pd.DataFrame(student_data)
print(df)
# 4. Reading data from text files, Excel and the web and exploring various
# commands for doing descriptive analytics on the Iris data set
# NOTE(review): `data` and `plt` are not created in the visible part of this
# program. Presumably `data` is the Iris data set loaded into a pandas
# DataFrame and `plt` is matplotlib.pyplot — both must exist before this point.
print("describe")
print(data.describe())

print("create another column in the data frame with number for different species")
# Code (kept as a string) for each named species; any other value maps to "3".
species_codes = {"Iris-setosa": "1", "Iris-versicolor": "2"}
# Iterate over the values directly so the result does not depend on the frame
# having a 0..n-1 index.
Target = [species_codes.get(species, "3") for species in data['Species']]
data['Target'] = Target
# to_string must be *called*; printing the bare attribute only shows the
# bound method's repr instead of the table.
print(data.to_string())

print("draw a scatterplot")
data.plot.scatter(x='PetalLengthCm', y='PetalWidthCm')
plt.show()

print(" plot the petal length and width using hexagonal binning ")
data.plot.hexbin(x="PetalLengthCm", y="PetalWidthCm", gridsize=25)
plt.show()
# 5. Use the diabetes data set from UCI and Pima Indians Diabetes data set for
# performing the following:
# a. Univariate analysis: Frequency, Mean, Median, Mode, Variance, Standard
#    Deviation, Skewness and Kurtosis.
# b. Bivariate analysis: Linear and logistic regression modelling
# c. Multiple Regression analysis
# d. Also compare the results of the above analysis for the two data sets
# NOTE(review): `diab` is not created in the visible part of this program;
# presumably it is the diabetes data set loaded into a pandas DataFrame.
print("Finding if there are any null and Zero values in the data set")
print(diab.isnull().values.any())
# The message was split across two source lines inside the quotes (a syntax
# error); it is rejoined here with a single space.
print("we’ll drop 0 values and create a our new dataset which can be used for "
      "further analysis")

# Create `dia` from `diab` by excluding every row that has a zero in Glucose,
# BloodPressure, SkinThickness, Insulin or BMI; the other columns may
# legitimately contain zeros.
drop_Glu = diab.index[diab.Glucose == 0].tolist()
drop_BP = diab.index[diab.BloodPressure == 0].tolist()
drop_Skin = diab.index[diab.SkinThickness == 0].tolist()
drop_Ins = diab.index[diab.Insulin == 0].tolist()
drop_BMI = diab.index[diab.BMI == 0].tolist()
c = drop_Glu + drop_BP + drop_Skin + drop_Ins + drop_BMI
# `c` already holds index labels, so drop by label. Re-indexing `diab.index`
# with them treated labels as positions, which is only right for a 0..n-1 index.
dia = diab.drop(index=c)
# info() writes its report itself and returns None, so wrapping it in print()
# only added a stray "None" line.
dia.info()
print(dia.describe())

# Split the cleaned rows by outcome.
dia1 = dia[dia.Outcome == 1]
dia0 = dia[dia.Outcome == 0]
print(dia0)
print("calculate Regression")
import numpy as np
import matplotlib.pyplot as plt

# putting labels
# NOTE(review): these two calls run as soon as the script reaches them; they
# look like the tail of a plotting helper whose definition is missing — confirm.
plt.xlabel('x')
plt.ylabel('y')


def main():
    """Print regression coefficients with Glucose as x and Age as y.

    Reads the module-level `diab` frame and calls `estimate_coef`. Neither is
    defined before this point in the visible source, so both must exist by
    the time main() is invoked.
    """
    # observations / data
    x = diab["Glucose"]
    y = diab["Age"]
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))


if __name__ == "__main__":
    main()
# Announce the regression step on standard output.
print("calculate Regression")
import numpy as np
import matplotlib.pyplot as plt
def estimate_coef(x, y):
    """Return the least-squares intercept and slope of the line y = b_0 + b_1*x.

    The original body stopped after computing the number of observations and
    therefore returned None, although callers index the result as
    (b_0, b_1). The standard simple-linear-regression estimates are
    computed here.

    Args:
        x: 1-D sequence of predictor observations.
        y: 1-D sequence of response observations, same length as `x`.

    Returns:
        Tuple ``(b_0, b_1)``: intercept and slope of the fitted line.

    Raises:
        ValueError: if the inputs are empty, differ in length, or every
            value of `x` is identical (the slope is then undefined).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # number of observations/points
    n = np.size(x)
    if n == 0 or n != np.size(y):
        raise ValueError("x and y must be non-empty and of equal length")
    if x.min() == x.max():
        raise ValueError("x must contain at least two distinct values")

    mean_x = np.mean(x)
    mean_y = np.mean(y)
    # Cross-deviation and squared deviation of x about the means.
    ss_xy = np.sum((x - mean_x) * (y - mean_y))
    ss_xx = np.sum((x - mean_x) ** 2)

    b_1 = ss_xy / ss_xx
    b_0 = mean_y - b_1 * mean_x
    return (b_0, b_1)
# putting labels
# NOTE(review): these two calls run as soon as the script reaches them; they
# look like the tail of a plotting helper whose definition is missing — confirm.
plt.xlabel('x')
plt.ylabel('y')


def main():
    """Print regression coefficients with Obesity as x and Age as y.

    Reads the module-level `data1_bool` frame, which is not created anywhere
    in the visible source and must exist before main() is invoked.
    """
    # observations / data
    x = data1_bool["Obesity"]
    y = data1_bool["Age"]
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))


if __name__ == "__main__":
    main()
# 6. Apply and explore various plotting functions on UCI data sets.
# a. Normal curves
# b. Density and contour plots
# c. Correlation and scatter plots
# d. Histograms
# e. Three-dimensional plotting
print("diabetes dataset")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): `diab` is not created in the visible part of this program;
# presumably it is the diabetes data set loaded into a pandas DataFrame.
print("Finding if there are any null and Zero values in the data set")
print(diab.isnull().values.any())

print("find out how many Zero values are included in each variable")
# One zero-count per column, printed on a single line in this column order.
zero_check_columns = [
    "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
    "Insulin", "BMI", "DiabetesPedigreeFunction", "Age",
]
print(*((diab[column] == 0).sum() for column in zero_check_columns))

# The message was split across two source lines inside the quotes (a syntax
# error); it is rejoined here with a single space.
print("we’ll drop 0 values and create a our new dataset which can be used for "
      "further analysis")

# Create `dia` from `diab` by excluding every row that has a zero in Glucose,
# BloodPressure, SkinThickness, Insulin or BMI; the other columns may
# legitimately contain zeros.
drop_Glu = diab.index[diab.Glucose == 0].tolist()
drop_BP = diab.index[diab.BloodPressure == 0].tolist()
drop_Skin = diab.index[diab.SkinThickness == 0].tolist()
drop_Ins = diab.index[diab.Insulin == 0].tolist()
drop_BMI = diab.index[diab.BMI == 0].tolist()
c = drop_Glu + drop_BP + drop_Skin + drop_Ins + drop_BMI
# `c` already holds index labels, so drop by label. Re-indexing `diab.index`
# with them treated labels as positions, which is only right for a 0..n-1 index.
dia = diab.drop(index=c)
# info() writes its report itself and returns None, so wrapping it in print()
# only added a stray "None" line.
dia.info()
print(dia.describe())

# Split the cleaned rows by outcome.
dia1 = dia[dia.Outcome == 1]
dia0 = dia[dia.Outcome == 0]
print(dia0)
print("create another column in the data frame with number for different species")
# Text label per row: an Outcome of 0 becomes "Negative", anything else
# "Positive". Iterating over the values avoids depending on a 0..n-1 index.
Target = [
    "Negative" if outcome == 0 else "Positive"
    for outcome in diab['Outcome']
]
diab['Target'] = Target
# to_string must be *called*; printing the bare attribute only shows the
# bound method's repr instead of the table.
print(diab.to_string())

print("Multivariate Plots")
# Compute the correlation matrix
# The Target column added above holds strings, so restrict the computation to
# the numeric columns; Pearson correlation is undefined for text.
correlations = diab.select_dtypes(include="number").corr(method='pearson')
# Bounding box around `my_coords`, widened by `zoom_scale` on each side:
# [coord0 - zoom, coord0 + zoom, coord1 - zoom, coord1 + zoom].
# NOTE(review): `Basemap`, `my_coords` and `zoom_scale` are not defined in the
# visible source and must exist before this point.
bbox = [
    my_coords[0] - zoom_scale,
    my_coords[0] + zoom_scale,
    my_coords[1] - zoom_scale,
    my_coords[1] + zoom_scale,
]
plt.figure(figsize=(12, 6))
# Mercator map: the first two bbox entries are used as the latitude bounds,
# the last two as the longitude bounds.
m = Basemap(
    projection='merc',
    llcrnrlat=bbox[0],
    urcrnrlat=bbox[1],
    llcrnrlon=bbox[2],
    urcrnrlon=bbox[3],
    lat_ts=10,
    resolution='i',
)
# empty circle
plt.figure(figsize=(8, 8))
# Orthographic map; `m` is rebound, replacing the Mercator map above.
m = Basemap(projection='ortho', resolution=None, lat_0=50, lon_0=-100)
m.bluemarble(scale=0.5)
plt.title("Empty circle", fontsize=18)
plt.show()
# 9. Reading data from text files, Excel and the web and exploring various
# commands for doing descriptive analytics on the Housing dataset.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Raw string: the backslashes of the Windows path are kept literally. The
# plain literal relied on `\e`, `\C` and `\h` being unrecognised escapes,
# which newer Python versions warn about. The path value is unchanged.
housing = pd.read_csv(r'D:\edith esther\CS3361 DSC LAB\housing.csv')

# Quick look at the size and the first/last rows of the data.
print(housing.shape)
print(housing.head())
print(housing.head(10))
print(housing.tail())

# Line plot, then scatter plot, of house value against income.
print(housing.plot("median_income", "median_house_value"))
plt.show()
print(housing.plot.scatter("median_income", "median_house_value"))
plt.show()

# Hold out 20% of the rows for testing. No random_state is given, so the
# split (and every number printed below) changes from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    housing.median_income,
    housing.median_house_value,
    test_size=0.2,
)

# Fit a one-feature linear model; reshape(-1, 1) turns the 1-D income values
# into the single-column 2-D input that is passed to fit/predict.
regr = LinearRegression()
regr.fit(np.array(x_train).reshape(-1, 1), y_train)
preds = regr.predict(np.array(x_test).reshape(-1, 1))
print(y_test.head())
print(preds)

# Distribution of the prediction errors on the held-out rows.
residuals = preds - y_test
print(plt.hist(residuals))
plt.show()

# Root mean squared error of the predictions.
print(mean_squared_error(y_test, preds) ** 0.5)