0% found this document useful (0 votes)
18 views

Py 2

The document contains examples demonstrating various pandas functionality like concatenating DataFrames, summarizing DataFrames, indexing and slicing Series, plotting clustering results, and more. The examples cover a wide range of pandas topics.

Uploaded by

Aaditya Khanna
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views

Py 2

The document contains examples demonstrating various pandas functionality like concatenating DataFrames, summarizing DataFrames, indexing and slicing Series, plotting clustering results, and more. The examples cover a wide range of pandas topics.

Uploaded by

Aaditya Khanna
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1 / 7

import pandas as pd

# Two frames with identical column names, so they can be stacked row-wise.
left = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5],
        "a": ["Ent1", "Ent2", "Ent3", "Ent4", "Ent5"],
        "b": ["sub1", "sub2", "sub4", "sub6", "sub5"],
    }
)
right = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5],
        "a": ["Enta", "Entb", "Entc", "Entd", "Ente"],
        "b": ["sub2", "sub4", "sub3", "sub6", "sub5"],
    }
)
frames = [left, right]

# concat appends right below left; each frame keeps its own 0..4 index, so
# the index labels repeat in the combined output.
combined = pd.concat(frames, sort=False)
print(combined)

import pandas as pd
# Column data for a small roster: twelve names with an age and a rating each.
# (The "Betina" literal had been split across two lines, which is a syntax
# error; it is restored here.)
d = {
    "Name": pd.Series(
        ["Tom", "James", "Ricky", "Vin", "Steve", "Smith", "Jack", "Lee",
         "David", "Gasper", "Betina", "Andres"]
    ),
    "Age": pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]),
    "Rating": pd.Series(
        [4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8, 3.78, 2.98, 4.80, 4.10, 3.65]
    ),
}
df = pd.DataFrame(d)

# Column-wise sum (axis 0): numeric columns are added up, and the string
# "Name" column is concatenated.
print(df.sum(axis=0))

import pandas as pd
from datetime import datetime
from datetime import date

# Reference one-liners for reading the system clock:
#   date.today()           -> current system date
#   datetime.today()       -> current system date and time
#   datetime.now().time()  -> current system time

# Ten consecutive one-second timestamps starting at 2011-05-01 00:00:00.
# The lowercase "s" alias is used because the uppercase "S" spelling is
# deprecated in recent pandas releases.
start = datetime(2011, 5, 1)
index = pd.date_range(start, periods=10, freq="s")
print(index)

import pandas as pd
# Build a Series from a dict: the keys become the index labels.
data = {"a": 0.536, "b": 1.456, "c": 2.7852}
s = pd.Series(data)

# .values exposes the underlying array (the older s.as_matrix() spelling is
# no longer used here).
mat = s.values
print(mat)

import pandas as pd
df = pd.DataFrame(
    [
        [0.23, 1.56, -0.45],
        [1.2, -2.1, 1.5],
        [-5.1, 3.2, -6.5],
        [0.25, -0.36, -89],
        [0.39, -0.78, -1.58],
    ],
    index=["a", "c", "e", "f", "h"],
    columns=["one", "two", "three"],
)

# Reindexing onto a..h introduces rows b, d and g, which are filled with NaN.
df = df.reindex(list("abcdefgh"))

# sum() skips NaN by default, so only the five original rows contribute.
column_total = df["one"].sum()
print(column_total)

import pandas as pd
s = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])

# An integer slice on a label-indexed Series is positional: first three items.
first_three = s[:3]
print(first_three)

import matplotlib.pyplot as plt


import numpy as np
from sklearn.cluster import KMeans
# Six 2-D sample points.
X = np.array(
    [
        [5, 3],
        [10, 15],
        [15, 12],
        [60, 78],
        [55, 52],
        [80, 91],
    ]
)

# Fit a two-cluster k-means model; fit() returns the estimator itself.
kmeans = KMeans(n_clusters=2).fit(X)

# Scatter the points, colouring each one by its assigned cluster label.
x_coords, y_coords = X[:, 0], X[:, 1]
plt.scatter(x_coords, y_coords, c=kmeans.labels_, cmap='rainbow')
plt.show()

import networkx as nx
import matplotlib.pyplot as plt
# Undirected graph built purely from an edge list; nodes are created
# implicitly and may mix strings and integers.
my_graph = nx.Graph()
edge_list = [("a", "b"), (2, 3), (3, 4), (4, 5), (2, 4)]
my_graph.add_edges_from(edge_list)

# Draw with node labels shown in bold, then display the figure.
nx.draw(my_graph, with_labels=True, font_weight='bold')
plt.show()

import numpy as np
from sklearn.decomposition import PCA

# Three samples with three features each.
X = np.array(
    [
        [-1, -1, 2],
        [-2, -1, 3],
        [-3, -2, 5],
    ]
)

# Keep all three components, then project the same data onto them.
pca = PCA(n_components=3)
pca.fit(X)
projected = pca.transform(X)
print(projected.shape)

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Seven 2-D training points and their class labels (0, 1 or 2).
X = np.array(
    [
        [-1, -1],
        [-2, -5],
        [200, 800],
        [3, 1],
        [2, 3],
        [50, 10],
        [-1, -3],
    ]
)
Y = np.array([0, 0, 2, 1, 1, 2, 0])

# Fit linear discriminant analysis and classify one query point.
clf = LinearDiscriminantAnalysis()
clf.fit(X, Y)
query = [[-1, -3]]
print(clf.predict(query))

from sklearn.metrics import accuracy_score

# Predicted vs. true binary labels for thirteen samples.
y_pred = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0]
y_true = [1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0]

# Fraction of positions where the prediction matches the true label.
score = accuracy_score(y_true, y_pred)
print(score)

import pandas as pd

df = pd.DataFrame(
    {
        "A": [2, 4, 8, 0],
        "B": [2, 0, 0, 0],
        "C": [10, 2, 1, 8],
        "D": [4, 6, 1, 0],
    },
    index=["W", "X", "Y", "Z"],
)

# sample() returns a new Series and its result is not stored here, so the
# frame printed below is the full, unmodified df.
df["A"].sample(n=3, random_state=1)
print(df)

def sayHello():
    """Print the greeting ``Hello World`` on its own line."""
    print("Hello World")


# Call the function twice; each call prints the greeting once.
sayHello()
sayHello()

class tester:
    """Minimal example class that stores its identifier as a string."""

    def __init__(self, id):
        """Store ``id`` on the instance after converting it with ``str``."""
        self.id = str(id)
        # Rebinding the local parameter afterwards does not touch self.id.
        id = "224"


# The attribute holds the constructor argument, so this prints 12.
temp = tester(12)
print(temp.id)

values = [1, 2, 3, 4]
numbers = set(values)


def checknums(num):
    """Return True when ``num`` is a member of ``numbers``, otherwise False."""
    return num in numbers


# Every element of ``values`` is also in ``numbers``, so filter() keeps them
# all and each one is printed.
for i in filter(checknums, values):
    print(i)

# After close() the file object reports closed == True.
f = open("Counter_input.txt", "r")
f.close()
print(f.closed)

# Re-open the file to show closed == False on a live handle. The with-block
# closes it on exit, so the handle is no longer leaked.
with open("Counter_input.txt", "r") as f:
    print(f.closed)

import pandas as pd
# Roster with a repeated name ("Steve") and one empty name. (The "Basper"
# literal had been split across two lines, which is a syntax error; it is
# restored here.)
d = {
    "Name": pd.Series(
        ["Tom", "James", "Ricky", "Steve", "Steve", "Smith", "Back", "Bee",
         "Bavid", "", "Basper", "Andres"]
    ),
    "Age": pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]),
    "Rating": pd.Series(
        [4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8, 3.78, 2.98, 4.80, 4.10, 3.65]
    ),
}

df = pd.DataFrame(d)

# By default describe() summarises numeric columns only; include='all' also
# summarises the string "Name" column.
print(df.describe(include='all'))

import pandas as pd
# Roster of twelve names with an age and a rating each. (The "Betina" literal
# had been split across two lines, which is a syntax error; it is restored
# here.)
d = {
    "Name": pd.Series(
        ["Tom", "James", "Ricky", "Van", "Steve", "Smith", "Jack", "Lee",
         "David", "Gasper", "Betina", "Andres"]
    ),
    "Age": pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]),
    "Rating": pd.Series(
        [4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8, 3.78, 2.98, 4.80, 4.10, 3.65]
    ),
}

df = pd.DataFrame(d)

# Column-wise sum (axis 0): numeric columns are added up, and the string
# "Name" column is concatenated.
print(df.sum(axis=0))

from sklearn.metrics import confusion_matrix

# True and predicted class labels (classes 0, 1 and 2) for six samples.
y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]

# Tabulate true labels against predicted labels.
matrix = confusion_matrix(y_true, y_pred)
print(matrix)

from scipy.spatial import distance


import pandas as pd

# Two 3-element vectors held as pandas Series.
p = pd.Series([1, 2, 3])
q = pd.Series([1, 3, 5])

# Straight-line distance between them: sqrt(0**2 + 1**2 + 2**2) = sqrt(5).
gap = distance.euclidean(p, q)
print(gap)

import numpy as np
from sklearn.decomposition import PCA
# Explained-variance ratio of the second component (index 1). This uses the
# ``pca`` object fitted earlier in this file, so it cannot run on its own.
second_component_ratio = pca.explained_variance_ratio_[1]
print(second_component_ratio)

import matplotlib.pyplot as plt


import networkx as nx

# Build an undirected graph step by step, using each way of adding
# nodes and edges in turn.
G = nx.Graph()

# Nodes: one at a time, then several from a list.
G.add_node("a")
G.add_nodes_from(["b", "c"])

# Edges: explicit endpoints, then tuples unpacked into add_edge.
G.add_edge(1, 2)
for edge in (("d", "e"), ("a", "b")):
    G.add_edge(*edge)

# Edges: a whole batch at once.
more_edges = [("a", "c"), ("c", "d"), ("a", 1), (1, "d"), ("a", 2)]
G.add_edges_from(more_edges)

nx.draw(G)
plt.show()

# PEMDAS / operator precedence: ** is evaluated first, then //, then +,
# i.e. 4 + ((2 ** 5) // 10) = 4 + (32 // 10) = 4 + 3 = 7.
X = 4 + ((2 ** 5) // 10)
print(X)

s = "python"

# Unpacking a string into a list yields one element per character.
print([*s])

import pandas as pd
import numpy as np

df = pd.DataFrame(
    {
        "A": [1, 5, 3, 4, 2],
        "B": [3, 2, 4, 3, 4],
        "C": [2, 2, 7, 3, 4],
        "D": [4, 3, 6, 12, 7],
    },
    columns=["A", "B", "C", "D"],
)

# Axis 0 is the row axis and axis 1 the column axis. With axis=1 the
# function receives one row at a time, so the result has one mean per row.
row_means = df.apply(np.mean, axis=1)
print(row_means)

import pandas as pd
# Roster of twelve names with an age and a rating each. (The "Betina" literal
# had been split across two lines, which is a syntax error; it is restored
# here.)
d = {
    "Name": pd.Series(
        ["Tom", "James", "Ricky", "Van", "Steve", "Smith", "Jack", "Lee",
         "David", "Gasper", "Betina", "Andres"]
    ),
    "Age": pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]),
    "Rating": pd.Series(
        [4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8, 3.78, 2.98, 4.80, 4.10, 3.65]
    ),
}

df = pd.DataFrame(d)

# Sample standard deviation per column (axis 0). numeric_only=True skips the
# string "Name" column, which has no standard deviation and otherwise makes
# newer pandas versions raise a TypeError.
print(df.std(axis=0, numeric_only=True))

import pandas as pd
# Three daily observations starting 2000-01-01, four columns each.
df = pd.DataFrame(
    [
        [-1, -2, -5, -8],
        [-9, -3, -5, -1],
        [-1, -2, -6, -3],
    ],
    index=pd.date_range("1/1/2000", periods=3),
    columns=["A", "B", "C", "D"],
)

# Mean over a sliding window of two rows; the first row has no full window
# and therefore comes out as NaN.
two_row_mean = df.rolling(window=2).mean()
print(two_row_mean)

import pandas as pd
import numpy as np

# Two categories plus a missing value.
s = ["a", "b", np.nan]

# One indicator column per category; the NaN entry gets no column of its own.
indicators = pd.get_dummies(s)
print(indicators)

import pandas as pd

df1 = pd.DataFrame(
    [
        [2.23, 4.00, -6.56],
        [8.12, -10, -12.89],
        [14.23, -1.36, -1.8],
        [20.56, -3.2, 60.3],
    ],
    columns=["col1", "col2", "col3"],
)

# Labels absent from a mapping stay unchanged: "col3" and row 3 keep their
# original names. rename() returns a new frame and leaves df1 untouched.
column_names = {"col1": "column1", "col2": "column2"}
row_names = {0: "apple", 1: "banana", 2: "durian"}
print(df1.rename(columns=column_names, index=row_names))
import pandas as pd
import numpy as np

# arange with a stop of 29.5 yields the thirty floats 0.0 .. 29.0, which
# fill a 6 x 5 frame row by row.
grid = np.arange(29.5).reshape((6, 5))
X = pd.DataFrame(grid)

# Draw three rows at random; no seed is given, so the pick varies per run.
rows = X.sample(n=3)
print("Sampled rows: \n", rows)

# An int and a str never compare equal, so this prints False.
print(1 == "1")

import pandas as pd
df = pd.DataFrame(
    {
        "A": [2, 4, 8, 0],
        "B": [2, 0, 0, 0],
        "C": [10, 2, 1, 8],
        "D": [4, 6, 1, 0],
    },
    index=["W", "X", "Y", "Z"],
)

# sample() returns a new Series and its result is not stored here, so the
# frame printed below is the full, unmodified df.
df["A"].sample(n=4, random_state=4)
print(df)

import pandas as pd
import numpy as np
df1 = pd.DataFrame(
    [
        [2.23, 4.00, -6.56],
        [8.12, -10, -12.89],
        [14.23, -1.36, -1.8],
        [20.56, -3.2, 60.3],
    ],
    columns=["col1", "col2", "col3"],
)

# Extend the index to 0..5. Forward-fill copies the last real row into new
# rows, but limit=1 allows only one consecutive fill: row 4 repeats row 3
# while row 5 stays NaN.
new_index = list(range(6))
print(df1.reindex(new_index, method="ffill", limit=1))

import pandas as pd
import numpy as np
# Pair each lowercase letter with its position 0..25.
mylist = list("abcdefghijklmnopqrstuvwxyz")
myarr = np.arange(26)
mydict = {letter: position for letter, position in zip(mylist, myarr)}
ser = pd.Series(mydict)

# to_frame() makes a one-column frame; reset_index() then moves the letter
# labels out of the index into a regular column named "index".
as_frame = ser.to_frame()
df = as_frame.reset_index()
print(df.head(5))
import pandas as pd
import numpy as np
# Strings with assorted trailing punctuation and whitespace, plus a NaN.
s = pd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", np.nan])

# rstrip removes any trailing run of the listed characters; the leading
# "1. " style prefixes are untouched and NaN stays NaN.
trailing_chars = ".!? \n\t"
print(s.str.rstrip(trailing_chars))
# Categorical observations: one entry per day in each list.
weather = ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy']
temp = ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool']
play = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No']

from sklearn import preprocessing

# The same encoder is re-fitted on each list in turn, turning its string
# labels into integer codes.
le = preprocessing.LabelEncoder()
weather_encoded = le.fit_transform(weather)
print(weather_encoded)  # [2,2,0,1,1,1]

temp_encoded = le.fit_transform(temp)
label = le.fit_transform(play)

print("Temp", temp_encoded)  # [1,1,1,2,0,0]
print("Play", label)  # [0,0,1,1,1,0]

# In Python 3 zip() returns a lazy iterator, so printing it shows only
# "<zip object ...>". Materialise it as a list of (weather, temp) pairs so
# the pairs are printed and can be reused.
features = list(zip(weather_encoded, temp_encoded))
print(features)

You might also like