Marvel Vs DC

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 1

In [1]: import pandas as pd

In [7]: data1 = pd.read_excel('Downloads/MarvelDC.xlsx', 'Marvel')

In [5]: data.head()

Out[5]:
Company Film Release Adjusted Worldwide Domestic Foreign

0 Marvel Avengers 2012 1575.847963 1518.594910 623.357910 895.237000

1 Marvel Iron Man 3 2013 1243.055923 1215.439994 409.013994 806.426000

2 Marvel Spider-Man 2002 1088.227139 821.708551 403.706375 418.002176

3 Marvel Spider-Man 3 2007 1023.671832 890.871626 336.530303 554.341323

4 Marvel Spider-Man 2 2004 988.524779 783.766341 373.585825 410.180516

In [6]: data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Company 37 non-null object
1 Film 37 non-null object
2 Release 37 non-null int64
3 Adjusted 37 non-null float64
4 Worldwide 37 non-null float64
5 Domestic 37 non-null float64
6 Foreign 37 non-null float64
dtypes: float64(4), int64(1), object(2)
memory usage: 2.1+ KB

In [8]: data2 = pd.read_excel('Downloads/MarvelDC.xlsx', 'DC')

In [9]: data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Company 26 non-null object
1 Film 26 non-null object
2 Release 26 non-null int64
3 Adjusted 26 non-null float64
4 Worldwide 26 non-null float64
5 Domestic 26 non-null float64
6 Foreign 26 non-null float64
dtypes: float64(4), int64(1), object(2)
memory usage: 1.5+ KB

In [10]: data1.describe()

Out[10]:
Release Adjusted Worldwide Domestic Foreign

count 37.000000 37.000000 37.000000 37.000000 37.000000

mean 2006.297297 457.839381 409.866944 183.371500 235.234645

std 6.967643 378.577633 355.334076 132.979399 224.414327

min 1986.000000 0.018544 0.010173 8.050977 0.010173

25% 2003.000000 162.581808 131.183530 82.348319 72.661713

50% 2007.000000 392.498627 330.579719 161.675229 175.883639

75% 2012.000000 681.718448 644.783140 234.362462 410.180516

max 2014.000000 1575.847963 1518.594910 623.357910 895.237000

In [11]: # Print the movie with the minimum worldwide gross, and its release year

In [12]: d= data1['Worldwide']

In [15]: data.loc[(d.idxmin()), ['Film','Release', 'Worldwide']]

Out[15]: Film Captain America


Release 1990
Worldwide 0.010173
Name: 36, dtype: object

In [16]: data.loc[(d.idxmax()), ['Film','Release', 'Worldwide']]

Out[16]: Film Avengers


Release 2012
Worldwide 1518.59491
Name: 0, dtype: object

In [17]: data1.describe()

Out[17]:
Release Adjusted Worldwide Domestic Foreign

count 37.000000 37.000000 37.000000 37.000000 37.000000

mean 2006.297297 457.839381 409.866944 183.371500 235.234645

std 6.967643 378.577633 355.334076 132.979399 224.414327

min 1986.000000 0.018544 0.010173 8.050977 0.010173

25% 2003.000000 162.581808 131.183530 82.348319 72.661713

50% 2007.000000 392.498627 330.579719 161.675229 175.883639

75% 2012.000000 681.718448 644.783140 234.362462 410.180516

max 2014.000000 1575.847963 1518.594910 623.357910 895.237000

In [18]: data2.describe()

Out[18]:
Release Adjusted Worldwide Domestic Foreign

count 26.000000 26.000000 26.000000 26.000000 26.000000

mean 1997.923077 342.719417 245.679767 130.799766 208.767570

std 12.601343 355.696167 287.491573 133.755522 156.876772

min 1966.000000 0.370471 0.192816 0.192816 0.356195

25% 1989.000000 35.463337 31.792626 27.744169 103.435164

50% 2003.500000 232.490153 182.262093 107.417497 163.080000

75% 2008.000000 460.762565 327.451362 178.731258 327.868943

max 2013.000000 1125.323899 1084.439099 534.858444 636.300000

In [ ]: DC (Worldwide mean, std, median, min and max): 245.7, 287.49, 182, 0.19 and 1084


Marvel (Worldwide mean, std, median, min and max): 409.9, 355.3, 330.6, 0.010, and 1518.59

In [19]: #Line graph


import seaborn as sns

In [20]: ax = sns.lineplot(data=data1, x='Release', y='Worldwide', marker ='o')


ax.set_title('Worldwide Gross-Marvel', color='green')

Out[20]: Text(0.5, 1.0, 'Worldwide Gross-Marvel')

In [21]: # To identify whether there are any outliers


a = sns.histplot(data['Worldwide'])
a.set_title("Distribution of Wordlwide Gross- Marvel", color='green')
for i in a.containers:
a.bar_label(i)

In [ ]: #Outlier
income: 16 :5000 - 8000
2: 500, 1000
2: 50,000 and 100,000

In [ ]: 500-----1000----5000------8000----50000------------100,000

In [22]: 5000+8000/2

Out[22]: 9000.0

In [ ]: #Whisker plot or box plot

In [23]: aa = sns.boxplot(data['Worldwide'])
a.set_title('Whisker plot', color='red')

/opt/anaconda3/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(

Out[23]: Text(0.5, 1.0, 'Whisker plot')

In [ ]: # Percentile

In [24]: per25 = data['Worldwide'].quantile(0.25) # computing 25th percentile

In [25]: per75 = data['Worldwide'].quantile(0.75)# 75th percentile

In [26]: iqr = per75-per25 # Interquartile range

In [28]: upper = per75+1.5 * iqr # creating upper limit

In [29]: upper

Out[29]: 1415.1825549999999

In [30]: L = per25 - 1.5*iqr # Lower limit

In [31]: L

Out[31]: -639.215885

In [33]: out = data1.loc[(data1['Worldwide']>upper), ['Release', 'Film', 'Worldwide']] # Identifying the outlier from the data

In [34]: out

Out[34]:
Release Film Worldwide

0 2012 Avengers 1518.59491

In [35]: lower = data.loc[(data['Worldwide']<L), ['Release', 'Film', 'Worldwide']]

In [36]: lower

Out[36]:
Release Film Worldwide

In [37]: data1.loc[(data1['Worldwide']>1000), ['Release', 'Film', 'Worldwide']]

Out[37]:
Release Film Worldwide

0 2012 Avengers 1518.594910

1 2013 Iron Man 3 1215.439994

In [40]: w = data1['Worldwide']
data1['World_Gross']= ' '
d = data1['World_Gross']

In [ ]: #<500 : <500
#500 - 1000: 500-1000
#>1000: >1000

In [41]: j = 0

In [42]: for i in w:
if i <500:
d.loc[j] = '<500'
elif i>=500 and i<=1000:
d.loc[j] = '500-1000'
else:
d.loc[j] = '>1000'
j=j+1

/var/folders/2p/sc3q6t7x3w3gns_3143jbz700000gn/T/ipykernel_5276/380579830.py:7: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
d.loc[j] = '>1000'
/var/folders/2p/sc3q6t7x3w3gns_3143jbz700000gn/T/ipykernel_5276/380579830.py:5: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
d.loc[j] = '500-1000'
/var/folders/2p/sc3q6t7x3w3gns_3143jbz700000gn/T/ipykernel_5276/380579830.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
d.loc[j] = '<500'

In [43]: data1.head()

Out[43]:
Company Film Release Adjusted Worldwide Domestic Foreign World_Gross

0 Marvel Avengers 2012 1575.847963 1518.594910 623.357910 895.237000 >1000

1 Marvel Iron Man 3 2013 1243.055923 1215.439994 409.013994 806.426000 >1000

2 Marvel Spider-Man 2002 1088.227139 821.708551 403.706375 418.002176 500-1000

3 Marvel Spider-Man 3 2007 1023.671832 890.871626 336.530303 554.341323 500-1000

4 Marvel Spider-Man 2 2004 988.524779 783.766341 373.585825 410.180516 500-1000

In [45]: N = data1['World_Gross'].value_counts()

In [46]: N

Out[46]: <500 25
500-1000 10
>1000 2
Name: World_Gross, dtype: int64

In [47]: data1['World_Gross'].value_counts(normalize = True)

Out[47]: <500 0.675676


500-1000 0.270270
>1000 0.054054
Name: World_Gross, dtype: float64

In [49]: ax = sns.countplot(data1['World_Gross'])
ax.set_title('Distribution of World-wide Gross-Marvel',color='blue')
for i in ax.containers:
ax.bar_label(i)

/opt/anaconda3/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(

In [ ]: # Release: display the count of movies released before 1990, in 1990-2000, in 2000-2010, and after 2010

You might also like