Marvel Vs DC

In [1]: import pandas as pd
In [7]: data1 = pd.read_excel('Downloads/MarvelDC.xlsx', 'Marvel')
In [5]: data1.head()  # preview the Marvel sheet; it is loaded as `data1` above, and `data` is never defined in this notebook
Out[5]:
Company Film Release Adjusted Worldwide Domestic Foreign
0 Marvel Avengers 2012 1575.847963 1518.594910 623.357910 895.237000
1 Marvel Iron Man 3 2013 1243.055923 1215.439994 409.013994 806.426000
2 Marvel Spider-Man 2002 1088.227139 821.708551 403.706375 418.002176
3 Marvel Spider-Man 3 2007 1023.671832 890.871626 336.530303 554.341323
4 Marvel Spider-Man 2 2004 988.524779 783.766341 373.585825 410.180516
In [6]: data1.info()  # column dtypes / null counts of the Marvel frame (`data1`); `data` is never defined in this notebook
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37 entries, 0 to 36
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Company 37 non-null object
1 Film 37 non-null object
2 Release 37 non-null int64
3 Adjusted 37 non-null float64
4 Worldwide 37 non-null float64
5 Domestic 37 non-null float64
6 Foreign 37 non-null float64
dtypes: float64(4), int64(1), object(2)
memory usage: 2.1+ KB
In [8]: data2 = pd.read_excel('Downloads/MarvelDC.xlsx', 'DC')
In [9]: data2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Company 26 non-null object
1 Film 26 non-null object
2 Release 26 non-null int64
3 Adjusted 26 non-null float64
4 Worldwide 26 non-null float64
5 Domestic 26 non-null float64
6 Foreign 26 non-null float64
dtypes: float64(4), int64(1), object(2)
memory usage: 1.5+ KB
In [10]: data1.describe()
Out[10]:
Release Adjusted Worldwide Domestic Foreign
count 37.000000 37.000000 37.000000 37.000000 37.000000
mean 2006.297297 457.839381 409.866944 183.371500 235.234645
std 6.967643 378.577633 355.334076 132.979399 224.414327
min 1986.000000 0.018544 0.010173 8.050977 0.010173
25% 2003.000000 162.581808 131.183530 82.348319 72.661713
50% 2007.000000 392.498627 330.579719 161.675229 175.883639
75% 2012.000000 681.718448 644.783140 234.362462 410.180516
max 2014.000000 1575.847963 1518.594910 623.357910 895.237000
In [11]: #print the movie with minim world wide gross, and release year
In [12]: d= data1['Worldwide']
In [15]: data.loc[(d.idxmin()), ['Film','Release', 'Worldwide']]
Out[15]: Film Captain America

Release 1990
Worldwide 0.010173
Name: 36, dtype: object
In [16]: data1.loc[d.idxmax(), ['Film', 'Release', 'Worldwide']]  # row with the maximum worldwide gross, read from data1 (not the undefined `data`)
Out[16]: Film Avengers

Release 2012
Worldwide 1518.59491
Name: 0, dtype: object
Out[17]:
count 37.000000 37.000000 37.000000 37.000000 37.000000
mean 2006.297297 457.839381 409.866944 183.371500 235.234645
std 6.967643 378.577633 355.334076 132.979399 224.414327
min 1986.000000 0.018544 0.010173 8.050977 0.010173
25% 2003.000000 162.581808 131.183530 82.348319 72.661713
50% 2007.000000 392.498627 330.579719 161.675229 175.883639
75% 2012.000000 681.718448 644.783140 234.362462 410.180516
max 2014.000000 1575.847963 1518.594910 623.357910 895.237000
Out[18]:
count 26.000000 26.000000 26.000000 26.000000 26.000000
mean 1997.923077 342.719417 245.679767 130.799766 208.767570
std 12.601343 355.696167 287.491573 133.755522 156.876772
min 1966.000000 0.370471 0.192816 0.192816 0.356195
25% 1989.000000 35.463337 31.792626 27.744169 103.435164
50% 2003.500000 232.490153 182.262093 107.417497 163.080000
75% 2008.000000 460.762565 327.451362 178.731258 327.868943
max 2013.000000 1125.323899 1084.439099 534.858444 636.300000
In [ ]: DC (Worldwide mean, std, median, min, max): 245.7, 287.49, 182, 0.19 and 1084

Marvel (Worldwide mean, std, median, min, max): 409.8, 355.3, 330.5, 0.010, and 1518.59
In [19]: #Line graph

import seaborn as sns
In [20]: ax = sns.lineplot(data=data1, x='Release', y='Worldwide', marker ='o')

ax.set_title('Worldwide Gross-Marvel', color='green')
Out[20]: Text(0.5, 1.0, 'Worldwide Gross-Marvel')
In [21]: #TO identify whether there exists outliers

a = sns.histplot(data['Worldwide'])
a.set_title("Distribution of Wordlwide Gross- Marvel", color='green')
for i in a.containers:
a.bar_label(i)
In [ ]: #Outlier
income: 16 :5000 - 8000
2: 500, 1000
2: 50,000 and 100,000
In [ ]: 500-----1000----5000------8000----50000------------100,000
In [22]: (5000 + 8000) / 2  # presumably the midpoint of the 5000-8000 band; parentheses needed because `/` binds tighter than `+`
Out[22]: 6500.0
In [ ]: #Whisker plot or box plot
In [23]: aa = sns.boxplot(x=data1['Worldwide'])  # keyword `x` avoids the seaborn positional-argument FutureWarning; `data1` is the Marvel frame
aa.set_title('Whisker plot', color='red')  # title the box plot's own axes (`aa`), not the earlier histogram axes `a`
Out[23]: Text(0.5, 1.0, 'Whisker plot')
In [ ]: # Percentile
In [24]: per25 = data1['Worldwide'].quantile(0.25) # computing 25th percentile of the Marvel frame (`data1`; `data` is undefined)
In [25]: per75 = data1['Worldwide'].quantile(0.75) # 75th percentile
In [26]: iqr = per75-per25 # Interquartile range
In [28]: upper = per75+1.5 * iqr # creating upper limit
In [29]: upper
Out[29]: 1415.1825549999999
In [30]: L = per25 - 1.5*iqr # Lower limit
In [31]: L
Out[31]: -639.215885
In [33]: out = data1.loc[(data1['Worldwide']>upper), ['Release', 'Film', 'Worldwide']] # Identifying the outlier from the dat
a
In [34]: out
Out[34]:
Release Film Worldwide
0 2012 Avengers 1518.59491
In [35]: lower = data1.loc[(data1['Worldwide']<L), ['Release', 'Film', 'Worldwide']]  # rows below the lower limit L, taken from data1 (not the undefined `data`)
In [36]: lower
Out[36]:
In [37]: data1.loc[(data1['Worldwide']>1000), ['Release', 'Film', 'Worldwide']]
Out[37]:
0 2012 Avengers 1518.594910
1 2013 Iron Man 3 1215.439994
In [40]: w = data1['Worldwide']
data1['World_Gross']= ' '
d = data1['World_Gross']
In [ ]: #<500 : <500
#500 - 1000: 500-1000
#>1000: >1000
In [41]: j = 0
In [42]: for i in w:
if i <500:
d.loc[j] = '<500'
elif i>=500 and i<=1000:
d.loc[j] = '500-1000'
else:
d.loc[j] = '>1000'
j=j+1
/var/folders/2p/sc3q6t7x3w3gns_3143jbz700000gn/T/ipykernel_5276/380579830.py:7: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

-a-view-versus-a-copy
d.loc[j] = '>1000'

d.loc[j] = '500-1000'

d.loc[j] = '<500'
In [43]: data1.head()
Out[43]:
Company Film Release Adjusted Worldwide Domestic Foreign World_Gross
0 Marvel Avengers 2012 1575.847963 1518.594910 623.357910 895.237000 >1000
1 Marvel Iron Man 3 2013 1243.055923 1215.439994 409.013994 806.426000 >1000
2 Marvel Spider-Man 2002 1088.227139 821.708551 403.706375 418.002176 500-1000
3 Marvel Spider-Man 3 2007 1023.671832 890.871626 336.530303 554.341323 500-1000
4 Marvel Spider-Man 2 2004 988.524779 783.766341 373.585825 410.180516 500-1000
In [45]: N = data1['World_Gross'].value_counts()
In [46]: N
Out[46]: <500 25
500-1000 10
>1000 2
Name: World_Gross, dtype: int64
In [47]: data1['World_Gross'].value_counts(normalize = True)
Out[47]: <500 0.675676

500-1000 0.270270
>1000 0.054054
Name: World_Gross, dtype: float64
In [49]: ax = sns.countplot(x=data1['World_Gross'])  # keyword `x` avoids the seaborn positional-argument FutureWarning
ax.set_title('Distribution of World-wide Gross-Marvel',color='blue')
# Put the count on top of each bar.
for i in ax.containers:
    ax.bar_label(i)
In [ ]: #Release - display the count of movies released before 1990, 1990-2000, 2000-2010, and after 2010

Marvel Vs DC

Uploaded by

Copyright:

Available Formats

Marvel Vs DC

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Marvel Vs DC

Uploaded by

Copyright:

Available Formats

In [1]: import pandas as pd

In [7]: data1 = pd.read_excel('Downloads/MarvelDC.xlsx', 'Marvel')

0 Marvel Avengers 2012 1575.847963 1518.594910 623.357910 895.237000

1 Marvel Iron Man 3 2013 1243.055923 1215.439994 409.013994 806.426000

2 Marvel Spider-Man 2002 1088.227139 821.708551 403.706375 418.002176

3 Marvel Spider-Man 3 2007 1023.671832 890.871626 336.530303 554.341323

4 Marvel Spider-Man 2 2004 988.524779 783.766341 373.585825 410.180516

In [8]: data2 = pd.read_excel('Downloads/MarvelDC.xlsx', 'DC')

count 37.000000 37.000000 37.000000 37.000000 37.000000

mean 2006.297297 457.839381 409.866944 183.371500 235.234645

std 6.967643 378.577633 355.334076 132.979399 224.414327

min 1986.000000 0.018544 0.010173 8.050977 0.010173

25% 2003.000000 162.581808 131.183530 82.348319 72.661713

50% 2007.000000 392.498627 330.579719 161.675229 175.883639

75% 2012.000000 681.718448 644.783140 234.362462 410.180516

max 2014.000000 1575.847963 1518.594910 623.357910 895.237000

In [15]: data.loc[(d.idxmin()), ['Film','Release', 'Worldwide']]

Out[15]: Film Captain America

In [16]: data.loc[(d.idxmax()), ['Film','Release', 'Worldwide']]

Out[16]: Film Avengers

count 37.000000 37.000000 37.000000 37.000000 37.000000

mean 2006.297297 457.839381 409.866944 183.371500 235.234645

std 6.967643 378.577633 355.334076 132.979399 224.414327

min 1986.000000 0.018544 0.010173 8.050977 0.010173

25% 2003.000000 162.581808 131.183530 82.348319 72.661713

50% 2007.000000 392.498627 330.579719 161.675229 175.883639

75% 2012.000000 681.718448 644.783140 234.362462 410.180516

max 2014.000000 1575.847963 1518.594910 623.357910 895.237000

count 26.000000 26.000000 26.000000 26.000000 26.000000

mean 1997.923077 342.719417 245.679767 130.799766 208.767570

std 12.601343 355.696167 287.491573 133.755522 156.876772

min 1966.000000 0.370471 0.192816 0.192816 0.356195

25% 1989.000000 35.463337 31.792626 27.744169 103.435164

50% 2003.500000 232.490153 182.262093 107.417497 163.080000

75% 2008.000000 460.762565 327.451362 178.731258 327.868943

max 2013.000000 1125.323899 1084.439099 534.858444 636.300000

In [ ]: DC: 254.7, 287.49, 182, 0.19 and 1084

In [19]: #Line graph

In [20]: ax = sns.lineplot(data=data1, x='Release', y='Worldwide', marker ='o')

Out[20]: Text(0.5, 1.0, 'Worldwide Gross-Marvel')

In [21]: #TO identify whether there exists outliers

In [ ]: #Whisker plot or box plot

/opt/anaconda3/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a

Out[23]: Text(0.5, 1.0, 'Whisker plot')

In [24]: per25 = data['Worldwide'].quantile(0.25) # computing 25th percentile

In [25]: per75 = data['Worldwide'].quantile(0.75)# 75th percentile

In [26]: iqr = per75-per25 # Interquartile range

In [28]: upper = per75+1.5 * iqr # creating upper limit

In [30]: L = per25 - 1.5*iqr # Lower limit

0 2012 Avengers 1518.59491

In [35]: lower = data.loc[(data['Worldwide']<L), ['Release', 'Film', 'Worldwide']]

In [37]: data1.loc[(data1['Worldwide']>1000), ['Release', 'Film', 'Worldwide']]

0 2012 Avengers 1518.594910

1 2013 Iron Man 3 1215.439994

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning

0 Marvel Avengers 2012 1575.847963 1518.594910 623.357910 895.237000 >1000

1 Marvel Iron Man 3 2013 1243.055923 1215.439994 409.013994 806.426000 >1000