{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "cEr06C20L8WK"
},
"source": [
"# Introduction to Python \n",
"\n",
"## Pandas Intro\n",
"\n",
"Some examples from [here](https://towardsdatascience.com/40-examples-to-master-pandas-c69d058f434e)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "WmxnkxF8L8WL"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "Vv-0YZfBL8WP"
},
"outputs": [],
"source": [
"datapath = \"../Data/\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1F6OvLAcL8WR"
},
"source": [
"## Pandas Data Structures: [Series](https://pandas.pydata.org/pandas-docs/stable/reference/series.html)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating Series"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "JryIewnkL8WS",
"outputId": "a06421dc-4c4a-4ebc-ac41-dc3e87eebed3"
},
"outputs": [
{
"data": {
"text/plain": [
"0 4\n",
"1 12\n",
"2 -5\n",
"3 3\n",
"4 5\n",
"dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj = pd.Series([4, 12, -5, 3, 5])\n",
"obj"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#dir(obj)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Exporting to Numpy array"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "r9li9Ii-L8WU",
"outputId": "e63fde87-980d-4b2b-b26e-bfb3c1acaf59"
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 4, 12, -5, 3, 5])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj.values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Examining the index object"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "wjI6Wts0L8WX",
"outputId": "c4980045-c9b8-4a0b-bc85-a87dd7d99e39"
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=5, step=1)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj.index"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Redefining the index"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "yHqJC6b8L8WZ",
"outputId": "a9373822-231c-4c2d-9241-9a4c6edb0cd1"
},
"outputs": [
{
"data": {
"text/plain": [
"Bob 4\n",
"Steve 12\n",
"Jeff -5\n",
"Ryan 3\n",
"Fernie 5\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan', 'Fernie']\n",
"obj"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a Series and passing the index as a parameter"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "jI-k8rRGL8Wc",
"outputId": "4f37ef09-5899-486c-fdbe-b80cf2663242"
},
"outputs": [
{
"data": {
"text/plain": [
"d 4\n",
"b 7\n",
"a -5\n",
"c 3\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])\n",
"obj2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Slicing and accessing elements in the series"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj2['c']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "u8e_SNzcL8Wj",
"outputId": "4ddd882c-d8bc-4d21-ff39-66e76ba47e4b"
},
"outputs": [
{
"data": {
"text/plain": [
"c 3\n",
"a -5\n",
"d 4\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj2[['c', 'a', 'd']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Slicing using boolean expressions"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "2Q-BkxadL8Wm",
"outputId": "15badfe1-934c-4b24-a151-247bcd4971e1"
},
"outputs": [
{
"data": {
"text/plain": [
"a -5\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj2[obj2 < 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Scalar operations with the series"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "LhjvPFD8L8Wo",
"outputId": "b37a6c1f-5a8f-46bc-cdab-0e3faaba1bfa"
},
"outputs": [
{
"data": {
"text/plain": [
"d 8\n",
"b 14\n",
"a -10\n",
"c 6\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj2 * 2"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "51z3zmZYL8Ws",
"outputId": "12b72973-26a1-450f-952d-f27c5d1c11dc"
},
"outputs": [
{
"data": {
"text/plain": [
"d 54.598150\n",
"b 1096.633158\n",
"a 0.006738\n",
"c 20.085537\n",
"dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(obj2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating Series from dictionaries"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "CoqYtDi2L8Wu",
"outputId": "81291bfc-271b-4c77-eacd-406338fd1e88"
},
"outputs": [
{
"data": {
"text/plain": [
"Ohio 35000\n",
"Texas 71000\n",
"Oregon 16000\n",
"Utah 5000\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n",
"obj3 = pd.Series(sdata)\n",
"obj3"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "U3ldGKqTL8Wx",
"outputId": "eb244782-419e-44e8-f216-1a000c3779d7"
},
"outputs": [
{
"data": {
"text/plain": [
"California NaN\n",
"Ohio 35000.0\n",
"Oregon 16000.0\n",
"Texas 71000.0\n",
"dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"states = ['California', 'Ohio', 'Oregon', 'Texas']\n",
"\n",
"obj4 = pd.Series(sdata, index=states)\n",
"obj4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"isnull() and notnull() methods"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "FGYJqsm8L8Wz",
"outputId": "a45c5aa6-a07e-449b-e416-1fd7138006b2"
},
"outputs": [
{
"data": {
"text/plain": [
"California True\n",
"Ohio False\n",
"Oregon False\n",
"Texas False\n",
"dtype: bool"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.isnull(obj4)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"id": "RZrJBBM-L8W2",
"outputId": "08a78774-20bb-473e-ba38-3c68b1a3db98"
},
"outputs": [
{
"data": {
"text/plain": [
"California False\n",
"Ohio True\n",
"Oregon True\n",
"Texas True\n",
"dtype: bool"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.notnull(obj4)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "6pWvwb46L8W8",
"outputId": "5471c51a-de44-4e99-d746-e88aa00d4a28"
},
"outputs": [
{
"data": {
"text/plain": [
"California NaN\n",
"Ohio 70000.0\n",
"Oregon 32000.0\n",
"Texas 142000.0\n",
"Utah 5010.0\n",
"dtype: float64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj3.add(obj4, fill_value=10)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"id": "d6_O2TahL8W-",
"outputId": "cbc3d183-f1dd-4f76-dac8-9a635910bc09"
},
"outputs": [
{
"data": {
"text/plain": [
"State\n",
"California NaN\n",
"Ohio 35000.0\n",
"Oregon 16000.0\n",
"Texas 71000.0\n",
"Name: Population, dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"obj4.name = 'Population'\n",
"obj4.index.name = 'State'\n",
"obj4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pandas Data Structures: [Date Time Range](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify start and end, with the default daily frequency."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',\n",
" '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts_idx1 = pd.date_range(start='1/1/2018', end='1/08/2018')\n",
"ts_idx1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify start and periods, the number of periods (days)."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',\n",
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
" '2016-01-09', '2016-01-10'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts_idx2 = pd.date_range(\"20160101\", periods=10, freq='D')\n",
"ts_idx2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify end and periods, the number of periods (days)."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',\n",
" '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts_idx3 = pd.date_range(end='1/1/2018', periods=8)\n",
"ts_idx3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify start, end, and periods; the frequency is generated automatically (linearly spaced)."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',\n",
" '2018-04-27 00:00:00'],\n",
" dtype='datetime64[ns]', freq=None)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts_idx4 = pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)\n",
"ts_idx4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Other parameters"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',\n",
" '2018-05-31'],\n",
" dtype='datetime64[ns]', freq='M')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.date_range(start='1/1/2018', periods=5, freq='M')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',\n",
" '2019-01-31'],\n",
" dtype='datetime64[ns]', freq='3M')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.date_range(start='1/1/2018', periods=5, freq='3M')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-02 00:00:00+00:00',\n",
" '2018-01-03 00:00:00+00:00', '2018-01-04 00:00:00+00:00',\n",
" '2018-01-05 00:00:00+00:00'],\n",
" dtype='datetime64[ns, Europe/Lisbon]', freq='D')"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.date_range(start='1/1/2018', periods=5, tz='Europe/Lisbon')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `inclusive` must be 'both', 'neither', 'left' or 'right'; passing None raises ValueError on pandas >= 2.0.\n",
"# 'both' keeps the start and end dates, matching the old default behaviour.\n",
"pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='both')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left')"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VSkGDQr5L8XA"
},
"source": [
"## Pandas Data Structures: [Dataframe](https://pandas.pydata.org/pandas-docs/stable/reference/frame.html)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating Dataframe from a dictionary of lists"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"id": "JJBQnbqDL8XA",
"outputId": "7b6e7083-65ae-471f-ac54-1e19dd42f896"
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" year | \n",
" pop | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Ohio | \n",
" 2000 | \n",
" 1.5 | \n",
"
\n",
" \n",
" 1 | \n",
" Ohio | \n",
" 2001 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2 | \n",
" Ohio | \n",
" 2002 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 3 | \n",
" Nevada | \n",
" 2001 | \n",
" 2.4 | \n",
"
\n",
" \n",
" 4 | \n",
" Nevada | \n",
" 2002 | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state year pop\n",
"0 Ohio 2000 1.5\n",
"1 Ohio 2001 1.7\n",
"2 Ohio 2002 3.6\n",
"3 Nevada 2001 2.4\n",
"4 Nevada 2002 2.9"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n",
" 'year': [2000, 2001, 2002, 2001, 2002],\n",
" 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}\n",
"\n",
"frame = pd.DataFrame(data)\n",
"frame"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"#dir(frame)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n"
]
}
],
"source": [
"print(type(frame))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.series.Series'>\n"
]
}
],
"source": [
"print(type(frame['state']))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specifying the names of columns"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"id": "GlmmkIaQL8XC",
"outputId": "fccf280c-9a02-4bf1-f9ec-70e022c6d442"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
"
\n",
" \n",
" 1 | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2 | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 3 | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
" 4 | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop\n",
"0 2000 Ohio 1.5\n",
"1 2001 Ohio 1.7\n",
"2 2002 Ohio 3.6\n",
"3 2001 Nevada 2.4\n",
"4 2002 Nevada 2.9"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d = pd.DataFrame(data, columns=['year', 'state', 'pop'])\n",
"d"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Setting a column as the index"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"id": "dHHFSDK5L8XE"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2000 | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
"
\n",
" \n",
" 2001 | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2001 | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop\n",
"year \n",
"2000 2000 Ohio 1.5\n",
"2001 2001 Ohio 1.7\n",
"2002 2002 Ohio 3.6\n",
"2001 2001 Nevada 2.4\n",
"2002 2002 Nevada 2.9"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.set_index('year', inplace=True, drop=False)\n",
"d"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dropping a column"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
"
\n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2000 Ohio 1.5\n",
"2001 Ohio 1.7\n",
"2002 Ohio 3.6\n",
"2001 Nevada 2.4\n",
"2002 Nevada 2.9"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.drop('year', axis=1, inplace=True)\n",
"d"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dropping one or more rows"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"id": "_m00ehvGL8XH",
"outputId": "a292d2bf-dcf5-4fb5-ced4-60b34cd172d4"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
"
\n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2000 Ohio 1.5\n",
"2001 Ohio 1.7\n",
"2002 Ohio 3.6\n",
"2001 Nevada 2.4\n",
"2002 Nevada 2.9"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.drop(2000, axis=0, inplace=False)\n",
"d"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"id": "AdSUXcgwL8XJ",
"outputId": "6b161b59-0870-4cb0-cee9-e533d51b65df"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2001 Ohio 1.7\n",
"2002 Ohio 3.6\n",
"2001 Nevada 2.4\n",
"2002 Nevada 2.9"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.drop(2000, axis=0, inplace=True)\n",
"d"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Slicing by column / index"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"year\n",
"2001 1.7\n",
"2002 3.6\n",
"2001 2.4\n",
"2002 2.9\n",
"Name: pop, dtype: float64"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d['pop']"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"year\n",
"2001 1.7\n",
"2002 3.6\n",
"2001 2.4\n",
"2002 2.9\n",
"Name: pop, dtype: float64"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.loc[:,'pop']"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2001 Ohio 1.7\n",
"2001 Nevada 2.4"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.loc[2001]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2001 Ohio 1.7\n",
"2001 Nevada 2.4\n",
"2002 Ohio 3.6\n",
"2002 Nevada 2.9"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.loc[[2001,2002], [\"state\", \"pop\"]]"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 3.6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" pop\n",
"year \n",
"2001 1.7\n",
"2002 3.6"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.iloc[[0,1],[1]]"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2001 Ohio 1.7\n",
"2002 Ohio 3.6\n",
"2001 Nevada 2.4"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.iloc[0:3,[0,1]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Accessing index and columns objects"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Int64Index([2001, 2002, 2001, 2002], dtype='int64', name='year')"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.index"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"id": "1bEokh0LL8XZ",
"outputId": "fcba92bf-2184-4542-80d3-457b1ee8b365"
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['state', 'pop'], dtype='object')"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using alternate notation to access columns as a property"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"id": "6ehEuDNOL8Xb",
"outputId": "98660696-6174-4d3c-8084-ff3c967af7c4"
},
"outputs": [
{
"data": {
"text/plain": [
"year\n",
"2001 Ohio\n",
"2002 Ohio\n",
"2001 Nevada\n",
"2002 Nevada\n",
"Name: state, dtype: object"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d['state']"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"id": "8fwIym5IL8Xe",
"outputId": "77ca9eb7-91d7-49f9-9c8a-b13b625c6dc2"
},
"outputs": [
{
"data": {
"text/plain": [
"year\n",
"2001 Ohio\n",
"2002 Ohio\n",
"2001 Nevada\n",
"2002 Nevada\n",
"Name: state, dtype: object"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.state"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Slicing with boolean conditions (&, |, ==, !=)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" state | \n",
" pop | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" state pop\n",
"year \n",
"2001 Ohio 1.7\n",
"2002 Ohio 3.6\n",
"2001 Nevada 2.4"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d = d[(d[\"state\"] != 'Nevada') | (d.index != 2002)]\n",
"d"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating Dataframes from existing structures"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"id": "pHmbuPv5L8XK",
"outputId": "9dbd2851-aae4-46e0-8571-02b02ab410b2"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
" debt | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
" NaN | \n",
"
\n",
" \n",
" two | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
" NaN | \n",
"
\n",
" \n",
" three | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
" NaN | \n",
"
\n",
" \n",
" four | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
" NaN | \n",
"
\n",
" \n",
" five | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop debt\n",
"one 2000 Ohio 1.5 NaN\n",
"two 2001 Ohio 1.7 NaN\n",
"three 2002 Ohio 3.6 NaN\n",
"four 2001 Nevada 2.4 NaN\n",
"five 2002 Nevada 2.9 NaN"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2 = pd.DataFrame(data, \n",
" columns=['year', 'state', 'pop', 'debt'],\n",
" index=['one', 'two', 'three', 'four', 'five'])\n",
"frame2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating new column"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"id": "dUocNBSbL8XN",
"outputId": "abe4c48e-142a-4162-fde8-b4db4b63b3e6"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
" debt | \n",
" new | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
" two | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
" three | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
" four | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
" five | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop debt new\n",
"one 2000 Ohio 1.5 NaN 13\n",
"two 2001 Ohio 1.7 NaN 13\n",
"three 2002 Ohio 3.6 NaN 13\n",
"four 2001 Nevada 2.4 NaN 13\n",
"five 2002 Nevada 2.9 NaN 13"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2['new'] = 13\n",
"frame2"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"id": "-jAr37_vL8XT",
"outputId": "ac4f4191-3cfb-46f9-b2ac-0c0e2de61805"
},
"outputs": [
{
"data": {
"text/plain": [
"year 2002\n",
"state Ohio\n",
"pop 3.6\n",
"debt NaN\n",
"new 13\n",
"Name: three, dtype: object"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2.loc['three']"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.6"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2.loc['three', 'pop']"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"id": "thLPV8M9L8XV",
"outputId": "a87def93-007b-475b-9277-3fa007e792ac"
},
"outputs": [
{
"data": {
"text/plain": [
"year 2002\n",
"state Ohio\n",
"pop 3.6\n",
"debt NaN\n",
"new 13\n",
"Name: three, dtype: object"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2.iloc[2]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Ohio'"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2.iloc[2,1]"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"id": "oriaA7V8L8Xx",
"outputId": "a23d458f-b216-4a7f-f0ae-d68d0cdaffc3"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2000 | \n",
" NaN | \n",
" 1.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2000 NaN 1.5"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pop = {'Nevada': {2001: 2.4, 2002: 2.9},'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n",
"\n",
"frame3 = pd.DataFrame(pop)\n",
"frame3"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"id": "1IznlSKvL8Xi",
"outputId": "69d179ef-9fd5-4653-df34-5ca596eab8ed"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
" debt | \n",
" new | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
" 16.5 | \n",
" 13 | \n",
"
\n",
" \n",
" two | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
" 16.5 | \n",
" 13 | \n",
"
\n",
" \n",
" three | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
" 16.5 | \n",
" 13 | \n",
"
\n",
" \n",
" four | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
" 16.5 | \n",
" 13 | \n",
"
\n",
" \n",
" five | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
" 16.5 | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop debt new\n",
"one 2000 Ohio 1.5 16.5 13\n",
"two 2001 Ohio 1.7 16.5 13\n",
"three 2002 Ohio 3.6 16.5 13\n",
"four 2001 Nevada 2.4 16.5 13\n",
"five 2002 Nevada 2.9 16.5 13"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2['debt'] = 16.5\n",
"frame2"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"id": "V3opjIPvL8Xk",
"outputId": "e45c75dd-9c6a-4240-ceca-0185f443f86a"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
" debt | \n",
" new | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
" 0.0 | \n",
" 13 | \n",
"
\n",
" \n",
" two | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
" 1.0 | \n",
" 13 | \n",
"
\n",
" \n",
" three | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
" 2.0 | \n",
" 13 | \n",
"
\n",
" \n",
" four | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
" 3.0 | \n",
" 13 | \n",
"
\n",
" \n",
" five | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
" 4.0 | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop debt new\n",
"one 2000 Ohio 1.5 0.0 13\n",
"two 2001 Ohio 1.7 1.0 13\n",
"three 2002 Ohio 3.6 2.0 13\n",
"four 2001 Nevada 2.4 3.0 13\n",
"five 2002 Nevada 2.9 4.0 13"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2['debt'] = np.arange(5.)\n",
"frame2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a column from a series"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"id": "xmPu8JO7L8Xm",
"outputId": "f74a2fa9-c4fe-4f8b-9d11-abcc4215743a"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
" debt | \n",
" new | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
" two | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
" -1.2 | \n",
" 13 | \n",
"
\n",
" \n",
" three | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
" NaN | \n",
" 13 | \n",
"
\n",
" \n",
" four | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
" -1.5 | \n",
" 13 | \n",
"
\n",
" \n",
" five | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
" -1.7 | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop debt new\n",
"one 2000 Ohio 1.5 NaN 13\n",
"two 2001 Ohio 1.7 -1.2 13\n",
"three 2002 Ohio 3.6 NaN 13\n",
"four 2001 Nevada 2.4 -1.5 13\n",
"five 2002 Nevada 2.9 -1.7 13"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])\n",
"\n",
"frame2['debt'] = val\n",
"frame2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a column from a boolean expression"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"id": "B9qiCoeqL8Xo",
"outputId": "9def766d-748d-4a61-e232-49518874f7a8"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" state | \n",
" pop | \n",
" debt | \n",
" new | \n",
" eastern | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 2000 | \n",
" Ohio | \n",
" 1.5 | \n",
" NaN | \n",
" 13 | \n",
" True | \n",
"
\n",
" \n",
" two | \n",
" 2001 | \n",
" Ohio | \n",
" 1.7 | \n",
" -1.2 | \n",
" 13 | \n",
" True | \n",
"
\n",
" \n",
" three | \n",
" 2002 | \n",
" Ohio | \n",
" 3.6 | \n",
" NaN | \n",
" 13 | \n",
" True | \n",
"
\n",
" \n",
" four | \n",
" 2001 | \n",
" Nevada | \n",
" 2.4 | \n",
" -1.5 | \n",
" 13 | \n",
" False | \n",
"
\n",
" \n",
" five | \n",
" 2002 | \n",
" Nevada | \n",
" 2.9 | \n",
" -1.7 | \n",
" 13 | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year state pop debt new eastern\n",
"one 2000 Ohio 1.5 NaN 13 True\n",
"two 2001 Ohio 1.7 -1.2 13 True\n",
"three 2002 Ohio 3.6 NaN 13 True\n",
"four 2001 Nevada 2.4 -1.5 13 False\n",
"five 2002 Nevada 2.9 -1.7 13 False"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame2['eastern'] = frame2.state == 'Ohio'\n",
"frame2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Deleting a column"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"id": "Pstsb7rTL8Xq",
"outputId": "6ba15086-1cd2-49d3-a2fa-507418b722f8"
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['year', 'state', 'pop', 'debt', 'new'], dtype='object')"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"del frame2['eastern']\n",
"frame2.columns"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"id": "7CTgyP5bL8X7",
"outputId": "bcf2e719-8dc6-43e3-bc11-7448dbf2a7ff"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2003 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2003 NaN NaN"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(pop, index=[2001, 2002, 2003])"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"id": "I9IZWJZML8X_",
"outputId": "7d04cda7-166f-4273-9b75-e59cdee02ab5"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Ohio | \n",
" Nevada | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 1.7 | \n",
" 2.4 | \n",
"
\n",
" \n",
" 2002 | \n",
" 3.6 | \n",
" 2.9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Ohio Nevada\n",
"2001 1.7 2.4\n",
"2002 3.6 2.9"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pdata = {'Ohio': frame3['Ohio'][:-1],'Nevada': frame3['Nevada'][:2]}\n",
"pd.DataFrame(pdata)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"id": "Rz0Y8hCkL8YB",
"outputId": "aaeed6d6-1d33-4056-ebc8-738053b44311"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" state | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" year | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2000 | \n",
" NaN | \n",
" 1.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"state Nevada Ohio\n",
"year \n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2000 NaN 1.5"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame3.index.name = 'year'\n",
"frame3.columns.name = 'state'\n",
"frame3"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"id": "1TFUFB4AL8YE",
"outputId": "b673bbd5-fc17-4807-f52d-2fa781fc79fe"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2000 | \n",
" NaN | \n",
" 1.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2000 NaN 1.5"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pop = {'Nevada': {2001: 2.4, 2002: 2.9},'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n",
"frame4 = pd.DataFrame(pop)\n",
"frame4"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"id": "zJWgEm7zL8YF",
"outputId": "8260ccc7-6032-4f1d-fec2-b49f22c237d1"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2000 | \n",
" 2.0 | \n",
" 1.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2000 2.0 1.5"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame4.loc[2000,'Nevada'] = 2\n",
"frame4"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"id": "iZToNmINL8YH",
"outputId": "326e15ae-2fee-4666-d8cd-4f6290342429"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2000 | \n",
" 2.0 | \n",
" 1.5 | \n",
"
\n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 32.0 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 32.0 | \n",
"
\n",
" \n",
" 2000 | \n",
" 2.0 | \n",
" 32.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2000 2.0 1.5\n",
"2001 2.4 32.0\n",
"2002 2.9 32.0\n",
"2000 2.0 32.0"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame5 = pd.concat([frame4, frame4], axis=0)\n",
"frame5.iloc[3:,1] = 32\n",
"frame5"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"id": "eVth2N0_L8YJ",
"outputId": "0bd36e9c-581c-4bab-9141-4a5fa558124f"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nevada | \n",
" Ohio | \n",
"
\n",
" \n",
" \n",
" \n",
" 2001 | \n",
" 2.4 | \n",
" 1.7 | \n",
"
\n",
" \n",
" 2002 | \n",
" 2.9 | \n",
" 3.6 | \n",
"
\n",
" \n",
" 2000 | \n",
" 2.0 | \n",
" 1.5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2000 2.0 1.5"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame5.drop_duplicates(['Nevada'], inplace=True)\n",
"frame5"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',\n",
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
" '2016-01-09', '2016-01-10'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dates = pd.date_range(\"20160101\", periods=10, freq='D')\n",
"dates"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"id": "pCUFSR3ML8YL"
},
"outputs": [],
"source": [
"data = np.random.random((10,3))"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"id": "eX-2uqSsL8YR",
"outputId": "e69d1950-84f6-455d-fbe2-0df36edb29b1"
},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.3940105 , 0.16598653, 0.32306814],\n",
" [0.46469848, 0.12793369, 0.23421117],\n",
" [0.58451733, 0.82838421, 0.74140121],\n",
" [0.51013677, 0.9891163 , 0.91843721],\n",
" [0.37910817, 0.34414402, 0.3448779 ],\n",
" [0.9496902 , 0.27825066, 0.21221705],\n",
" [0.06240444, 0.77187425, 0.06477811],\n",
" [0.13257664, 0.70945323, 0.09441618],\n",
" [0.0072288 , 0.83029155, 0.15235712],\n",
" [0.1302358 , 0.72444933, 0.67439275]])"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"id": "SvDpOoEYL8YV",
"outputId": "49779dd1-3142-4e38-c0ca-187e32d59929"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.394010 | \n",
" 0.165987 | \n",
" 0.323068 | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.464698 | \n",
" 0.127934 | \n",
" 0.234211 | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
" 0.344878 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.949690 | \n",
" 0.278251 | \n",
" 0.212217 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.062404 | \n",
" 0.771874 | \n",
" 0.064778 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" 0.132577 | \n",
" 0.709453 | \n",
" 0.094416 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" 0.007229 | \n",
" 0.830292 | \n",
" 0.152357 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" 0.130236 | \n",
" 0.724449 | \n",
" 0.674393 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-01 0.394010 0.165987 0.323068\n",
"2016-01-02 0.464698 0.127934 0.234211\n",
"2016-01-03 0.584517 0.828384 0.741401\n",
"2016-01-04 0.510137 0.989116 0.918437\n",
"2016-01-05 0.379108 0.344144 0.344878\n",
"2016-01-06 0.949690 0.278251 0.212217\n",
"2016-01-07 0.062404 0.771874 0.064778\n",
"2016-01-08 0.132577 0.709453 0.094416\n",
"2016-01-09 0.007229 0.830292 0.152357\n",
"2016-01-10 0.130236 0.724449 0.674393"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"column_names = ['Column1', 'Column2', 'Column3']\n",
"df = pd.DataFrame(data, index=dates, columns=column_names)\n",
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"id": "zFJZeaMQL8Ya",
"outputId": "92460c5c-a8dc-4b5f-fe1c-52133be01365"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-02 | \n",
" 0.464698 | \n",
" 0.127934 | \n",
" 0.234211 | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-02 0.464698 0.127934 0.234211\n",
"2016-01-03 0.584517 0.828384 0.741401"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[1:3]"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"id": "45hwfY72L8Yc",
"outputId": "8b30a088-d247-459c-ed20-cedb12c99326"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
" 0.344878 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.949690 | \n",
" 0.278251 | \n",
" 0.212217 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.062404 | \n",
" 0.771874 | \n",
" 0.064778 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-04 0.510137 0.989116 0.918437\n",
"2016-01-05 0.379108 0.344144 0.344878\n",
"2016-01-06 0.949690 0.278251 0.212217\n",
"2016-01-07 0.062404 0.771874 0.064778"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['20160104':'20160107']"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2016-01-01 0.165987\n",
"2016-01-02 0.127934\n",
"2016-01-03 0.828384\n",
"2016-01-04 0.989116\n",
"2016-01-05 0.344144\n",
"2016-01-06 0.278251\n",
"2016-01-07 0.771874\n",
"2016-01-08 0.709453\n",
"2016-01-09 0.830292\n",
"2016-01-10 0.724449\n",
"Freq: D, Name: Column2, dtype: float64"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['2016-01-01':'2016-01-11','Column2']"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"id": "QsuCiRzuL8YX",
"outputId": "b1f271e6-7e4e-4b99-ae8a-d25405d48145"
},
"outputs": [
{
"data": {
"text/plain": [
"2016-01-01 0.165987\n",
"2016-01-02 0.127934\n",
"2016-01-03 0.828384\n",
"2016-01-04 0.989116\n",
"2016-01-05 0.344144\n",
"2016-01-06 0.278251\n",
"2016-01-07 0.771874\n",
"2016-01-08 0.709453\n",
"2016-01-09 0.830292\n",
"2016-01-10 0.724449\n",
"Freq: D, Name: Column2, dtype: float64"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[0:11,1]"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"id": "v9yt82jyL8Yf",
"outputId": "6a0feefb-a6cb-407c-c5cb-591b584dee94"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
" 0.344878 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-03 0.584517 0.828384 0.741401\n",
"2016-01-04 0.510137 0.989116 0.918437\n",
"2016-01-05 0.379108 0.344144 0.344878"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df.index > '20160102') & (df.index < '20160106')]"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"id": "LFGw7iv4L8Yh",
"outputId": "5f27335e-8e65-4758-82a2-2c0462b4ed70"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.062404 | \n",
" 0.771874 | \n",
" 0.064778 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" 0.007229 | \n",
" 0.830292 | \n",
" 0.152357 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" 0.130236 | \n",
" 0.724449 | \n",
" 0.674393 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-03 0.584517 0.828384 0.741401\n",
"2016-01-04 0.510137 0.989116 0.918437\n",
"2016-01-07 0.062404 0.771874 0.064778\n",
"2016-01-09 0.007229 0.830292 0.152357\n",
"2016-01-10 0.130236 0.724449 0.674393"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('(Column1 < Column2) & (Column1 < Column3)')"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"id": "WMF0I3X5L8Yi",
"outputId": "162717d9-1062-467b-d09f-9c6e9f024822"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.394010 | \n",
" 0.323068 | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.464698 | \n",
" 0.234211 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column3\n",
"2016-01-01 0.394010 0.323068\n",
"2016-01-02 0.464698 0.234211"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['20160101':'20160102',['Column1','Column3']]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"id": "gjt7LI1IL8Yn",
"outputId": "db9bf1ca-30db-4d4c-f8a6-0ceca7ceec2c"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2\n",
"2016-01-04 0.510137 0.989116\n",
"2016-01-05 0.379108 0.344144"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[3:5, 0:2]"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"id": "xOOyyU33L8Yl",
"outputId": "cebf0c4c-8046-461b-aa71-b9f39d3eff3b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"DatetimeIndex: 10 entries, 2016-01-01 to 2016-01-10\n",
"Freq: D\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Column1 10 non-null float64\n",
" 1 Column2 10 non-null float64\n",
" 2 Column3 10 non-null float64\n",
"dtypes: float64(3)\n",
"memory usage: 620.0 bytes\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"id": "p7yej71hL8Yr",
"outputId": "35a5cfd0-eecf-4111-d6fc-7fe57058567c"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 10.000000 | \n",
" 10.000000 | \n",
" 10.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.361461 | \n",
" 0.576988 | \n",
" 0.376016 | \n",
"
\n",
" \n",
" std | \n",
" 0.288804 | \n",
" 0.314204 | \n",
" 0.296958 | \n",
"
\n",
" \n",
" min | \n",
" 0.007229 | \n",
" 0.127934 | \n",
" 0.064778 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.130821 | \n",
" 0.294724 | \n",
" 0.167322 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.386559 | \n",
" 0.716951 | \n",
" 0.278640 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.498777 | \n",
" 0.814257 | \n",
" 0.592014 | \n",
"
\n",
" \n",
" max | \n",
" 0.949690 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"count 10.000000 10.000000 10.000000\n",
"mean 0.361461 0.576988 0.376016\n",
"std 0.288804 0.314204 0.296958\n",
"min 0.007229 0.127934 0.064778\n",
"25% 0.130821 0.294724 0.167322\n",
"50% 0.386559 0.716951 0.278640\n",
"75% 0.498777 0.814257 0.592014\n",
"max 0.949690 0.989116 0.918437"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"id": "F5Dk5Je-L8Yv",
"outputId": "bbdc29a5-2759-4558-c686-d963066a8ca2"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.394010 | \n",
" 0.165987 | \n",
" 0.323068 | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.464698 | \n",
" 0.127934 | \n",
" 0.234211 | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
" 0.344878 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.949690 | \n",
" 0.278251 | \n",
" 0.212217 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.062404 | \n",
" 0.771874 | \n",
" 0.064778 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" 0.132577 | \n",
" 0.709453 | \n",
" 0.094416 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" 0.007229 | \n",
" 0.830292 | \n",
" 0.152357 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" 0.130236 | \n",
" 0.724449 | \n",
" 0.674393 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-01 0.394010 0.165987 0.323068\n",
"2016-01-02 0.464698 0.127934 0.234211\n",
"2016-01-03 0.584517 0.828384 0.741401\n",
"2016-01-04 0.510137 0.989116 0.918437\n",
"2016-01-05 0.379108 0.344144 0.344878\n",
"2016-01-06 0.949690 0.278251 0.212217\n",
"2016-01-07 0.062404 0.771874 0.064778\n",
"2016-01-08 0.132577 0.709453 0.094416\n",
"2016-01-09 0.007229 0.830292 0.152357\n",
"2016-01-10 0.130236 0.724449 0.674393"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_index(axis=0, ascending=True,) # inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"id": "ZdzzGZMpL8Yw",
"outputId": "92c40e02-85a3-4d56-fcb6-e939500c43fa"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.394010 | \n",
" 0.165987 | \n",
" 0.323068 | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.464698 | \n",
" 0.127934 | \n",
" 0.234211 | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
" 0.344878 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.949690 | \n",
" 0.278251 | \n",
" 0.212217 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.062404 | \n",
" 0.771874 | \n",
" 0.064778 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" 0.132577 | \n",
" 0.709453 | \n",
" 0.094416 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" 0.007229 | \n",
" 0.830292 | \n",
" 0.152357 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" 0.130236 | \n",
" 0.724449 | \n",
" 0.674393 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-01 0.394010 0.165987 0.323068\n",
"2016-01-02 0.464698 0.127934 0.234211\n",
"2016-01-03 0.584517 0.828384 0.741401\n",
"2016-01-04 0.510137 0.989116 0.918437\n",
"2016-01-05 0.379108 0.344144 0.344878\n",
"2016-01-06 0.949690 0.278251 0.212217\n",
"2016-01-07 0.062404 0.771874 0.064778\n",
"2016-01-08 0.132577 0.709453 0.094416\n",
"2016-01-09 0.007229 0.830292 0.152357\n",
"2016-01-10 0.130236 0.724449 0.674393"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[sorted(df.columns)]"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"id": "5RW7CbCQL8Yz",
"outputId": "0383cf8f-2de9-4239-a48b-dac3a105f9be"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column1 | \n",
" Column2 | \n",
" Column3 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-02 | \n",
" 0.464698 | \n",
" 0.127934 | \n",
" 0.234211 | \n",
"
\n",
" \n",
" 2016-01-01 | \n",
" 0.394010 | \n",
" 0.165987 | \n",
" 0.323068 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.949690 | \n",
" 0.278251 | \n",
" 0.212217 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.379108 | \n",
" 0.344144 | \n",
" 0.344878 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" 0.132577 | \n",
" 0.709453 | \n",
" 0.094416 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" 0.130236 | \n",
" 0.724449 | \n",
" 0.674393 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.062404 | \n",
" 0.771874 | \n",
" 0.064778 | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.584517 | \n",
" 0.828384 | \n",
" 0.741401 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" 0.007229 | \n",
" 0.830292 | \n",
" 0.152357 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.510137 | \n",
" 0.989116 | \n",
" 0.918437 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column1 Column2 Column3\n",
"2016-01-02 0.464698 0.127934 0.234211\n",
"2016-01-01 0.394010 0.165987 0.323068\n",
"2016-01-06 0.949690 0.278251 0.212217\n",
"2016-01-05 0.379108 0.344144 0.344878\n",
"2016-01-08 0.132577 0.709453 0.094416\n",
"2016-01-10 0.130236 0.724449 0.674393\n",
"2016-01-07 0.062404 0.771874 0.064778\n",
"2016-01-03 0.584517 0.828384 0.741401\n",
"2016-01-09 0.007229 0.830292 0.152357\n",
"2016-01-04 0.510137 0.989116 0.918437"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(by='Column2')"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"id": "yGX8iYskL8Y2"
},
"outputs": [],
"source": [
"dates1 = pd.date_range(\"20160101\", periods=6)\n",
"data1 = np.random.random((6,2))\n",
"column_names1 = ['ColumnA', 'ColumnB']\n",
"\n",
"dates2 = pd.date_range(\"20160104\", periods=7)\n",
"data2 = np.random.random((7,2))\n",
"column_names2 = ['ColumnC', 'ColumnD']\n",
"\n",
"df1 = pd.DataFrame(data1, index=dates1, columns=column_names1)\n",
"df2 = pd.DataFrame(data2, index=dates2, columns=column_names2)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {
"id": "FdexPwrQL8Y4",
"outputId": "ae9b61a2-4a09-4696-c0b9-9ad2479682ad"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.961191 | \n",
" 0.214906 | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.562981 | \n",
" 0.602063 | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.583841 | \n",
" 0.367577 | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB\n",
"2016-01-01 0.961191 0.214906\n",
"2016-01-02 0.562981 0.602063\n",
"2016-01-03 0.583841 0.367577\n",
"2016-01-04 0.562917 0.712821\n",
"2016-01-05 0.794997 0.518708\n",
"2016-01-06 0.946344 0.972193"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {
"id": "yOJ_bMBRL8Y5",
"outputId": "b14eb2f3-19d9-4698-c924-cfde04f2bc18"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-04 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" 0.427601 | \n",
" 0.153653 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" 0.945375 | \n",
" 0.084841 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" 0.630937 | \n",
" 0.256707 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" 0.017057 | \n",
" 0.316170 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnC ColumnD\n",
"2016-01-04 0.017940 0.876961\n",
"2016-01-05 0.309962 0.768632\n",
"2016-01-06 0.273469 0.435210\n",
"2016-01-07 0.427601 0.153653\n",
"2016-01-08 0.945375 0.084841\n",
"2016-01-09 0.630937 0.256707\n",
"2016-01-10 0.017057 0.316170"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"id": "jymASUXyL8Y6",
"outputId": "73ca0b63-be54-4ddd-9597-01dcc5e0b216"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.961191 | \n",
" 0.214906 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.562981 | \n",
" 0.602063 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.583841 | \n",
" 0.367577 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD\n",
"2016-01-01 0.961191 0.214906 NaN NaN\n",
"2016-01-02 0.562981 0.602063 NaN NaN\n",
"2016-01-03 0.583841 0.367577 NaN NaN\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.join.html\n",
"df1.join(df2, how='left')"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"id": "dwi5DcXdL8Y-",
"outputId": "fb77c0dc-4b65-492a-95c8-c5685982c1b9"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" NaN | \n",
" NaN | \n",
" 0.427601 | \n",
" 0.153653 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" NaN | \n",
" NaN | \n",
" 0.945375 | \n",
" 0.084841 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" NaN | \n",
" NaN | \n",
" 0.630937 | \n",
" 0.256707 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" NaN | \n",
" NaN | \n",
" 0.017057 | \n",
" 0.316170 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210\n",
"2016-01-07 NaN NaN 0.427601 0.153653\n",
"2016-01-08 NaN NaN 0.945375 0.084841\n",
"2016-01-09 NaN NaN 0.630937 0.256707\n",
"2016-01-10 NaN NaN 0.017057 0.316170"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.join(df2, how='right')"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"id": "LehqC2xKL8Y_",
"outputId": "da540f4b-9ee8-49e9-a2fe-18c5939f1b32"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.961191 | \n",
" 0.214906 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.562981 | \n",
" 0.602063 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.583841 | \n",
" 0.367577 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
" 2016-01-07 | \n",
" NaN | \n",
" NaN | \n",
" 0.427601 | \n",
" 0.153653 | \n",
"
\n",
" \n",
" 2016-01-08 | \n",
" NaN | \n",
" NaN | \n",
" 0.945375 | \n",
" 0.084841 | \n",
"
\n",
" \n",
" 2016-01-09 | \n",
" NaN | \n",
" NaN | \n",
" 0.630937 | \n",
" 0.256707 | \n",
"
\n",
" \n",
" 2016-01-10 | \n",
" NaN | \n",
" NaN | \n",
" 0.017057 | \n",
" 0.316170 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD\n",
"2016-01-01 0.961191 0.214906 NaN NaN\n",
"2016-01-02 0.562981 0.602063 NaN NaN\n",
"2016-01-03 0.583841 0.367577 NaN NaN\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210\n",
"2016-01-07 NaN NaN 0.427601 0.153653\n",
"2016-01-08 NaN NaN 0.945375 0.084841\n",
"2016-01-09 NaN NaN 0.630937 0.256707\n",
"2016-01-10 NaN NaN 0.017057 0.316170"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.join(df2, how='outer')"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"id": "BnxIHD1wL8ZA",
"outputId": "a02abc4f-beec-4741-cbc1-179168bafadf"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.join(df2, how='inner')"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"id": "ypYiciNsL8ZC",
"outputId": "8ea83eee-e408-4fa2-fb6a-709be802654a"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
" ColumnA_df2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.961191 | \n",
" 0.214906 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.562981 | \n",
" 0.602063 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.583841 | \n",
" 0.367577 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
" 1.562917 | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
" 1.794997 | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
" 1.946344 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD ColumnA_df2\n",
"2016-01-01 0.961191 0.214906 NaN NaN NaN\n",
"2016-01-02 0.562981 0.602063 NaN NaN NaN\n",
"2016-01-03 0.583841 0.367577 NaN NaN NaN\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961 1.562917\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632 1.794997\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210 1.946344"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Give df2 a column whose name collides with one in df1, so the join\n",
"# has to disambiguate the right-hand copy with a suffix.\n",
"df2['ColumnA'] = df1['ColumnA'] + 1\n",
"\n",
"df1.join(df2, rsuffix='_df2', how='left')"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {
"id": "VKWLYtQyL8ZD"
},
"outputs": [],
"source": [
"# Remove the demo column that the suffix example added to df2.\n",
"# errors='ignore' keeps this cell re-runnable: a plain `del` raises\n",
"# KeyError the second time the cell is executed.\n",
"df2 = df2.drop(columns='ColumnA', errors='ignore')\n",
"df3 = df1.join(df2)\n",
"\n",
"# add a column to df3 to group on (the Series is indexed by `dates`)\n",
"df3['ProfitLoss'] = pd.Series(\n",
"    ['Profit', 'Loss', 'Profit', 'Same', 'Profit',\n",
"     'Loss', 'Profit', 'Profit', 'Same', 'Loss'],\n",
"    index=dates,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"id": "tVryLR1kL8ZF"
},
"outputs": [],
"source": [
"# Second grouping key: one student name per entry of `dates`\n",
"df3['Student'] = pd.Series(\n",
"    ['Alex', 'Alex', 'Alex', 'Marcos', 'Hannah',\n",
"     'Hannah', 'Marcos', 'Hannah', 'Hannah', 'Barbara'],\n",
"    index=dates,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"id": "q9vjZfwcL8ZG",
"outputId": "50e5fb5e-fb40-469a-f4a2-3758b12b7039"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
" ProfitLoss | \n",
" Student | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.961191 | \n",
" 0.214906 | \n",
" NaN | \n",
" NaN | \n",
" Profit | \n",
" Alex | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.562981 | \n",
" 0.602063 | \n",
" NaN | \n",
" NaN | \n",
" Loss | \n",
" Alex | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.583841 | \n",
" 0.367577 | \n",
" NaN | \n",
" NaN | \n",
" Profit | \n",
" Alex | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
" Same | \n",
" Marcos | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
" Profit | \n",
" Hannah | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
" Loss | \n",
" Hannah | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD ProfitLoss Student\n",
"2016-01-01 0.961191 0.214906 NaN NaN Profit Alex\n",
"2016-01-02 0.562981 0.602063 NaN NaN Loss Alex\n",
"2016-01-03 0.583841 0.367577 NaN NaN Profit Alex\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961 Same Marcos\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632 Profit Hannah\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210 Loss Hannah"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {
"id": "aQ5xKKMWL8ZI",
"outputId": "993632fa-1ffe-46bd-97b1-492300cc1dc0"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" ProfitLoss | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Loss | \n",
" 0.754662 | \n",
" 0.787128 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
" Profit | \n",
" 0.780010 | \n",
" 0.367064 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" Same | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD\n",
"ProfitLoss \n",
"Loss 0.754662 0.787128 0.273469 0.435210\n",
"Profit 0.780010 0.367064 0.309962 0.768632\n",
"Same 0.562917 0.712821 0.017940 0.876961"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grupos = df3.groupby('ProfitLoss')\n",
"# df3 also holds the text column 'Student'; numeric_only=True restricts the\n",
"# mean to the numeric columns (pandas >= 2.0 raises TypeError otherwise).\n",
"grupos.mean(numeric_only=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checking how Python orders strings: comparison is by Unicode code point, so uppercase letters sort before lowercase ones."
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {
"id": "bbIbVPYVL8ZJ",
"outputId": "7af165bc-e886-4aa8-b1c0-f3950152ba1f"
},
"outputs": [
{
"data": {
"text/plain": [
"'name2'"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"max(['name1', 'name2', 'Name3'])"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"id": "lW_LdufBL8ZK",
"outputId": "930a5e51-ec3f-444b-8124-9de6bba0a0e9"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
"
\n",
" \n",
" Student | \n",
" ProfitLoss | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Alex | \n",
" Loss | \n",
" 0.562981 | \n",
" 0.602063 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" Profit | \n",
" 0.961191 | \n",
" 0.367577 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" Hannah | \n",
" Loss | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
"
\n",
" \n",
" Profit | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
"
\n",
" \n",
" Marcos | \n",
" Same | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD\n",
"Student ProfitLoss \n",
"Alex Loss 0.562981 0.602063 NaN NaN\n",
" Profit 0.961191 0.367577 NaN NaN\n",
"Hannah Loss 0.946344 0.972193 0.273469 0.435210\n",
" Profit 0.794997 0.518708 0.309962 0.768632\n",
"Marcos Same 0.562917 0.712821 0.017940 0.876961"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df4 = df3.groupby(['Student','ProfitLoss']).max()\n",
"df4"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"id": "Fer2LHsrL8ZM",
"outputId": "adbc1636-4d72-4b0a-bc3c-814812976fe0"
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Alex', 'Alex', 'Hannah', 'Hannah', 'Marcos'], dtype='object', name='Student')"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df4.index.get_level_values('Student')"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"id": "aUM-BJMYL8ZN",
"outputId": "f992ff24-9751-40bd-ebd4-e0f9e948f1e8"
},
"outputs": [
{
"data": {
"text/plain": [
"0.7949966854120388"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df4.loc[('Hannah','Profit'), 'ColumnA']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Pandas Useful Functions](https://pandas.pydata.org/pandas-docs/stable/reference/general_functions.html)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ColumnA | \n",
" ColumnB | \n",
" ColumnC | \n",
" ColumnD | \n",
" ProfitLoss | \n",
" Student | \n",
"
\n",
" \n",
" \n",
" \n",
" 2016-01-01 | \n",
" 0.961191 | \n",
" 0.214906 | \n",
" NaN | \n",
" NaN | \n",
" Profit | \n",
" Alex | \n",
"
\n",
" \n",
" 2016-01-02 | \n",
" 0.562981 | \n",
" 0.602063 | \n",
" NaN | \n",
" NaN | \n",
" Loss | \n",
" Alex | \n",
"
\n",
" \n",
" 2016-01-03 | \n",
" 0.583841 | \n",
" 0.367577 | \n",
" NaN | \n",
" NaN | \n",
" Profit | \n",
" Alex | \n",
"
\n",
" \n",
" 2016-01-04 | \n",
" 0.562917 | \n",
" 0.712821 | \n",
" 0.017940 | \n",
" 0.876961 | \n",
" Same | \n",
" Marcos | \n",
"
\n",
" \n",
" 2016-01-05 | \n",
" 0.794997 | \n",
" 0.518708 | \n",
" 0.309962 | \n",
" 0.768632 | \n",
" Profit | \n",
" Hannah | \n",
"
\n",
" \n",
" 2016-01-06 | \n",
" 0.946344 | \n",
" 0.972193 | \n",
" 0.273469 | \n",
" 0.435210 | \n",
" Loss | \n",
" Hannah | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ColumnA ColumnB ColumnC ColumnD ProfitLoss Student\n",
"2016-01-01 0.961191 0.214906 NaN NaN Profit Alex\n",
"2016-01-02 0.562981 0.602063 NaN NaN Loss Alex\n",
"2016-01-03 0.583841 0.367577 NaN NaN Profit Alex\n",
"2016-01-04 0.562917 0.712821 0.017940 0.876961 Same Marcos\n",
"2016-01-05 0.794997 0.518708 0.309962 0.768632 Profit Hannah\n",
"2016-01-06 0.946344 0.972193 0.273469 0.435210 Loss Hannah"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transpose"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 2016-01-01 | \n",
" 2016-01-02 | \n",
" 2016-01-03 | \n",
" 2016-01-04 | \n",
" 2016-01-05 | \n",
" 2016-01-06 | \n",
"
\n",
" \n",
" \n",
" \n",
" ColumnA | \n",
" 0.961191 | \n",
" 0.562981 | \n",
" 0.583841 | \n",
" 0.562917 | \n",
" 0.794997 | \n",
" 0.946344 | \n",
"
\n",
" \n",
" ColumnB | \n",
" 0.214906 | \n",
" 0.602063 | \n",
" 0.367577 | \n",
" 0.712821 | \n",
" 0.518708 | \n",
" 0.972193 | \n",
"
\n",
" \n",
" ColumnC | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0.01794 | \n",
" 0.309962 | \n",
" 0.273469 | \n",
"
\n",
" \n",
" ColumnD | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0.876961 | \n",
" 0.768632 | \n",
" 0.43521 | \n",
"
\n",
" \n",
" ProfitLoss | \n",
" Profit | \n",
" Loss | \n",
" Profit | \n",
" Same | \n",
" Profit | \n",
" Loss | \n",
"
\n",
" \n",
" Student | \n",
" Alex | \n",
" Alex | \n",
" Alex | \n",
" Marcos | \n",
" Hannah | \n",
" Hannah | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 2016-01-01 2016-01-02 2016-01-03 2016-01-04 2016-01-05 2016-01-06\n",
"ColumnA 0.961191 0.562981 0.583841 0.562917 0.794997 0.946344\n",
"ColumnB 0.214906 0.602063 0.367577 0.712821 0.518708 0.972193\n",
"ColumnC NaN NaN NaN 0.01794 0.309962 0.273469\n",
"ColumnD NaN NaN NaN 0.876961 0.768632 0.43521\n",
"ProfitLoss Profit Loss Profit Same Profit Loss\n",
"Student Alex Alex Alex Marcos Hannah Hannah"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.T"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 2016-01-01 | \n",
" 2016-01-02 | \n",
" 2016-01-03 | \n",
" 2016-01-04 | \n",
" 2016-01-05 | \n",
" 2016-01-06 | \n",
"
\n",
" \n",
" \n",
" \n",
" ColumnA | \n",
" 0.961191 | \n",
" 0.562981 | \n",
" 0.583841 | \n",
" 0.562917 | \n",
" 0.794997 | \n",
" 0.946344 | \n",
"
\n",
" \n",
" ColumnB | \n",
" 0.214906 | \n",
" 0.602063 | \n",
" 0.367577 | \n",
" 0.712821 | \n",
" 0.518708 | \n",
" 0.972193 | \n",
"
\n",
" \n",
" ColumnC | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0.01794 | \n",
" 0.309962 | \n",
" 0.273469 | \n",
"
\n",
" \n",
" ColumnD | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 0.876961 | \n",
" 0.768632 | \n",
" 0.43521 | \n",
"
\n",
" \n",
" ProfitLoss | \n",
" Profit | \n",
" Loss | \n",
" Profit | \n",
" Same | \n",
" Profit | \n",
" Loss | \n",
"
\n",
" \n",
" Student | \n",
" Alex | \n",
" Alex | \n",
" Alex | \n",
" Marcos | \n",
" Hannah | \n",
" Hannah | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 2016-01-01 2016-01-02 2016-01-03 2016-01-04 2016-01-05 2016-01-06\n",
"ColumnA 0.961191 0.562981 0.583841 0.562917 0.794997 0.946344\n",
"ColumnB 0.214906 0.602063 0.367577 0.712821 0.518708 0.972193\n",
"ColumnC NaN NaN NaN 0.01794 0.309962 0.273469\n",
"ColumnD NaN NaN NaN 0.876961 0.768632 0.43521\n",
"ProfitLoss Profit Loss Profit Same Profit Loss\n",
"Student Alex Alex Alex Marcos Hannah Hannah"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.transpose()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [idxmin & idxmax](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmax.html)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2016-01-01 00:00:00', freq='D')"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3['ColumnA'].idxmax()"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ColumnA 0.961191\n",
"ColumnB 0.214906\n",
"ColumnC NaN\n",
"ColumnD NaN\n",
"ProfitLoss Profit\n",
"Student Alex\n",
"Name: 2016-01-01 00:00:00, dtype: object"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.loc[df3['ColumnA'].idxmax()]"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9611911784461947"
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series.max() is vectorised and skips NaN; the builtin max() iterates in\n",
"# Python and its result depends on where a NaN sits in the data.\n",
"df3['ColumnA'].max()"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2016-01-04 00:00:00', freq='D')"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3['ColumnA'].idxmin()"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ColumnA 0.562917\n",
"ColumnB 0.712821\n",
"ColumnC 0.01794\n",
"ColumnD 0.876961\n",
"ProfitLoss Same\n",
"Student Marcos\n",
"Name: 2016-01-04 00:00:00, dtype: object"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.loc[df3['ColumnA'].idxmin()]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5629166477301453"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series.min() is vectorised and skips NaN; the builtin min() iterates in\n",
"# Python and its result depends on where a NaN sits in the data.\n",
"df3['ColumnA'].min()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Not Equal - ne](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ne.html)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 False\n",
"1 False\n",
"2 False\n",
"3 False\n",
"4 False\n",
"5 False\n",
"6 True\n",
"7 True\n",
"8 True\n",
"9 True\n",
"10 True\n",
"11 True\n",
"12 True\n",
"Name: x, dtype: bool"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Single-column frame: six zeros followed by 1..7\n",
"df = pd.DataFrame({'x': [0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7]})\n",
"\n",
"# Element-wise `!=` comparison against 0\n",
"df['x'].ne(0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Nsmallest "
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 0\n",
"2 0\n",
"Name: x, dtype: int64"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['x'].nsmallest(3)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x\n",
"0 0\n",
"1 0\n",
"2 0"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.nsmallest(3, 'x')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Nlargest "
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12 7\n",
"11 6\n",
"10 5\n",
"Name: x, dtype: int64"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['x'].nlargest(3)"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
"
\n",
" \n",
" \n",
" \n",
" 12 | \n",
" 7 | \n",
"
\n",
" \n",
" 11 | \n",
" 6 | \n",
"
\n",
" \n",
" 10 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x\n",
"12 7\n",
"11 6\n",
"10 5"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.nlargest(3, 'x')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Is in (`isin`)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" True | \n",
"
\n",
" \n",
" 4 | \n",
" True | \n",
"
\n",
" \n",
" 5 | \n",
" True | \n",
"
\n",
" \n",
" 6 | \n",
" False | \n",
"
\n",
" \n",
" 7 | \n",
" True | \n",
"
\n",
" \n",
" 8 | \n",
" False | \n",
"
\n",
" \n",
" 9 | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x\n",
"0 True\n",
"1 True\n",
"2 True\n",
"3 True\n",
"4 True\n",
"5 True\n",
"6 False\n",
"7 True\n",
"8 False\n",
"9 False"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isin([0,2]).head(10)"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 6 | \n",
" NaN | \n",
"
\n",
" \n",
" 7 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 8 | \n",
" NaN | \n",
"
\n",
" \n",
" 9 | \n",
" NaN | \n",
"
\n",
" \n",
" 10 | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" NaN | \n",
"
\n",
" \n",
" 12 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x\n",
"0 0.0\n",
"1 0.0\n",
"2 0.0\n",
"3 0.0\n",
"4 0.0\n",
"5 0.0\n",
"6 NaN\n",
"7 2.0\n",
"8 NaN\n",
"9 NaN\n",
"10 NaN\n",
"11 NaN\n",
"12 NaN"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.isin([0,2])] #.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
"
\n",
" \n",
" \n",
" \n",
" 6 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 8 | \n",
" 3.0 | \n",
"
\n",
" \n",
" 9 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 10 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 11 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 12 | \n",
" 7.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x\n",
"6 1.0\n",
"8 3.0\n",
"9 4.0\n",
"10 5.0\n",
"11 6.0\n",
"12 7.0"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[~df.isin([0,2])].dropna()"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Test | \n",
" Student | \n",
" Maths | \n",
" Physics | \n",
" Chemistry | \n",
" Biology | \n",
" Computer_Science | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" T2 | \n",
" Student 1 | \n",
" 54 | \n",
" 75 | \n",
" 66 | \n",
" 99 | \n",
" 32 | \n",
"
\n",
" \n",
" 1 | \n",
" T2 | \n",
" Student 2 | \n",
" 29 | \n",
" 91 | \n",
" 96 | \n",
" 69 | \n",
" 58 | \n",
"
\n",
" \n",
" 2 | \n",
" T1 | \n",
" Student 3 | \n",
" 28 | \n",
" 10 | \n",
" 22 | \n",
" 33 | \n",
" 25 | \n",
"
\n",
" \n",
" 3 | \n",
" T2 | \n",
" Student 4 | \n",
" 29 | \n",
" 23 | \n",
" 41 | \n",
" 0 | \n",
" 42 | \n",
"
\n",
" \n",
" 4 | \n",
" T3 | \n",
" Student 5 | \n",
" 40 | \n",
" 71 | \n",
" 45 | \n",
" 42 | \n",
" 87 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Test Student Maths Physics Chemistry Biology Computer_Science\n",
"0 T2 Student 1 54 75 66 99 32\n",
"1 T2 Student 2 29 91 96 69 58\n",
"2 T1 Student 3 28 10 22 33 25\n",
"3 T2 Student 4 29 23 41 0 42\n",
"4 T3 Student 5 40 71 45 42 87"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Create a test dataframe\n",
"# Untidy dataframe\n",
"# x : Subjects\n",
"# y : Student names\n",
"\n",
"# Seeded local generator so the random marks (and every output derived\n",
"# from them) are identical on each Restart & Run All.\n",
"rng = np.random.default_rng(42)\n",
"subjects = ['Maths', 'Physics', 'Chemistry', 'Biology', 'Computer_Science']\n",
"\n",
"marks = pd.DataFrame(rng.integers(0, 100, size=(20, 5)), columns=subjects)\n",
"\n",
"marks['Student'] = ['Student ' + str(i) for i in range(1, 21)]\n",
"marks['Test'] = rng.choice(['T1', 'T2', 'T3'], size=len(marks))\n",
"# Put the identifying columns first\n",
"marks = marks[['Test', 'Student'] + subjects]\n",
"\n",
"display(marks.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Agg](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.agg.html)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Maths | \n",
" Physics | \n",
" Chemistry | \n",
" Biology | \n",
" Computer_Science | \n",
"
\n",
" \n",
" \n",
" \n",
" sum | \n",
" 994.0 | \n",
" 1084.0 | \n",
" 818.0 | \n",
" 1121.00 | \n",
" 999.00 | \n",
"
\n",
" \n",
" min | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.00 | \n",
" 11.00 | \n",
"
\n",
" \n",
" mean | \n",
" 49.7 | \n",
" 54.2 | \n",
" 40.9 | \n",
" 56.05 | \n",
" 49.95 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Maths Physics Chemistry Biology Computer_Science\n",
"sum 994.0 1084.0 818.0 1121.00 999.00\n",
"min 5.0 0.0 0.0 0.00 11.00\n",
"mean 49.7 54.2 40.9 56.05 49.95"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"marks[['Maths', 'Physics','Chemistry', 'Biology', 'Computer_Science']].agg(['sum', 'min', 'mean'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Group By"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"grouped = marks.groupby('Test')"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T1\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Test | \n",
" Student | \n",
" Maths | \n",
" Physics | \n",
" Chemistry | \n",
" Biology | \n",
" Computer_Science | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" T1 | \n",
" Student 3 | \n",
" 28 | \n",
" 10 | \n",
" 22 | \n",
" 33 | \n",
" 25 | \n",
"
\n",
" \n",
" 5 | \n",
" T1 | \n",
" Student 6 | \n",
" 43 | \n",
" 50 | \n",
" 48 | \n",
" 95 | \n",
" 71 | \n",
"
\n",
" \n",
" 6 | \n",
" T1 | \n",
" Student 7 | \n",
" 99 | \n",
" 97 | \n",
" 0 | \n",
" 12 | \n",
" 73 | \n",
"
\n",
" \n",
" 10 | \n",
" T1 | \n",
" Student 11 | \n",
" 85 | \n",
" 3 | \n",
" 95 | \n",
" 50 | \n",
" 55 | \n",
"
\n",
" \n",
" 11 | \n",
" T1 | \n",
" Student 12 | \n",
" 97 | \n",
" 93 | \n",
" 22 | \n",
" 63 | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Test Student Maths Physics Chemistry Biology Computer_Science\n",
"2 T1 Student 3 28 10 22 33 25\n",
"5 T1 Student 6 43 50 48 95 71\n",
"6 T1 Student 7 99 97 0 12 73\n",
"10 T1 Student 11 85 3 95 50 55\n",
"11 T1 Student 12 97 93 22 63 30"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"T2\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Test | \n",
" Student | \n",
" Maths | \n",
" Physics | \n",
" Chemistry | \n",
" Biology | \n",
" Computer_Science | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" T2 | \n",
" Student 1 | \n",
" 54 | \n",
" 75 | \n",
" 66 | \n",
" 99 | \n",
" 32 | \n",
"
\n",
" \n",
" 1 | \n",
" T2 | \n",
" Student 2 | \n",
" 29 | \n",
" 91 | \n",
" 96 | \n",
" 69 | \n",
" 58 | \n",
"
\n",
" \n",
" 3 | \n",
" T2 | \n",
" Student 4 | \n",
" 29 | \n",
" 23 | \n",
" 41 | \n",
" 0 | \n",
" 42 | \n",
"
\n",
" \n",
" 7 | \n",
" T2 | \n",
" Student 8 | \n",
" 17 | \n",
" 0 | \n",
" 72 | \n",
" 75 | \n",
" 17 | \n",
"
\n",
" \n",
" 9 | \n",
" T2 | \n",
" Student 10 | \n",
" 15 | \n",
" 94 | \n",
" 55 | \n",
" 69 | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Test Student Maths Physics Chemistry Biology Computer_Science\n",
"0 T2 Student 1 54 75 66 99 32\n",
"1 T2 Student 2 29 91 96 69 58\n",
"3 T2 Student 4 29 23 41 0 42\n",
"7 T2 Student 8 17 0 72 75 17\n",
"9 T2 Student 10 15 94 55 69 45"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"T3\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Test | \n",
" Student | \n",
" Maths | \n",
" Physics | \n",
" Chemistry | \n",
" Biology | \n",
" Computer_Science | \n",
"
\n",
" \n",
" \n",
" \n",
" 4 | \n",
" T3 | \n",
" Student 5 | \n",
" 40 | \n",
" 71 | \n",
" 45 | \n",
" 42 | \n",
" 87 | \n",
"
\n",
" \n",
" 8 | \n",
" T3 | \n",
" Student 9 | \n",
" 75 | \n",
" 23 | \n",
" 1 | \n",
" 65 | \n",
" 38 | \n",
"
\n",
" \n",
" 12 | \n",
" T3 | \n",
" Student 13 | \n",
" 92 | \n",
" 88 | \n",
" 58 | \n",
" 18 | \n",
" 54 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Test Student Maths Physics Chemistry Biology Computer_Science\n",
"4 T3 Student 5 40 71 45 42 87\n",
"8 T3 Student 9 75 23 1 65 38\n",
"12 T3 Student 13 92 88 58 18 54"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Iterating a GroupBy yields (group key, sub-frame) pairs\n",
"for test_name, test_marks in grouped:\n",
"    print(test_name)\n",
"    display(test_marks.head())"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Test\n",
"T1 55.111111\n",
"T2 36.375000\n",
"T3 69.000000\n",
"Name: Maths, dtype: float64"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grouped['Maths'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Test Student\n",
"0 T1 9\n",
"1 T2 8\n",
"2 T3 3\n"
]
}
],
"source": [
"# Count the distinct students per test, then turn 'Test' back into a column\n",
"grouped2 = (\n",
"    grouped\n",
"    .agg({\"Student\": \"nunique\"})\n",
"    .reset_index()\n",
")\n",
"print(grouped2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pivot"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"id": "h7UmJLl8L8Xs",
"outputId": "7c07a5fa-bbc7-4e52-c64f-b30109932018"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" Student | \n",
" Student 1 | \n",
" Student 10 | \n",
" Student 11 | \n",
" Student 12 | \n",
" Student 13 | \n",
" Student 14 | \n",
" Student 15 | \n",
" Student 16 | \n",
" Student 17 | \n",
" Student 18 | \n",
" Student 19 | \n",
" Student 2 | \n",
" Student 20 | \n",
" Student 3 | \n",
" Student 4 | \n",
" Student 5 | \n",
" Student 6 | \n",
" Student 7 | \n",
" Student 8 | \n",
" Student 9 | \n",
"
\n",
" \n",
" Test | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" T1 | \n",
" NaN | \n",
" NaN | \n",
" 95.0 | \n",
" 22.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 43.0 | \n",
" 4.0 | \n",
" 14.0 | \n",
" 53.0 | \n",
" NaN | \n",
" NaN | \n",
" 22.0 | \n",
" NaN | \n",
" NaN | \n",
" 48.0 | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" T2 | \n",
" 66.0 | \n",
" 55.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 31.0 | \n",
" 25.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 96.0 | \n",
" 27.0 | \n",
" NaN | \n",
" 41.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 72.0 | \n",
" NaN | \n",
"
\n",
" \n",
" T3 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 58.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 45.0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Student Student 1 Student 10 Student 11 Student 12 Student 13 \\\n",
"Test \n",
"T1 NaN NaN 95.0 22.0 NaN \n",
"T2 66.0 55.0 NaN NaN NaN \n",
"T3 NaN NaN NaN NaN 58.0 \n",
"\n",
"Student Student 14 Student 15 Student 16 Student 17 Student 18 \\\n",
"Test \n",
"T1 NaN NaN 43.0 4.0 14.0 \n",
"T2 31.0 25.0 NaN NaN NaN \n",
"T3 NaN NaN NaN NaN NaN \n",
"\n",
"Student Student 19 Student 2 Student 20 Student 3 Student 4 Student 5 \\\n",
"Test \n",
"T1 53.0 NaN NaN 22.0 NaN NaN \n",
"T2 NaN 96.0 27.0 NaN 41.0 NaN \n",
"T3 NaN NaN NaN NaN NaN 45.0 \n",
"\n",
"Student Student 6 Student 7 Student 8 Student 9 \n",
"Test \n",
"T1 48.0 0.0 NaN NaN \n",
"T2 NaN NaN 72.0 NaN \n",
"T3 NaN NaN NaN 1.0 "
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pivot = marks.pivot(index='Test', columns='Student', values='Chemistry') \n",
"pivot.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [Melt](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.melt.html)"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Test | \n",
" Student | \n",
" Maths | \n",
" Physics | \n",
" Chemistry | \n",
" Biology | \n",
" Computer_Science | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" T2 | \n",
" Student 1 | \n",
" 54 | \n",
" 75 | \n",
" 66 | \n",
" 99 | \n",
" 32 | \n",
"
\n",
" \n",
" 1 | \n",
" T2 | \n",
" Student 2 | \n",
" 29 | \n",
" 91 | \n",
" 96 | \n",
" 69 | \n",
" 58 | \n",
"
\n",
" \n",
" 2 | \n",
" T1 | \n",
" Student 3 | \n",
" 28 | \n",
" 10 | \n",
" 22 | \n",
" 33 | \n",
" 25 | \n",
"
\n",
" \n",
" 3 | \n",
" T2 | \n",
" Student 4 | \n",
" 29 | \n",
" 23 | \n",
" 41 | \n",
" 0 | \n",
" 42 | \n",
"
\n",
" \n",
" 4 | \n",
" T3 | \n",
" Student 5 | \n",
" 40 | \n",
" 71 | \n",
" 45 | \n",
" 42 | \n",
" 87 | \n",
"
\n",
" \n",
" 5 | \n",
" T1 | \n",
" Student 6 | \n",
" 43 | \n",
" 50 | \n",
" 48 | \n",
" 95 | \n",
" 71 | \n",
"
\n",
" \n",
" 6 | \n",
" T1 | \n",
" Student 7 | \n",
" 99 | \n",
" 97 | \n",
" 0 | \n",
" 12 | \n",
" 73 | \n",
"
\n",
" \n",
" 7 | \n",
" T2 | \n",
" Student 8 | \n",
" 17 | \n",
" 0 | \n",
" 72 | \n",
" 75 | \n",
" 17 | \n",
"
\n",
" \n",
" 8 | \n",
" T3 | \n",
" Student 9 | \n",
" 75 | \n",
" 23 | \n",
" 1 | \n",
" 65 | \n",
" 38 | \n",
"
\n",
" \n",
" 9 | \n",
" T2 | \n",
" Student 10 | \n",
" 15 | \n",
" 94 | \n",
" 55 | \n",
" 69 | \n",
" 45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Test Student Maths Physics Chemistry Biology Computer_Science\n",
"0 T2 Student 1 54 75 66 99 32\n",
"1 T2 Student 2 29 91 96 69 58\n",
"2 T1 Student 3 28 10 22 33 25\n",
"3 T2 Student 4 29 23 41 0 42\n",
"4 T3 Student 5 40 71 45 42 87\n",
"5 T1 Student 6 43 50 48 95 71\n",
"6 T1 Student 7 99 97 0 12 73\n",
"7 T2 Student 8 17 0 72 75 17\n",
"8 T3 Student 9 75 23 1 65 38\n",
"9 T2 Student 10 15 94 55 69 45"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"marks.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Student | \n",
" variable | \n",
" Frequency | \n",
"
\n",
" \n",
" \n",
" \n",
" 110 | \n",
" Student 11 | \n",
" Computer_Science | \n",
" 55 | \n",
"
\n",
" \n",
" 111 | \n",
" Student 12 | \n",
" Computer_Science | \n",
" 30 | \n",
"
\n",
" \n",
" 112 | \n",
" Student 13 | \n",
" Computer_Science | \n",
" 54 | \n",
"
\n",
" \n",
" 113 | \n",
" Student 14 | \n",
" Computer_Science | \n",
" 74 | \n",
"
\n",
" \n",
" 114 | \n",
" Student 15 | \n",
" Computer_Science | \n",
" 11 | \n",
"
\n",
" \n",
" 115 | \n",
" Student 16 | \n",
" Computer_Science | \n",
" 61 | \n",
"
\n",
" \n",
" 116 | \n",
" Student 17 | \n",
" Computer_Science | \n",
" 18 | \n",
"
\n",
" \n",
" 117 | \n",
" Student 18 | \n",
" Computer_Science | \n",
" 31 | \n",
"
\n",
" \n",
" 118 | \n",
" Student 19 | \n",
" Computer_Science | \n",
" 79 | \n",
"
\n",
" \n",
" 119 | \n",
" Student 20 | \n",
" Computer_Science | \n",
" 98 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Student variable Frequency\n",
"110 Student 11 Computer_Science 55\n",
"111 Student 12 Computer_Science 30\n",
"112 Student 13 Computer_Science 54\n",
"113 Student 14 Computer_Science 74\n",
"114 Student 15 Computer_Science 11\n",
"115 Student 16 Computer_Science 61\n",
"116 Student 17 Computer_Science 18\n",
"117 Student 18 Computer_Science 31\n",
"118 Student 19 Computer_Science 79\n",
"119 Student 20 Computer_Science 98"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tidy = pd.melt(marks, id_vars = 'Student', value_name = 'Frequency')\n",
"tidy.tail(10)"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 120 entries, 0 to 119\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Student 120 non-null object\n",
" 1 variable 120 non-null object\n",
" 2 Frequency 120 non-null object\n",
"dtypes: object(3)\n",
"memory usage: 2.9+ KB\n"
]
}
],
"source": [
"tidy.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"name": "15_Pandas_Basics.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}