{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "cEr06C20L8WK" }, "source": [ "# Introduction to Python \n", "\n", "## Pandas Intro\n", "\n", "Some examples from [here](https://towardsdatascience.com/40-examples-to-master-pandas-c69d058f434e)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "WmxnkxF8L8WL" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import os\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "Vv-0YZfBL8WP" }, "outputs": [], "source": [ "datapath = \"../Data/\"" ] }, { "cell_type": "markdown", "metadata": { "id": "1F6OvLAcL8WR" }, "source": [ "## Pandas Data Structures: [Series](https://pandas.pydata.org/pandas-docs/stable/reference/series.html)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating Series" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "JryIewnkL8WS", "outputId": "a06421dc-4c4a-4ebc-ac41-dc3e87eebed3" }, "outputs": [ { "data": { "text/plain": [ "0 4\n", "1 12\n", "2 -5\n", "3 3\n", "4 5\n", "dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj = pd.Series([4, 12, -5, 3, 5])\n", "obj" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#dir(obj)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Exporting to Numpy array" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "r9li9Ii-L8WU", "outputId": "e63fde87-980d-4b2b-b26e-bfb3c1acaf59" }, "outputs": [ { "data": { "text/plain": [ "array([ 4, 12, -5, 3, 5])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj.values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Examining the index object" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id":
"wjI6Wts0L8WX", "outputId": "c4980045-c9b8-4a0b-bc85-a87dd7d99e39" }, "outputs": [ { "data": { "text/plain": [ "RangeIndex(start=0, stop=5, step=1)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Redefining the index" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "yHqJC6b8L8WZ", "outputId": "a9373822-231c-4c2d-9241-9a4c6edb0cd1" }, "outputs": [ { "data": { "text/plain": [ "Bob 4\n", "Steve 12\n", "Jeff -5\n", "Ryan 3\n", "Fernie 5\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan', 'Fernie']\n", "obj" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a Series and passing the index as a parameter" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "jI-k8rRGL8Wc", "outputId": "4f37ef09-5899-486c-fdbe-b80cf2663242" }, "outputs": [ { "data": { "text/plain": [ "d 4\n", "b 7\n", "a -5\n", "c 3\n", "dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])\n", "obj2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Slicing and accessing elements in the series" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj2['c']" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "u8e_SNzcL8Wj", "outputId": "4ddd882c-d8bc-4d21-ff39-66e76ba47e4b" }, "outputs": [ { "data": { "text/plain": [ "c 3\n", "a -5\n", "d 4\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj2[['c', 'a', 'd']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Slicing
using boolean expressions" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "2Q-BkxadL8Wm", "outputId": "15badfe1-934c-4b24-a151-247bcd4971e1" }, "outputs": [ { "data": { "text/plain": [ "a -5\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj2[obj2 < 0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Scalar operations with the series" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "LhjvPFD8L8Wo", "outputId": "b37a6c1f-5a8f-46bc-cdab-0e3faaba1bfa" }, "outputs": [ { "data": { "text/plain": [ "d 8\n", "b 14\n", "a -10\n", "c 6\n", "dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj2 * 2" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "51z3zmZYL8Ws", "outputId": "12b72973-26a1-450f-952d-f27c5d1c11dc" }, "outputs": [ { "data": { "text/plain": [ "d 54.598150\n", "b 1096.633158\n", "a 0.006738\n", "c 20.085537\n", "dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.exp(obj2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating Series from dictionaries" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "CoqYtDi2L8Wu", "outputId": "81291bfc-271b-4c77-eacd-406338fd1e88" }, "outputs": [ { "data": { "text/plain": [ "Ohio 35000\n", "Texas 71000\n", "Oregon 16000\n", "Utah 5000\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n", "obj3 = pd.Series(sdata)\n", "obj3" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "U3ldGKqTL8Wx", "outputId": "eb244782-419e-44e8-f216-1a000c3779d7" }, "outputs": [ { "data": { "text/plain": [ "California NaN\n", "Ohio 35000.0\n", "Oregon 16000.0\n", "Texas 71000.0\n", "dtype: float64" ] 
}, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "states = ['California', 'Ohio', 'Oregon', 'Texas']\n", "\n", "obj4 = pd.Series(sdata, index=states)\n", "obj4" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "isnull() and notnull() methods" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "FGYJqsm8L8Wz", "outputId": "a45c5aa6-a07e-449b-e416-1fd7138006b2" }, "outputs": [ { "data": { "text/plain": [ "California True\n", "Ohio False\n", "Oregon False\n", "Texas False\n", "dtype: bool" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.isnull(obj4)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "RZrJBBM-L8W2", "outputId": "08a78774-20bb-473e-ba38-3c68b1a3db98" }, "outputs": [ { "data": { "text/plain": [ "California False\n", "Ohio True\n", "Oregon True\n", "Texas True\n", "dtype: bool" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.notnull(obj4)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "id": "6pWvwb46L8W8", "outputId": "5471c51a-de44-4e99-d746-e88aa00d4a28" }, "outputs": [ { "data": { "text/plain": [ "California NaN\n", "Ohio 70000.0\n", "Oregon 32000.0\n", "Texas 142000.0\n", "Utah 5010.0\n", "dtype: float64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj3.add(obj4, fill_value=10)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "d6_O2TahL8W-", "outputId": "cbc3d183-f1dd-4f76-dac8-9a635910bc09" }, "outputs": [ { "data": { "text/plain": [ "State\n", "California NaN\n", "Ohio 35000.0\n", "Oregon 16000.0\n", "Texas 71000.0\n", "Name: Population, dtype: float64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "obj4.name = 'Population'\n", "obj4.index.name = 'State'\n", "obj4" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"## Pandas Data Structures: [Date Time Range](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify start and end, with the default daily frequency." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',\n", " '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts_idx1 = pd.date_range(start='1/1/2018', end='1/08/2018')\n", "ts_idx1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify start and periods, the number of periods (days)." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',\n", " '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n", " '2016-01-09', '2016-01-10'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts_idx2 = pd.date_range(\"20160101\", periods=10, freq='D')\n", "ts_idx2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify end and periods, the number of periods (days)."
] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',\n", " '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts_idx3 = pd.date_range(end='1/1/2018', periods=8)\n", "ts_idx3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify start, end, and periods; the frequency is generated automatically (linearly spaced)." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',\n", " '2018-04-27 00:00:00'],\n", " dtype='datetime64[ns]', freq=None)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts_idx4 = pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)\n", "ts_idx4" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Other parameters" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',\n", " '2018-05-31'],\n", " dtype='datetime64[ns]', freq='M')" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.date_range(start='1/1/2018', periods=5, freq='M')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',\n", " '2019-01-31'],\n", " dtype='datetime64[ns]', freq='3M')" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.date_range(start='1/1/2018', periods=5, freq='3M')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-02 00:00:00+00:00',\n", " '2018-01-03 00:00:00+00:00', '2018-01-04 00:00:00+00:00',\n", " '2018-01-05 00:00:00+00:00'],\n", " dtype='datetime64[ns, Europe/Lisbon]', freq='D')" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.date_range(start='1/1/2018', periods=5, tz='Europe/Lisbon')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='both')" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left')" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right')" ] }, { "cell_type": "markdown", "metadata": { "id": "VSkGDQr5L8XA" }, "source": [ "## Pandas Data Structures: [Dataframe](https://pandas.pydata.org/pandas-docs/stable/reference/frame.html)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating Dataframe from a dictionary of lists" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "id": "JJBQnbqDL8XA", "outputId": "7b6e7083-65ae-471f-ac54-1e19dd42f896" }, "outputs": [ { "data": {
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateyearpop
0Ohio20001.5
1Ohio20011.7
2Ohio20023.6
3Nevada20012.4
4Nevada20022.9
\n", "
" ], "text/plain": [ " state year pop\n", "0 Ohio 2000 1.5\n", "1 Ohio 2001 1.7\n", "2 Ohio 2002 3.6\n", "3 Nevada 2001 2.4\n", "4 Nevada 2002 2.9" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n", " 'year': [2000, 2001, 2002, 2001, 2002],\n", " 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}\n", "\n", "frame = pd.DataFrame(data)\n", "frame" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "#dir(frame)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print(type(frame))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print(type(frame['state']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specifying the names of columns" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "id": "GlmmkIaQL8XC", "outputId": "fccf280c-9a02-4bf1-f9ec-70e022c6d442" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepop
02000Ohio1.5
12001Ohio1.7
22002Ohio3.6
32001Nevada2.4
42002Nevada2.9
\n", "
" ], "text/plain": [ " year state pop\n", "0 2000 Ohio 1.5\n", "1 2001 Ohio 1.7\n", "2 2002 Ohio 3.6\n", "3 2001 Nevada 2.4\n", "4 2002 Nevada 2.9" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = pd.DataFrame(data, columns=['year', 'state', 'pop'])\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Making a column as the index" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "id": "dHHFSDK5L8XE" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepop
year
20002000Ohio1.5
20012001Ohio1.7
20022002Ohio3.6
20012001Nevada2.4
20022002Nevada2.9
\n", "
" ], "text/plain": [ " year state pop\n", "year \n", "2000 2000 Ohio 1.5\n", "2001 2001 Ohio 1.7\n", "2002 2002 Ohio 3.6\n", "2001 2001 Nevada 2.4\n", "2002 2002 Nevada 2.9" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.set_index('year', inplace=True, drop=False)\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Dropping a column" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2000Ohio1.5
2001Ohio1.7
2002Ohio3.6
2001Nevada2.4
2002Nevada2.9
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2000 Ohio 1.5\n", "2001 Ohio 1.7\n", "2002 Ohio 3.6\n", "2001 Nevada 2.4\n", "2002 Nevada 2.9" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.drop('year', axis=1, inplace=True)\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Dropping one or more rows (with `inplace=False` a new DataFrame is returned and `d` itself is unchanged; with `inplace=True` `d` is modified)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "id": "_m00ehvGL8XH", "outputId": "a292d2bf-dcf5-4fb5-ced4-60b34cd172d4" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2000Ohio1.5
2001Ohio1.7
2002Ohio3.6
2001Nevada2.4
2002Nevada2.9
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2000 Ohio 1.5\n", "2001 Ohio 1.7\n", "2002 Ohio 3.6\n", "2001 Nevada 2.4\n", "2002 Nevada 2.9" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.drop(2000, axis=0, inplace=False)\n", "d" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "id": "AdSUXcgwL8XJ", "outputId": "6b161b59-0870-4cb0-cee9-e533d51b65df" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2001Ohio1.7
2002Ohio3.6
2001Nevada2.4
2002Nevada2.9
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2001 Ohio 1.7\n", "2002 Ohio 3.6\n", "2001 Nevada 2.4\n", "2002 Nevada 2.9" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.drop(2000, axis=0, inplace=True)\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Slicing by column / index" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "year\n", "2001 1.7\n", "2002 3.6\n", "2001 2.4\n", "2002 2.9\n", "Name: pop, dtype: float64" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d['pop']" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "year\n", "2001 1.7\n", "2002 3.6\n", "2001 2.4\n", "2002 2.9\n", "Name: pop, dtype: float64" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.loc[:,'pop']" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2001Ohio1.7
2001Nevada2.4
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2001 Ohio 1.7\n", "2001 Nevada 2.4" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.loc[2001]" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2001Ohio1.7
2001Nevada2.4
2002Ohio3.6
2002Nevada2.9
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2001 Ohio 1.7\n", "2001 Nevada 2.4\n", "2002 Ohio 3.6\n", "2002 Nevada 2.9" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.loc[[2001,2002], [\"state\", \"pop\"]]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pop
year
20011.7
20023.6
\n", "
" ], "text/plain": [ " pop\n", "year \n", "2001 1.7\n", "2002 3.6" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.iloc[[0,1],[1]]" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2001Ohio1.7
2002Ohio3.6
2001Nevada2.4
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2001 Ohio 1.7\n", "2002 Ohio 3.6\n", "2001 Nevada 2.4" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.iloc[0:3,[0,1]]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Accessing index and columns objects" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([2001, 2002, 2001, 2002], dtype='int64', name='year')" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.index" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "id": "1bEokh0LL8XZ", "outputId": "fcba92bf-2184-4542-80d3-457b1ee8b365" }, "outputs": [ { "data": { "text/plain": [ "Index(['state', 'pop'], dtype='object')" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using alternate notation to access columns as a property" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "id": "6ehEuDNOL8Xb", "outputId": "98660696-6174-4d3c-8084-ff3c967af7c4" }, "outputs": [ { "data": { "text/plain": [ "year\n", "2001 Ohio\n", "2002 Ohio\n", "2001 Nevada\n", "2002 Nevada\n", "Name: state, dtype: object" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d['state']" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "id": "8fwIym5IL8Xe", "outputId": "77ca9eb7-91d7-49f9-9c8a-b13b625c6dc2" }, "outputs": [ { "data": { "text/plain": [ "year\n", "2001 Ohio\n", "2002 Ohio\n", "2001 Nevada\n", "2002 Nevada\n", "Name: state, dtype: object" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.state" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Slicing with boolean conditions (&, |, ==, !=)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, 
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
statepop
year
2001Ohio1.7
2002Ohio3.6
2001Nevada2.4
\n", "
" ], "text/plain": [ " state pop\n", "year \n", "2001 Ohio 1.7\n", "2002 Ohio 3.6\n", "2001 Nevada 2.4" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = d[(d[\"state\"] != 'Nevada') | (d.index != 2002)]\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating Dataframes from existing structures" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "id": "pHmbuPv5L8XK", "outputId": "9dbd2851-aae4-46e0-8571-02b02ab410b2" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebt
one2000Ohio1.5NaN
two2001Ohio1.7NaN
three2002Ohio3.6NaN
four2001Nevada2.4NaN
five2002Nevada2.9NaN
\n", "
" ], "text/plain": [ " year state pop debt\n", "one 2000 Ohio 1.5 NaN\n", "two 2001 Ohio 1.7 NaN\n", "three 2002 Ohio 3.6 NaN\n", "four 2001 Nevada 2.4 NaN\n", "five 2002 Nevada 2.9 NaN" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2 = pd.DataFrame(data, \n", " columns=['year', 'state', 'pop', 'debt'],\n", " index=['one', 'two', 'three', 'four', 'five'])\n", "frame2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating new column" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "id": "dUocNBSbL8XN", "outputId": "abe4c48e-142a-4162-fde8-b4db4b63b3e6" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebtnew
one2000Ohio1.5NaN13
two2001Ohio1.7NaN13
three2002Ohio3.6NaN13
four2001Nevada2.4NaN13
five2002Nevada2.9NaN13
\n", "
" ], "text/plain": [ " year state pop debt new\n", "one 2000 Ohio 1.5 NaN 13\n", "two 2001 Ohio 1.7 NaN 13\n", "three 2002 Ohio 3.6 NaN 13\n", "four 2001 Nevada 2.4 NaN 13\n", "five 2002 Nevada 2.9 NaN 13" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2['new'] = 13\n", "frame2" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "id": "-jAr37_vL8XT", "outputId": "ac4f4191-3cfb-46f9-b2ac-0c0e2de61805" }, "outputs": [ { "data": { "text/plain": [ "year 2002\n", "state Ohio\n", "pop 3.6\n", "debt NaN\n", "new 13\n", "Name: three, dtype: object" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2.loc['three']" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.6" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2.loc['three', 'pop']" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "id": "thLPV8M9L8XV", "outputId": "a87def93-007b-475b-9277-3fa007e792ac" }, "outputs": [ { "data": { "text/plain": [ "year 2002\n", "state Ohio\n", "pop 3.6\n", "debt NaN\n", "new 13\n", "Name: three, dtype: object" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2.iloc[2]" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Ohio'" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2.iloc[2,1]" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "id": "oriaA7V8L8Xx", "outputId": "a23d458f-b216-4a7f-f0ae-d68d0cdaffc3" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
20012.41.7
20022.93.6
2000NaN1.5
\n", "
" ], "text/plain": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2000 NaN 1.5" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop = {'Nevada': {2001: 2.4, 2002: 2.9},'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n", "\n", "frame3 = pd.DataFrame(pop)\n", "frame3" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "id": "1IznlSKvL8Xi", "outputId": "69d179ef-9fd5-4653-df34-5ca596eab8ed" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebtnew
one2000Ohio1.516.513
two2001Ohio1.716.513
three2002Ohio3.616.513
four2001Nevada2.416.513
five2002Nevada2.916.513
\n", "
" ], "text/plain": [ " year state pop debt new\n", "one 2000 Ohio 1.5 16.5 13\n", "two 2001 Ohio 1.7 16.5 13\n", "three 2002 Ohio 3.6 16.5 13\n", "four 2001 Nevada 2.4 16.5 13\n", "five 2002 Nevada 2.9 16.5 13" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2['debt'] = 16.5\n", "frame2" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "id": "V3opjIPvL8Xk", "outputId": "e45c75dd-9c6a-4240-ceca-0185f443f86a" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebtnew
one2000Ohio1.50.013
two2001Ohio1.71.013
three2002Ohio3.62.013
four2001Nevada2.43.013
five2002Nevada2.94.013
\n", "
" ], "text/plain": [ " year state pop debt new\n", "one 2000 Ohio 1.5 0.0 13\n", "two 2001 Ohio 1.7 1.0 13\n", "three 2002 Ohio 3.6 2.0 13\n", "four 2001 Nevada 2.4 3.0 13\n", "five 2002 Nevada 2.9 4.0 13" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2['debt'] = np.arange(5.)\n", "frame2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a column from a series" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "id": "xmPu8JO7L8Xm", "outputId": "f74a2fa9-c4fe-4f8b-9d11-abcc4215743a" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebtnew
one2000Ohio1.5NaN13
two2001Ohio1.7-1.213
three2002Ohio3.6NaN13
four2001Nevada2.4-1.513
five2002Nevada2.9-1.713
\n", "
" ], "text/plain": [ " year state pop debt new\n", "one 2000 Ohio 1.5 NaN 13\n", "two 2001 Ohio 1.7 -1.2 13\n", "three 2002 Ohio 3.6 NaN 13\n", "four 2001 Nevada 2.4 -1.5 13\n", "five 2002 Nevada 2.9 -1.7 13" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])\n", "\n", "frame2['debt'] = val\n", "frame2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a column from a boolean expression" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "id": "B9qiCoeqL8Xo", "outputId": "9def766d-748d-4a61-e232-49518874f7a8" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebtneweastern
one2000Ohio1.5NaN13True
two2001Ohio1.7-1.213True
three2002Ohio3.6NaN13True
four2001Nevada2.4-1.513False
five2002Nevada2.9-1.713False
\n", "
" ], "text/plain": [ " year state pop debt new eastern\n", "one 2000 Ohio 1.5 NaN 13 True\n", "two 2001 Ohio 1.7 -1.2 13 True\n", "three 2002 Ohio 3.6 NaN 13 True\n", "four 2001 Nevada 2.4 -1.5 13 False\n", "five 2002 Nevada 2.9 -1.7 13 False" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame2['eastern'] = frame2.state == 'Ohio'\n", "frame2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Deleting a column" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "id": "Pstsb7rTL8Xq", "outputId": "6ba15086-1cd2-49d3-a2fa-507418b722f8" }, "outputs": [ { "data": { "text/plain": [ "Index(['year', 'state', 'pop', 'debt', 'new'], dtype='object')" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "del frame2['eastern']\n", "frame2.columns" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "id": "7CTgyP5bL8X7", "outputId": "bcf2e719-8dc6-43e3-bc11-7448dbf2a7ff" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
20012.41.7
20022.93.6
2003NaNNaN
\n", "
" ], "text/plain": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2003 NaN NaN" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(pop, index=[2001, 2002, 2003])" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "id": "I9IZWJZML8X_", "outputId": "7d04cda7-166f-4273-9b75-e59cdee02ab5" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevada
20011.72.4
20023.62.9
\n", "
" ], "text/plain": [ " Ohio Nevada\n", "2001 1.7 2.4\n", "2002 3.6 2.9" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pdata = {'Ohio': frame3['Ohio'][:-1],'Nevada': frame3['Nevada'][:2]}\n", "pd.DataFrame(pdata)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "id": "Rz0Y8hCkL8YB", "outputId": "aaeed6d6-1d33-4056-ebc8-738053b44311" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateNevadaOhio
year
20012.41.7
20022.93.6
2000NaN1.5
\n", "
" ], "text/plain": [ "state Nevada Ohio\n", "year \n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2000 NaN 1.5" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame3.index.name = 'year'\n", "frame3.columns.name = 'state'\n", "frame3" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "id": "1TFUFB4AL8YE", "outputId": "b673bbd5-fc17-4807-f52d-2fa781fc79fe" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
20012.41.7
20022.93.6
2000NaN1.5
\n", "
" ], "text/plain": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2000 NaN 1.5" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop = {'Nevada': {2001: 2.4, 2002: 2.9},'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n", "frame4 = pd.DataFrame(pop)\n", "frame4" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "id": "zJWgEm7zL8YF", "outputId": "8260ccc7-6032-4f1d-fec2-b49f22c237d1" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
20012.41.7
20022.93.6
20002.01.5
\n", "
" ], "text/plain": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2000 2.0 1.5" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame4.loc[2000,'Nevada'] = 2\n", "frame4" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "id": "iZToNmINL8YH", "outputId": "326e15ae-2fee-4666-d8cd-4f6290342429" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
20012.41.7
20022.93.6
20002.01.5
20012.432.0
20022.932.0
20002.032.0
\n", "
" ], "text/plain": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2000 2.0 1.5\n", "2001 2.4 32.0\n", "2002 2.9 32.0\n", "2000 2.0 32.0" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame5 = pd.concat([frame4, frame4], axis=0)\n", "frame5.iloc[3:,1] = 32\n", "frame5" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "id": "eVth2N0_L8YJ", "outputId": "0bd36e9c-581c-4bab-9141-4a5fa558124f" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
20012.41.7
20022.93.6
20002.01.5
\n", "
" ], "text/plain": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2000 2.0 1.5" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame5.drop_duplicates(['Nevada'], inplace=True)\n", "frame5" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',\n", " '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n", " '2016-01-09', '2016-01-10'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dates = pd.date_range(\"20160101\", periods=10, freq='D')\n", "dates" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "id": "pCUFSR3ML8YL" }, "outputs": [], "source": [ "data = np.random.random((10,3))" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "id": "eX-2uqSsL8YR", "outputId": "e69d1950-84f6-455d-fbe2-0df36edb29b1" }, "outputs": [ { "data": { "text/plain": [ "array([[0.3940105 , 0.16598653, 0.32306814],\n", " [0.46469848, 0.12793369, 0.23421117],\n", " [0.58451733, 0.82838421, 0.74140121],\n", " [0.51013677, 0.9891163 , 0.91843721],\n", " [0.37910817, 0.34414402, 0.3448779 ],\n", " [0.9496902 , 0.27825066, 0.21221705],\n", " [0.06240444, 0.77187425, 0.06477811],\n", " [0.13257664, 0.70945323, 0.09441618],\n", " [0.0072288 , 0.83029155, 0.15235712],\n", " [0.1302358 , 0.72444933, 0.67439275]])" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "id": "SvDpOoEYL8YV", "outputId": "49779dd1-3142-4e38-c0ca-187e32d59929" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-010.3940100.1659870.323068
2016-01-020.4646980.1279340.234211
2016-01-030.5845170.8283840.741401
2016-01-040.5101370.9891160.918437
2016-01-050.3791080.3441440.344878
2016-01-060.9496900.2782510.212217
2016-01-070.0624040.7718740.064778
2016-01-080.1325770.7094530.094416
2016-01-090.0072290.8302920.152357
2016-01-100.1302360.7244490.674393
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-01 0.394010 0.165987 0.323068\n", "2016-01-02 0.464698 0.127934 0.234211\n", "2016-01-03 0.584517 0.828384 0.741401\n", "2016-01-04 0.510137 0.989116 0.918437\n", "2016-01-05 0.379108 0.344144 0.344878\n", "2016-01-06 0.949690 0.278251 0.212217\n", "2016-01-07 0.062404 0.771874 0.064778\n", "2016-01-08 0.132577 0.709453 0.094416\n", "2016-01-09 0.007229 0.830292 0.152357\n", "2016-01-10 0.130236 0.724449 0.674393" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "column_names = ['Column1', 'Column2', 'Column3']\n", "df = pd.DataFrame(data, index=dates, columns=column_names)\n", "df.head(10)" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "id": "zFJZeaMQL8Ya", "outputId": "92460c5c-a8dc-4b5f-fe1c-52133be01365" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-020.4646980.1279340.234211
2016-01-030.5845170.8283840.741401
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-02 0.464698 0.127934 0.234211\n", "2016-01-03 0.584517 0.828384 0.741401" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[1:3]" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "id": "45hwfY72L8Yc", "outputId": "8b30a088-d247-459c-ed20-cedb12c99326" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-040.5101370.9891160.918437
2016-01-050.3791080.3441440.344878
2016-01-060.9496900.2782510.212217
2016-01-070.0624040.7718740.064778
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-04 0.510137 0.989116 0.918437\n", "2016-01-05 0.379108 0.344144 0.344878\n", "2016-01-06 0.949690 0.278251 0.212217\n", "2016-01-07 0.062404 0.771874 0.064778" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['20160104':'20160107']" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2016-01-01 0.165987\n", "2016-01-02 0.127934\n", "2016-01-03 0.828384\n", "2016-01-04 0.989116\n", "2016-01-05 0.344144\n", "2016-01-06 0.278251\n", "2016-01-07 0.771874\n", "2016-01-08 0.709453\n", "2016-01-09 0.830292\n", "2016-01-10 0.724449\n", "Freq: D, Name: Column2, dtype: float64" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc['2016-01-01':'2016-01-11','Column2']" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "id": "QsuCiRzuL8YX", "outputId": "b1f271e6-7e4e-4b99-ae8a-d25405d48145" }, "outputs": [ { "data": { "text/plain": [ "2016-01-01 0.165987\n", "2016-01-02 0.127934\n", "2016-01-03 0.828384\n", "2016-01-04 0.989116\n", "2016-01-05 0.344144\n", "2016-01-06 0.278251\n", "2016-01-07 0.771874\n", "2016-01-08 0.709453\n", "2016-01-09 0.830292\n", "2016-01-10 0.724449\n", "Freq: D, Name: Column2, dtype: float64" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[0:11,1]" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "id": "v9yt82jyL8Yf", "outputId": "6a0feefb-a6cb-407c-c5cb-591b584dee94" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-030.5845170.8283840.741401
2016-01-040.5101370.9891160.918437
2016-01-050.3791080.3441440.344878
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-03 0.584517 0.828384 0.741401\n", "2016-01-04 0.510137 0.989116 0.918437\n", "2016-01-05 0.379108 0.344144 0.344878" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df.index > '20160102') & (df.index < '20160106')]" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "id": "LFGw7iv4L8Yh", "outputId": "5f27335e-8e65-4758-82a2-2c0462b4ed70" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-030.5845170.8283840.741401
2016-01-040.5101370.9891160.918437
2016-01-070.0624040.7718740.064778
2016-01-090.0072290.8302920.152357
2016-01-100.1302360.7244490.674393
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-03 0.584517 0.828384 0.741401\n", "2016-01-04 0.510137 0.989116 0.918437\n", "2016-01-07 0.062404 0.771874 0.064778\n", "2016-01-09 0.007229 0.830292 0.152357\n", "2016-01-10 0.130236 0.724449 0.674393" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.query('(Column1 < Column2) & (Column1 < Column3)')" ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "id": "WMF0I3X5L8Yi", "outputId": "162717d9-1062-467b-d09f-9c6e9f024822" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column3
2016-01-010.3940100.323068
2016-01-020.4646980.234211
\n", "
" ], "text/plain": [ " Column1 Column3\n", "2016-01-01 0.394010 0.323068\n", "2016-01-02 0.464698 0.234211" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc['20160101':'20160102',['Column1','Column3']]" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "id": "gjt7LI1IL8Yn", "outputId": "db9bf1ca-30db-4d4c-f8a6-0ceca7ceec2c" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2
2016-01-040.5101370.989116
2016-01-050.3791080.344144
\n", "
" ], "text/plain": [ " Column1 Column2\n", "2016-01-04 0.510137 0.989116\n", "2016-01-05 0.379108 0.344144" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[3:5, 0:2]" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "id": "xOOyyU33L8Yl", "outputId": "cebf0c4c-8046-461b-aa71-b9f39d3eff3b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "DatetimeIndex: 10 entries, 2016-01-01 to 2016-01-10\n", "Freq: D\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Column1 10 non-null float64\n", " 1 Column2 10 non-null float64\n", " 2 Column3 10 non-null float64\n", "dtypes: float64(3)\n", "memory usage: 620.0 bytes\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { "id": "p7yej71hL8Yr", "outputId": "35a5cfd0-eecf-4111-d6fc-7fe57058567c" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
count10.00000010.00000010.000000
mean0.3614610.5769880.376016
std0.2888040.3142040.296958
min0.0072290.1279340.064778
25%0.1308210.2947240.167322
50%0.3865590.7169510.278640
75%0.4987770.8142570.592014
max0.9496900.9891160.918437
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "count 10.000000 10.000000 10.000000\n", "mean 0.361461 0.576988 0.376016\n", "std 0.288804 0.314204 0.296958\n", "min 0.007229 0.127934 0.064778\n", "25% 0.130821 0.294724 0.167322\n", "50% 0.386559 0.716951 0.278640\n", "75% 0.498777 0.814257 0.592014\n", "max 0.949690 0.989116 0.918437" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 83, "metadata": { "id": "F5Dk5Je-L8Yv", "outputId": "bbdc29a5-2759-4558-c686-d963066a8ca2" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-010.3940100.1659870.323068
2016-01-020.4646980.1279340.234211
2016-01-030.5845170.8283840.741401
2016-01-040.5101370.9891160.918437
2016-01-050.3791080.3441440.344878
2016-01-060.9496900.2782510.212217
2016-01-070.0624040.7718740.064778
2016-01-080.1325770.7094530.094416
2016-01-090.0072290.8302920.152357
2016-01-100.1302360.7244490.674393
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-01 0.394010 0.165987 0.323068\n", "2016-01-02 0.464698 0.127934 0.234211\n", "2016-01-03 0.584517 0.828384 0.741401\n", "2016-01-04 0.510137 0.989116 0.918437\n", "2016-01-05 0.379108 0.344144 0.344878\n", "2016-01-06 0.949690 0.278251 0.212217\n", "2016-01-07 0.062404 0.771874 0.064778\n", "2016-01-08 0.132577 0.709453 0.094416\n", "2016-01-09 0.007229 0.830292 0.152357\n", "2016-01-10 0.130236 0.724449 0.674393" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_index(axis=0, ascending=True,) # inplace=True)" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "id": "ZdzzGZMpL8Yw", "outputId": "92c40e02-85a3-4d56-fcb6-e939500c43fa" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-010.3940100.1659870.323068
2016-01-020.4646980.1279340.234211
2016-01-030.5845170.8283840.741401
2016-01-040.5101370.9891160.918437
2016-01-050.3791080.3441440.344878
2016-01-060.9496900.2782510.212217
2016-01-070.0624040.7718740.064778
2016-01-080.1325770.7094530.094416
2016-01-090.0072290.8302920.152357
2016-01-100.1302360.7244490.674393
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-01 0.394010 0.165987 0.323068\n", "2016-01-02 0.464698 0.127934 0.234211\n", "2016-01-03 0.584517 0.828384 0.741401\n", "2016-01-04 0.510137 0.989116 0.918437\n", "2016-01-05 0.379108 0.344144 0.344878\n", "2016-01-06 0.949690 0.278251 0.212217\n", "2016-01-07 0.062404 0.771874 0.064778\n", "2016-01-08 0.132577 0.709453 0.094416\n", "2016-01-09 0.007229 0.830292 0.152357\n", "2016-01-10 0.130236 0.724449 0.674393" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[sorted(df.columns)]" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "id": "5RW7CbCQL8Yz", "outputId": "0383cf8f-2de9-4239-a48b-dac3a105f9be" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Column1Column2Column3
2016-01-020.4646980.1279340.234211
2016-01-010.3940100.1659870.323068
2016-01-060.9496900.2782510.212217
2016-01-050.3791080.3441440.344878
2016-01-080.1325770.7094530.094416
2016-01-100.1302360.7244490.674393
2016-01-070.0624040.7718740.064778
2016-01-030.5845170.8283840.741401
2016-01-090.0072290.8302920.152357
2016-01-040.5101370.9891160.918437
\n", "
" ], "text/plain": [ " Column1 Column2 Column3\n", "2016-01-02 0.464698 0.127934 0.234211\n", "2016-01-01 0.394010 0.165987 0.323068\n", "2016-01-06 0.949690 0.278251 0.212217\n", "2016-01-05 0.379108 0.344144 0.344878\n", "2016-01-08 0.132577 0.709453 0.094416\n", "2016-01-10 0.130236 0.724449 0.674393\n", "2016-01-07 0.062404 0.771874 0.064778\n", "2016-01-03 0.584517 0.828384 0.741401\n", "2016-01-09 0.007229 0.830292 0.152357\n", "2016-01-04 0.510137 0.989116 0.918437" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(by='Column2')" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "id": "yGX8iYskL8Y2" }, "outputs": [], "source": [ "dates1 = pd.date_range(\"20160101\", periods=6)\n", "data1 = np.random.random((6,2))\n", "column_names1 = ['ColumnA', 'ColumnB']\n", "\n", "dates2 = pd.date_range(\"20160104\", periods=7)\n", "data2 = np.random.random((7,2))\n", "column_names2 = ['ColumnC', 'ColumnD']\n", "\n", "df1 = pd.DataFrame(data1, index=dates1, columns=column_names1)\n", "df2 = pd.DataFrame(data2, index=dates2, columns=column_names2)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "id": "FdexPwrQL8Y4", "outputId": "ae9b61a2-4a09-4696-c0b9-9ad2479682ad" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnB
2016-01-010.9611910.214906
2016-01-020.5629810.602063
2016-01-030.5838410.367577
2016-01-040.5629170.712821
2016-01-050.7949970.518708
2016-01-060.9463440.972193
\n", "
" ], "text/plain": [ " ColumnA ColumnB\n", "2016-01-01 0.961191 0.214906\n", "2016-01-02 0.562981 0.602063\n", "2016-01-03 0.583841 0.367577\n", "2016-01-04 0.562917 0.712821\n", "2016-01-05 0.794997 0.518708\n", "2016-01-06 0.946344 0.972193" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "id": "yOJ_bMBRL8Y5", "outputId": "b14eb2f3-19d9-4698-c924-cfde04f2bc18" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnCColumnD
2016-01-040.0179400.876961
2016-01-050.3099620.768632
2016-01-060.2734690.435210
2016-01-070.4276010.153653
2016-01-080.9453750.084841
2016-01-090.6309370.256707
2016-01-100.0170570.316170
\n", "
" ], "text/plain": [ " ColumnC ColumnD\n", "2016-01-04 0.017940 0.876961\n", "2016-01-05 0.309962 0.768632\n", "2016-01-06 0.273469 0.435210\n", "2016-01-07 0.427601 0.153653\n", "2016-01-08 0.945375 0.084841\n", "2016-01-09 0.630937 0.256707\n", "2016-01-10 0.017057 0.316170" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "id": "jymASUXyL8Y6", "outputId": "73ca0b63-be54-4ddd-9597-01dcc5e0b216" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnD
2016-01-010.9611910.214906NaNNaN
2016-01-020.5629810.602063NaNNaN
2016-01-030.5838410.367577NaNNaN
2016-01-040.5629170.7128210.0179400.876961
2016-01-050.7949970.5187080.3099620.768632
2016-01-060.9463440.9721930.2734690.435210
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD\n", "2016-01-01 0.961191 0.214906 NaN NaN\n", "2016-01-02 0.562981 0.602063 NaN NaN\n", "2016-01-03 0.583841 0.367577 NaN NaN\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.join.html\n", "df1.join(df2, how='left')" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "id": "dwi5DcXdL8Y-", "outputId": "fb77c0dc-4b65-492a-95c8-c5685982c1b9" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnD
2016-01-040.5629170.7128210.0179400.876961
2016-01-050.7949970.5187080.3099620.768632
2016-01-060.9463440.9721930.2734690.435210
2016-01-07NaNNaN0.4276010.153653
2016-01-08NaNNaN0.9453750.084841
2016-01-09NaNNaN0.6309370.256707
2016-01-10NaNNaN0.0170570.316170
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210\n", "2016-01-07 NaN NaN 0.427601 0.153653\n", "2016-01-08 NaN NaN 0.945375 0.084841\n", "2016-01-09 NaN NaN 0.630937 0.256707\n", "2016-01-10 NaN NaN 0.017057 0.316170" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.join(df2, how='right')" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "id": "LehqC2xKL8Y_", "outputId": "da540f4b-9ee8-49e9-a2fe-18c5939f1b32" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnD
2016-01-010.9611910.214906NaNNaN
2016-01-020.5629810.602063NaNNaN
2016-01-030.5838410.367577NaNNaN
2016-01-040.5629170.7128210.0179400.876961
2016-01-050.7949970.5187080.3099620.768632
2016-01-060.9463440.9721930.2734690.435210
2016-01-07NaNNaN0.4276010.153653
2016-01-08NaNNaN0.9453750.084841
2016-01-09NaNNaN0.6309370.256707
2016-01-10NaNNaN0.0170570.316170
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD\n", "2016-01-01 0.961191 0.214906 NaN NaN\n", "2016-01-02 0.562981 0.602063 NaN NaN\n", "2016-01-03 0.583841 0.367577 NaN NaN\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210\n", "2016-01-07 NaN NaN 0.427601 0.153653\n", "2016-01-08 NaN NaN 0.945375 0.084841\n", "2016-01-09 NaN NaN 0.630937 0.256707\n", "2016-01-10 NaN NaN 0.017057 0.316170" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.join(df2, how='outer')" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "id": "BnxIHD1wL8ZA", "outputId": "a02abc4f-beec-4741-cbc1-179168bafadf" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnD
2016-01-040.5629170.7128210.0179400.876961
2016-01-050.7949970.5187080.3099620.768632
2016-01-060.9463440.9721930.2734690.435210
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.join(df2, how='inner')" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "id": "ypYiciNsL8ZC", "outputId": "8ea83eee-e408-4fa2-fb6a-709be802654a" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnDColumnA_df2
2016-01-010.9611910.214906NaNNaNNaN
2016-01-020.5629810.602063NaNNaNNaN
2016-01-030.5838410.367577NaNNaNNaN
2016-01-040.5629170.7128210.0179400.8769611.562917
2016-01-050.7949970.5187080.3099620.7686321.794997
2016-01-060.9463440.9721930.2734690.4352101.946344
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD ColumnA_df2\n", "2016-01-01 0.961191 0.214906 NaN NaN NaN\n", "2016-01-02 0.562981 0.602063 NaN NaN NaN\n", "2016-01-03 0.583841 0.367577 NaN NaN NaN\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961 1.562917\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632 1.794997\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210 1.946344" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2['ColumnA'] = df1.ColumnA+1 #Example when columns have the same name\n", "\n", "df1.join(df2, how='left', rsuffix='_df2')" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "id": "VKWLYtQyL8ZD" }, "outputs": [], "source": [ "del df2['ColumnA']\n", "df3 = df1.join(df2)\n", "\n", "# add a column to df to group on\n", "df3['ProfitLoss'] = pd.Series(['Profit', \n", " 'Loss', \n", " 'Profit', \n", " 'Same', \n", " 'Profit', \n", " 'Loss', \n", " 'Profit', \n", " 'Profit', \n", " 'Same', \n", " 'Loss'], index=dates)" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "id": "tVryLR1kL8ZF" }, "outputs": [], "source": [ "df3['Student'] = pd.Series(['Alex',\n", " 'Alex',\n", " 'Alex',\n", " 'Marcos',\n", " 'Hannah',\n", " 'Hannah',\n", " 'Marcos',\n", " 'Hannah',\n", " 'Hannah',\n", " 'Barbara'], index=dates)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": { "id": "q9vjZfwcL8ZG", "outputId": "50e5fb5e-fb40-469a-f4a2-3758b12b7039" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnDProfitLossStudent
2016-01-010.9611910.214906NaNNaNProfitAlex
2016-01-020.5629810.602063NaNNaNLossAlex
2016-01-030.5838410.367577NaNNaNProfitAlex
2016-01-040.5629170.7128210.0179400.876961SameMarcos
2016-01-050.7949970.5187080.3099620.768632ProfitHannah
2016-01-060.9463440.9721930.2734690.435210LossHannah
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD ProfitLoss Student\n", "2016-01-01 0.961191 0.214906 NaN NaN Profit Alex\n", "2016-01-02 0.562981 0.602063 NaN NaN Loss Alex\n", "2016-01-03 0.583841 0.367577 NaN NaN Profit Alex\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961 Same Marcos\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632 Profit Hannah\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210 Loss Hannah" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3" ] }, { "cell_type": "code", "execution_count": 97, "metadata": { "id": "aQ5xKKMWL8ZI", "outputId": "993632fa-1ffe-46bd-97b1-492300cc1dc0" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnD
ProfitLoss
Loss0.7546620.7871280.2734690.435210
Profit0.7800100.3670640.3099620.768632
Same0.5629170.7128210.0179400.876961
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD\n", "ProfitLoss \n", "Loss 0.754662 0.787128 0.273469 0.435210\n", "Profit 0.780010 0.367064 0.309962 0.768632\n", "Same 0.562917 0.712821 0.017940 0.876961" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grupos = df3.groupby('ProfitLoss')#.mean()\n", "grupos.mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Verifying Python's ordering heuristics" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "id": "bbIbVPYVL8ZJ", "outputId": "7af165bc-e886-4aa8-b1c0-f3950152ba1f" }, "outputs": [ { "data": { "text/plain": [ "'name2'" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max(['name1', 'name2', 'Name3'])" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "id": "lW_LdufBL8ZK", "outputId": "930a5e51-ec3f-444b-8124-9de6bba0a0e9" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnD
StudentProfitLoss
AlexLoss0.5629810.602063NaNNaN
Profit0.9611910.367577NaNNaN
HannahLoss0.9463440.9721930.2734690.435210
Profit0.7949970.5187080.3099620.768632
MarcosSame0.5629170.7128210.0179400.876961
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD\n", "Student ProfitLoss \n", "Alex Loss 0.562981 0.602063 NaN NaN\n", " Profit 0.961191 0.367577 NaN NaN\n", "Hannah Loss 0.946344 0.972193 0.273469 0.435210\n", " Profit 0.794997 0.518708 0.309962 0.768632\n", "Marcos Same 0.562917 0.712821 0.017940 0.876961" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4 = df3.groupby(['Student','ProfitLoss']).max()\n", "df4" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "id": "Fer2LHsrL8ZM", "outputId": "adbc1636-4d72-4b0a-bc3c-814812976fe0" }, "outputs": [ { "data": { "text/plain": [ "Index(['Alex', 'Alex', 'Hannah', 'Hannah', 'Marcos'], dtype='object', name='Student')" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4.index.get_level_values('Student')" ] }, { "cell_type": "code", "execution_count": 101, "metadata": { "id": "aUM-BJMYL8ZN", "outputId": "f992ff24-9751-40bd-ebd4-e0f9e948f1e8" }, "outputs": [ { "data": { "text/plain": [ "0.7949966854120388" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4.loc[('Hannah','Profit'), 'ColumnA']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### [Pandas Useful Functions](https://pandas.pydata.org/pandas-docs/stable/reference/general_functions.html)" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ColumnAColumnBColumnCColumnDProfitLossStudent
2016-01-010.9611910.214906NaNNaNProfitAlex
2016-01-020.5629810.602063NaNNaNLossAlex
2016-01-030.5838410.367577NaNNaNProfitAlex
2016-01-040.5629170.7128210.0179400.876961SameMarcos
2016-01-050.7949970.5187080.3099620.768632ProfitHannah
2016-01-060.9463440.9721930.2734690.435210LossHannah
\n", "
" ], "text/plain": [ " ColumnA ColumnB ColumnC ColumnD ProfitLoss Student\n", "2016-01-01 0.961191 0.214906 NaN NaN Profit Alex\n", "2016-01-02 0.562981 0.602063 NaN NaN Loss Alex\n", "2016-01-03 0.583841 0.367577 NaN NaN Profit Alex\n", "2016-01-04 0.562917 0.712821 0.017940 0.876961 Same Marcos\n", "2016-01-05 0.794997 0.518708 0.309962 0.768632 Profit Hannah\n", "2016-01-06 0.946344 0.972193 0.273469 0.435210 Loss Hannah" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transpose" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2016-01-012016-01-022016-01-032016-01-042016-01-052016-01-06
ColumnA0.9611910.5629810.5838410.5629170.7949970.946344
ColumnB0.2149060.6020630.3675770.7128210.5187080.972193
ColumnCNaNNaNNaN0.017940.3099620.273469
ColumnDNaNNaNNaN0.8769610.7686320.43521
ProfitLossProfitLossProfitSameProfitLoss
StudentAlexAlexAlexMarcosHannahHannah
\n", "
" ], "text/plain": [ " 2016-01-01 2016-01-02 2016-01-03 2016-01-04 2016-01-05 2016-01-06\n", "ColumnA 0.961191 0.562981 0.583841 0.562917 0.794997 0.946344\n", "ColumnB 0.214906 0.602063 0.367577 0.712821 0.518708 0.972193\n", "ColumnC NaN NaN NaN 0.01794 0.309962 0.273469\n", "ColumnD NaN NaN NaN 0.876961 0.768632 0.43521\n", "ProfitLoss Profit Loss Profit Same Profit Loss\n", "Student Alex Alex Alex Marcos Hannah Hannah" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.T" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2016-01-012016-01-022016-01-032016-01-042016-01-052016-01-06
ColumnA0.9611910.5629810.5838410.5629170.7949970.946344
ColumnB0.2149060.6020630.3675770.7128210.5187080.972193
ColumnCNaNNaNNaN0.017940.3099620.273469
ColumnDNaNNaNNaN0.8769610.7686320.43521
ProfitLossProfitLossProfitSameProfitLoss
StudentAlexAlexAlexMarcosHannahHannah
\n", "
" ], "text/plain": [ " 2016-01-01 2016-01-02 2016-01-03 2016-01-04 2016-01-05 2016-01-06\n", "ColumnA 0.961191 0.562981 0.583841 0.562917 0.794997 0.946344\n", "ColumnB 0.214906 0.602063 0.367577 0.712821 0.518708 0.972193\n", "ColumnC NaN NaN NaN 0.01794 0.309962 0.273469\n", "ColumnD NaN NaN NaN 0.876961 0.768632 0.43521\n", "ProfitLoss Profit Loss Profit Same Profit Loss\n", "Student Alex Alex Alex Marcos Hannah Hannah" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.transpose()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### [idmin & idmax](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmax.html)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timestamp('2016-01-01 00:00:00', freq='D')" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3['ColumnA'].idxmax()" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ColumnA 0.961191\n", "ColumnB 0.214906\n", "ColumnC NaN\n", "ColumnD NaN\n", "ProfitLoss Profit\n", "Student Alex\n", "Name: 2016-01-01 00:00:00, dtype: object" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.loc[df3['ColumnA'].idxmax()]" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9611911784461947" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max(df3['ColumnA'])" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timestamp('2016-01-04 00:00:00', freq='D')" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3['ColumnA'].idxmin()" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "ColumnA 0.562917\n", "ColumnB 0.712821\n", "ColumnC 0.01794\n", "ColumnD 0.876961\n", "ProfitLoss Same\n", "Student Marcos\n", "Name: 2016-01-04 00:00:00, dtype: object" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.loc[df3['ColumnA'].idxmin()]" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5629166477301453" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "min(df3['ColumnA'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### [Not Equal - ne](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ne.html) " ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", "5 False\n", "6 True\n", "7 True\n", "8 True\n", "9 True\n", "10 True\n", "11 True\n", "12 True\n", "Name: x, dtype: bool" ] }, "execution_count": 111, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame()\n", "df['x'] = [0,0,0,0,0,0,1,2,3,4,5,6,7]\n", "\n", "df['x'].ne(0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Nsmallest " ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 0\n", "2 0\n", "Name: x, dtype: int64" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['x'].nsmallest(3)" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
00
10
20
\n", "
" ], "text/plain": [ " x\n", "0 0\n", "1 0\n", "2 0" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.nsmallest(3, 'x')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Nlargest " ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12 7\n", "11 6\n", "10 5\n", "Name: x, dtype: int64" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['x'].nlargest(3)" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
127
116
105
\n", "
" ], "text/plain": [ " x\n", "12 7\n", "11 6\n", "10 5" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.nlargest(3, 'x')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Is in " ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
0True
1True
2True
3True
4True
5True
6False
7True
8False
9False
\n", "
" ], "text/plain": [ " x\n", "0 True\n", "1 True\n", "2 True\n", "3 True\n", "4 True\n", "5 True\n", "6 False\n", "7 True\n", "8 False\n", "9 False" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isin([0,2]).head(10)" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
00.0
10.0
20.0
30.0
40.0
50.0
6NaN
72.0
8NaN
9NaN
10NaN
11NaN
12NaN
\n", "
" ], "text/plain": [ " x\n", "0 0.0\n", "1 0.0\n", "2 0.0\n", "3 0.0\n", "4 0.0\n", "5 0.0\n", "6 NaN\n", "7 2.0\n", "8 NaN\n", "9 NaN\n", "10 NaN\n", "11 NaN\n", "12 NaN" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.isin([0,2])] #.dropna()" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
x
61.0
83.0
94.0
105.0
116.0
127.0
\n", "
" ], "text/plain": [ " x\n", "6 1.0\n", "8 3.0\n", "9 4.0\n", "10 5.0\n", "11 6.0\n", "12 7.0" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[~df.isin([0,2])].dropna()" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestStudentMathsPhysicsChemistryBiologyComputer_Science
0T2Student 15475669932
1T2Student 22991966958
2T1Student 32810223325
3T2Student 4292341042
4T3Student 54071454287
\n", "
" ], "text/plain": [ " Test Student Maths Physics Chemistry Biology Computer_Science\n", "0 T2 Student 1 54 75 66 99 32\n", "1 T2 Student 2 29 91 96 69 58\n", "2 T1 Student 3 28 10 22 33 25\n", "3 T2 Student 4 29 23 41 0 42\n", "4 T3 Student 5 40 71 45 42 87" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create a test dataframe\n", "# Untidy dataframe\n", "# x : Subjects\n", "# y : Student names\n", "\n", "marks = pd.DataFrame(np.random.randint(0, 100, size = (20,5)), \n", " columns = ['Maths', 'Physics','Chemistry', 'Biology', 'Computer_Science'])\n", "\n", "marks['Student'] = ['Student ' + str(i) for i in range(1,21)]\n", "marks['Test'] = np.random.choice(['T1', 'T2', 'T3'], size=len(marks))\n", "marks = marks[['Test','Student','Maths', 'Physics','Chemistry', 'Biology', 'Computer_Science']]\n", "\n", "display(marks.head())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### [Agg](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.agg.html)" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MathsPhysicsChemistryBiologyComputer_Science
sum994.01084.0818.01121.00999.00
min5.00.00.00.0011.00
mean49.754.240.956.0549.95
\n", "
" ], "text/plain": [ " Maths Physics Chemistry Biology Computer_Science\n", "sum 994.0 1084.0 818.0 1121.00 999.00\n", "min 5.0 0.0 0.0 0.00 11.00\n", "mean 49.7 54.2 40.9 56.05 49.95" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marks[['Maths', 'Physics','Chemistry', 'Biology', 'Computer_Science']].agg(['sum', 'min', 'mean'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Group By" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "grouped = marks.groupby('Test')" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "T1\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestStudentMathsPhysicsChemistryBiologyComputer_Science
2T1Student 32810223325
5T1Student 64350489571
6T1Student 7999701273
10T1Student 11853955055
11T1Student 129793226330
\n", "
" ], "text/plain": [ " Test Student Maths Physics Chemistry Biology Computer_Science\n", "2 T1 Student 3 28 10 22 33 25\n", "5 T1 Student 6 43 50 48 95 71\n", "6 T1 Student 7 99 97 0 12 73\n", "10 T1 Student 11 85 3 95 50 55\n", "11 T1 Student 12 97 93 22 63 30" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "T2\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestStudentMathsPhysicsChemistryBiologyComputer_Science
0T2Student 15475669932
1T2Student 22991966958
3T2Student 4292341042
7T2Student 8170727517
9T2Student 101594556945
\n", "
" ], "text/plain": [ " Test Student Maths Physics Chemistry Biology Computer_Science\n", "0 T2 Student 1 54 75 66 99 32\n", "1 T2 Student 2 29 91 96 69 58\n", "3 T2 Student 4 29 23 41 0 42\n", "7 T2 Student 8 17 0 72 75 17\n", "9 T2 Student 10 15 94 55 69 45" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "T3\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestStudentMathsPhysicsChemistryBiologyComputer_Science
4T3Student 54071454287
8T3Student 9752316538
12T3Student 139288581854
\n", "
" ], "text/plain": [ " Test Student Maths Physics Chemistry Biology Computer_Science\n", "4 T3 Student 5 40 71 45 42 87\n", "8 T3 Student 9 75 23 1 65 38\n", "12 T3 Student 13 92 88 58 18 54" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "for group in grouped:\n", " print(group[0])\n", " display(group[1].head())" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Test\n", "T1 55.111111\n", "T2 36.375000\n", "T3 69.000000\n", "Name: Maths, dtype: float64" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped['Maths'].mean()" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Test Student\n", "0 T1 9\n", "1 T2 8\n", "2 T3 3\n" ] } ], "source": [ "grouped2 = grouped.agg({\"Student\": \"nunique\"})\n", "grouped2 = grouped2.reset_index()\n", "print(grouped2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pivot" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "id": "h7UmJLl8L8Xs", "outputId": "7c07a5fa-bbc7-4e52-c64f-b30109932018" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StudentStudent 1Student 10Student 11Student 12Student 13Student 14Student 15Student 16Student 17Student 18Student 19Student 2Student 20Student 3Student 4Student 5Student 6Student 7Student 8Student 9
Test
T1NaNNaN95.022.0NaNNaNNaN43.04.014.053.0NaNNaN22.0NaNNaN48.00.0NaNNaN
T266.055.0NaNNaNNaN31.025.0NaNNaNNaNNaN96.027.0NaN41.0NaNNaNNaN72.0NaN
T3NaNNaNNaNNaN58.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN45.0NaNNaNNaN1.0
\n", "
" ], "text/plain": [ "Student Student 1 Student 10 Student 11 Student 12 Student 13 \\\n", "Test \n", "T1 NaN NaN 95.0 22.0 NaN \n", "T2 66.0 55.0 NaN NaN NaN \n", "T3 NaN NaN NaN NaN 58.0 \n", "\n", "Student Student 14 Student 15 Student 16 Student 17 Student 18 \\\n", "Test \n", "T1 NaN NaN 43.0 4.0 14.0 \n", "T2 31.0 25.0 NaN NaN NaN \n", "T3 NaN NaN NaN NaN NaN \n", "\n", "Student Student 19 Student 2 Student 20 Student 3 Student 4 Student 5 \\\n", "Test \n", "T1 53.0 NaN NaN 22.0 NaN NaN \n", "T2 NaN 96.0 27.0 NaN 41.0 NaN \n", "T3 NaN NaN NaN NaN NaN 45.0 \n", "\n", "Student Student 6 Student 7 Student 8 Student 9 \n", "Test \n", "T1 48.0 0.0 NaN NaN \n", "T2 NaN NaN 72.0 NaN \n", "T3 NaN NaN NaN 1.0 " ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pivot = marks.pivot(index='Test', columns='Student', values='Chemistry') \n", "pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### [Melt](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.melt.html)" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TestStudentMathsPhysicsChemistryBiologyComputer_Science
0T2Student 15475669932
1T2Student 22991966958
2T1Student 32810223325
3T2Student 4292341042
4T3Student 54071454287
5T1Student 64350489571
6T1Student 7999701273
7T2Student 8170727517
8T3Student 9752316538
9T2Student 101594556945
\n", "
" ], "text/plain": [ " Test Student Maths Physics Chemistry Biology Computer_Science\n", "0 T2 Student 1 54 75 66 99 32\n", "1 T2 Student 2 29 91 96 69 58\n", "2 T1 Student 3 28 10 22 33 25\n", "3 T2 Student 4 29 23 41 0 42\n", "4 T3 Student 5 40 71 45 42 87\n", "5 T1 Student 6 43 50 48 95 71\n", "6 T1 Student 7 99 97 0 12 73\n", "7 T2 Student 8 17 0 72 75 17\n", "8 T3 Student 9 75 23 1 65 38\n", "9 T2 Student 10 15 94 55 69 45" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "marks.head(10)" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StudentvariableFrequency
110Student 11Computer_Science55
111Student 12Computer_Science30
112Student 13Computer_Science54
113Student 14Computer_Science74
114Student 15Computer_Science11
115Student 16Computer_Science61
116Student 17Computer_Science18
117Student 18Computer_Science31
118Student 19Computer_Science79
119Student 20Computer_Science98
\n", "
" ], "text/plain": [ " Student variable Frequency\n", "110 Student 11 Computer_Science 55\n", "111 Student 12 Computer_Science 30\n", "112 Student 13 Computer_Science 54\n", "113 Student 14 Computer_Science 74\n", "114 Student 15 Computer_Science 11\n", "115 Student 16 Computer_Science 61\n", "116 Student 17 Computer_Science 18\n", "117 Student 18 Computer_Science 31\n", "118 Student 19 Computer_Science 79\n", "119 Student 20 Computer_Science 98" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tidy = pd.melt(marks, id_vars = 'Student', value_name = 'Frequency')\n", "tidy.tail(10)" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 120 entries, 0 to 119\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Student 120 non-null object\n", " 1 variable 120 non-null object\n", " 2 Frequency 120 non-null object\n", "dtypes: object(3)\n", "memory usage: 2.9+ KB\n" ] } ], "source": [ "tidy.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "name": "15_Pandas_Basics.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 }