{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pandas para usuarios de Microsoft Excel --- 19:50 min\n", "\n", "* 19:50 min | Última modificación: Octubre 6, 2021" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preparación" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Lectura de un archivo CSV" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/tips.csv'\n", "tips = pd.read_csv(url)\n", "tips.head(n=5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Autofiltro en Microsoft Excel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![autofilter](images/autofilter.png)" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "## Ordenamiento de una columna" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
21428.176.50FemaleYesSatDinner3
5234.815.20FemaleNoSunDinner4
8534.835.17FemaleNoThurLunch4
15529.855.14FemaleNoSunDinner5
1135.265.00FemaleNoSunDinner4
7325.285.00FemaleYesSatDinner2
14327.055.00FemaleNoThurLunch6
19743.115.00FemaleYesThurLunch4
23835.834.67FemaleNoSatDinner3
9316.324.30FemaleYesFriDinner2
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "214 28.17 6.50 Female Yes Sat Dinner 3\n", "52 34.81 5.20 Female No Sun Dinner 4\n", "85 34.83 5.17 Female No Thur Lunch 4\n", "155 29.85 5.14 Female No Sun Dinner 5\n", "11 35.26 5.00 Female No Sun Dinner 4\n", "73 25.28 5.00 Female Yes Sat Dinner 2\n", "143 27.05 5.00 Female No Thur Lunch 6\n", "197 43.11 5.00 Female Yes Thur Lunch 4\n", "238 35.83 4.67 Female No Sat Dinner 3\n", "93 16.32 4.30 Female Yes Fri Dinner 2" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.sort_values(\n", " by=[\"sex\", \"tip\"],\n", " ascending=[True, False],\n", ").head(n=10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Filtrado" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![filtrado-excel](images/filtrado-excel.png)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
2339.427.58MaleNoSatDinner4
5948.276.73MaleNoSatDinner4
14134.306.70MaleNoThurLunch6
17050.8110.00MaleYesSatDinner3
21248.339.00MaleNoSatDinner4
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "141 34.30 6.70 Male No Thur Lunch 6\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Filtro \"Mayor que\"\n", "#\n", "tips[tips['tip'] > 6.50].head(n=10)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
2339.427.58MaleNoSatDinner4
5948.276.73MaleNoSatDinner4
14134.306.70MaleNoThurLunch6
17050.8110.00MaleYesSatDinner3
21248.339.00MaleNoSatDinner4
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "141 34.30 6.70 Male No Thur Lunch 6\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Comando equivalente con query\n", "#\n", "tips.query('tip > 6.50').head(n=10)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
2339.427.58MaleNoSatDinner4
5948.276.73MaleNoSatDinner4
17050.8110.00MaleYesSatDinner3
21248.339.00MaleNoSatDinner4
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Filtro con condicional compuesto\n", "#\n", "tips[(tips['tip'] > 6.50) & (tips['time'] == 'Dinner')].head(n=10)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
21930.143.09FemaleYesSatDinner4
22113.423.48FemaleYesFriLunch2
22315.983.00FemaleNoFriLunch3
22720.453.00MaleNoSatDinner4
23115.693.00MaleYesSatDinner3
23211.613.39MaleNoSatDinner2
23415.533.00MaleYesSatDinner2
23835.834.67FemaleNoSatDinner3
23929.035.92MaleNoSatDinner3
24318.783.00FemaleNoThurDinner2
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "219 30.14 3.09 Female Yes Sat Dinner 4\n", "221 13.42 3.48 Female Yes Fri Lunch 2\n", "223 15.98 3.00 Female No Fri Lunch 3\n", "227 20.45 3.00 Male No Sat Dinner 4\n", "231 15.69 3.00 Male Yes Sat Dinner 3\n", "232 11.61 3.39 Male No Sat Dinner 2\n", "234 15.53 3.00 Male Yes Sat Dinner 2\n", "238 35.83 4.67 Female No Sat Dinner 3\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "243 18.78 3.00 Female No Thur Dinner 2" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Filtro \"Por encima del promedio\"\n", "#\n", "tips[\n", " tips['tip'] > tips['tip'].mean()\n", "].tail(n=10)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.99827868852459" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Cómputo del promedio\n", "#\n", "tips['tip'].mean()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
17050.8110.00MaleYesSatDinner3
21248.339.00MaleNoSatDinner4
2339.427.58MaleNoSatDinner4
5948.276.73MaleNoSatDinner4
14134.306.70MaleNoThurLunch6
18323.176.50MaleYesSunDinner4
21428.176.50FemaleYesSatDinner3
4732.406.00MaleNoSunDinner4
23929.035.92MaleNoSatDinner3
8824.715.85MaleNoThurLunch2
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "141 34.30 6.70 Male No Thur Lunch 6\n", "183 23.17 6.50 Male Yes Sun Dinner 4\n", "214 28.17 6.50 Female Yes Sat Dinner 3\n", "47 32.40 6.00 Male No Sun Dinner 4\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "88 24.71 5.85 Male No Thur Lunch 2" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Diez mejores\n", "#\n", "tips.nlargest(10, 'tip')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesize
673.071.00FemaleYesSatDinner1
925.751.00FemaleYesFriDinner2
1117.251.00FemaleNoSatDinner1
23612.601.00MaleYesSatDinner2
016.991.01FemaleNoSunDinner2
21512.901.10FemaleYesSatDinner2
23732.831.17MaleYesSatDinner2
7510.511.25MaleNoSatDinner2
1358.511.25FemaleNoThurLunch2
23510.071.25MaleNoSatDinner2
\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "67 3.07 1.00 Female Yes Sat Dinner 1\n", "92 5.75 1.00 Female Yes Fri Dinner 2\n", "111 7.25 1.00 Female No Sat Dinner 1\n", "236 12.60 1.00 Male Yes Sat Dinner 2\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "215 12.90 1.10 Female Yes Sat Dinner 2\n", "237 32.83 1.17 Male Yes Sat Dinner 2\n", "75 10.51 1.25 Male No Sat Dinner 2\n", "135 8.51 1.25 Female No Thur Lunch 2\n", "235 10.07 1.25 Male No Sat Dinner 2" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Diez inferiores\n", "#\n", "tips.nsmallest(10, 'tip')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Eliminación de una columna" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokertimesize
016.991.01FemaleNoDinner2
110.341.66MaleNoDinner3
221.013.50MaleNoDinner3
323.683.31MaleNoDinner2
424.593.61FemaleNoDinner4
.....................
23929.035.92MaleNoDinner3
24027.182.00FemaleYesDinner2
24122.672.00MaleYesDinner2
24217.821.75MaleNoDinner2
24318.783.00FemaleNoDinner2
\n", "

244 rows × 6 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker time size\n", "0 16.99 1.01 Female No Dinner 2\n", "1 10.34 1.66 Male No Dinner 3\n", "2 21.01 3.50 Male No Dinner 3\n", "3 23.68 3.31 Male No Dinner 2\n", "4 24.59 3.61 Female No Dinner 4\n", ".. ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Dinner 3\n", "240 27.18 2.00 Female Yes Dinner 2\n", "241 22.67 2.00 Male Yes Dinner 2\n", "242 17.82 1.75 Male No Dinner 2\n", "243 18.78 3.00 Female No Dinner 2\n", "\n", "[244 rows x 6 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.drop(\"day\", axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cambio del nombre de una columna" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexis_smokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
........................
23929.035.92MaleNoSatDinner3
24027.182.00FemaleYesSatDinner2
24122.672.00MaleYesSatDinner2
24217.821.75MaleNoSatDinner2
24318.783.00FemaleNoThurDinner2
\n", "

244 rows × 7 columns

\n", "
" ], "text/plain": [ " total_bill tip sex is_smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", ".. ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "243 18.78 3.00 Female No Thur Dinner 2\n", "\n", "[244 rows x 7 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.rename(\n", " columns={\"smoker\": \"is_smoker\"},\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Subtotales" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sex\n", "Female 87\n", "Male 157\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Contar -- size()\n", "#\n", "tips.groupby('sex').size()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsmokerdaytimesize
sex
Female878787878787
Male157157157157157157
\n", "
" ], "text/plain": [ " total_bill tip smoker day time size\n", "sex \n", "Female 87 87 87 87 87 87\n", "Male 157 157 157 157 157 157" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Contar -- size() vs count()\n", "#\n", "tips.groupby('sex').count()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsmokerdaytimesize
sex
Female44.306.5YesThurLunch6
Male50.8110.0YesThurLunch6
\n", "
" ], "text/plain": [ " total_bill tip smoker day time size\n", "sex \n", "Female 44.30 6.5 Yes Thur Lunch 6\n", "Male 50.81 10.0 Yes Thur Lunch 6" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Contar -- max(), min(), ...\n", "#\n", "tips.groupby('sex').max()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tiptotal_bill
sex
Female6.518.056897
Male10.020.744076
\n", "
" ], "text/plain": [ " tip total_bill\n", "sex \n", "Female 6.5 18.056897\n", "Male 10.0 20.744076" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Funciones diferenciales por columna\n", "#\n", "tips.groupby(\"sex\").agg(\n", " {\n", " \"tip\": np.max,\n", " \"total_bill\": np.mean,\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tiptotal_bill
sex
Female6.518.056897
Male10.020.744076
\n", "
" ], "text/plain": [ " tip total_bill\n", "sex \n", "Female 6.5 18.056897\n", "Male 10.0 20.744076" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.groupby(\n", " \"sex\", \n", " as_index=True,\n", ").agg(\n", " {\n", " \"tip\": np.max,\n", " \"total_bill\": np.mean,\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tiptotal_bill
sex
Female6.518.056897
Male10.020.744076
\n", "
" ], "text/plain": [ " tip total_bill\n", "sex \n", "Female 6.5 18.056897\n", "Male 10.0 20.744076" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Opción as_index\n", "#\n", "tips.groupby(\n", " \"sex\",\n", " as_index=True,\n", ").agg({\"tip\": np.max, \"total_bill\": np.mean})" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sextiptotal_bill
0Female6.518.056897
1Male10.020.744076
\n", "
" ], "text/plain": [ " sex tip total_bill\n", "0 Female 6.5 18.056897\n", "1 Male 10.0 20.744076" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.groupby(\n", " \"sex\",\n", " as_index=False,\n", ").agg({\"tip\": np.max, \"total_bill\": np.mean})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Computos sobre las columnas de una tabla" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tips.columns" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_billtipsexsmokerdaytimesizetax10
016.991.01FemaleNoSunDinner21.699
110.341.66MaleNoSunDinner31.034
221.013.50MaleNoSunDinner32.101
323.683.31MaleNoSunDinner22.368
424.593.61FemaleNoSunDinner42.459
...........................
23929.035.92MaleNoSatDinner32.903
24027.182.00FemaleYesSatDinner22.718
24122.672.00MaleYesSatDinner22.267
24217.821.75MaleNoSatDinner21.782
24318.783.00FemaleNoThurDinner21.878
\n", "

244 rows × 8 columns

\n", "
" ], "text/plain": [ " total_bill tip sex smoker day time size tax10\n", "0 16.99 1.01 Female No Sun Dinner 2 1.699\n", "1 10.34 1.66 Male No Sun Dinner 3 1.034\n", "2 21.01 3.50 Male No Sun Dinner 3 2.101\n", "3 23.68 3.31 Male No Sun Dinner 2 2.368\n", "4 24.59 3.61 Female No Sun Dinner 4 2.459\n", ".. ... ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3 2.903\n", "240 27.18 2.00 Female Yes Sat Dinner 2 2.718\n", "241 22.67 2.00 Male Yes Sat Dinner 2 2.267\n", "242 17.82 1.75 Male No Sat Dinner 2 1.782\n", "243 18.78 3.00 Female No Thur Dinner 2 1.878\n", "\n", "[244 rows x 8 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Una nueva columna\n", "#\n", "tips.assign(tax10=0.1 * tips.total_bill)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Valores
010
121
232
342
455
561
674
\n", "
" ], "text/plain": [ " Valores\n", "0 10\n", "1 21\n", "2 32\n", "3 42\n", "4 55\n", "5 61\n", "6 74" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Computos con valores respecto a otras filas\n", "#\n", "x = pd.DataFrame([10, 21, 32, 42, 55, 61, 74], columns=['Valores'])\n", "x" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Valores
00
10
210
321
432
542
655
\n", "
" ], "text/plain": [ " Valores\n", "0 0\n", "1 0\n", "2 10\n", "3 21\n", "4 32\n", "5 42\n", "6 55" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shift(periods=2, fill_value=0)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Valores
032
142
255
361
474
50
60
\n", "
" ], "text/plain": [ " Valores\n", "0 32\n", "1 42\n", "2 55\n", "3 61\n", "4 74\n", "5 0\n", "6 0" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shift(periods=-2, fill_value=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Escenarios" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Alternativa 1\n", "#\n", "escenarios = [\n", " (1, 2, 3),\n", " (4, 5, 6),\n", " (7, 8, 9),\n", " (10, 11, 12),\n", "]\n", "a, b, c = escenarios[1]\n", "a + b + c" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Alterantiva 2\n", "#\n", "def f(a, b, c):\n", " return a + b + c\n", "\n", "\n", "escenarios = [\n", " {\"a\": 1, \"b\": 2, \"c\": 3},\n", " {\"a\": 4, \"b\": 5, \"c\": 6},\n", " {\"a\": 7, \"b\": 8, \"c\": 9},\n", " {\"a\": 10, \"b\": 11, \"c\": 12},\n", "]\n", "f(**escenarios[1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tablas dinámicas" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Escenario
00
11
22
33
\n", "
" ], "text/plain": [ " Escenario\n", "0 0\n", "1 1\n", "2 2\n", "3 3" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Del ejemplo anterior\n", "# \n", "def f(a, b, c):\n", " return a + b + c\n", "\n", "escenarios = [\n", " {\"a\": 1, \"b\": 2, \"c\": 3},\n", " {\"a\": 4, \"b\": 5, \"c\": 6},\n", " {\"a\": 7, \"b\": 8, \"c\": 9},\n", " {\"a\": 10, \"b\": 11, \"c\": 12},\n", "]\n", "\n", "z = pd.DataFrame(range(len(escenarios)), columns=['Escenario'])\n", "z" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Escenariof
006
1115
2224
3333
\n", "
" ], "text/plain": [ " Escenario f\n", "0 0 6\n", "1 1 15\n", "2 2 24\n", "3 3 33" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "z['f'] = [f(**escenarios[i]) for i in z['Escenario']]\n", "z" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Solver" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def f0(x):\n", " return sum([i**2 for i in x])\n", "\n", "# 1^2 + 2^2 + 3^2 = 14\n", "f0([1, 2, 3])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-1.17452324e-08, -8.75678727e-09, -3.66353142e-09])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from scipy.optimize import minimize\n", "\n", "minimize(f0, x0=[1, 2, 3]).x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Styles" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Basado en:\n", "\n", "https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCDE
01.01.329212NaN-0.316280-0.990810
12.0-1.070816-1.4387130.5644170.295722
23.0-1.6264040.2195650.6788051.889273
34.00.9615380.104011NaN0.850229
45.01.4534251.0577370.1655620.515018
56.0-1.3369360.5628611.392855-0.063328
67.00.1216681.207603-0.0020401.627796
78.00.3544931.037528-0.3856840.519818
89.01.686583-1.3259631.428984-2.089354
910.0-0.1298200.631523-0.5865380.290720
\n", "
" ], "text/plain": [ " A B C D E\n", "0 1.0 1.329212 NaN -0.316280 -0.990810\n", "1 2.0 -1.070816 -1.438713 0.564417 0.295722\n", "2 3.0 -1.626404 0.219565 0.678805 1.889273\n", "3 4.0 0.961538 0.104011 NaN 0.850229\n", "4 5.0 1.453425 1.057737 0.165562 0.515018\n", "5 6.0 -1.336936 0.562861 1.392855 -0.063328\n", "6 7.0 0.121668 1.207603 -0.002040 1.627796\n", "7 8.0 0.354493 1.037528 -0.385684 0.519818\n", "8 9.0 1.686583 -1.325963 1.428984 -2.089354\n", "9 10.0 -0.129820 0.631523 -0.586538 0.290720" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "np.random.seed(24)\n", "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n", "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n", " axis=1)\n", "df.iloc[3, 3] = np.nan\n", "df.iloc[0, 2] = np.nan\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def color_negative_red(val):\n", " color = 'red' if val < 0 else 'black'\n", " return 'color: %s' % color\n", "\n", "df.style.applymap(color_negative_red)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def highlight_max(s):\n", " is_max = s == s.max()\n", " return ['background-color: yellow' if v else '' for v in is_max]\n", "\n", "df.style.apply(highlight_max)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.\\\n", " applymap(color_negative_red).\\\n", " apply(highlight_max)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.apply(highlight_max, subset=['B', 'C', 'D'])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.applymap(color_negative_red, subset=pd.IndexSlice[2:5, [\"B\", \"D\"]])" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
0100.00%132.92%nan%-31.63%-99.08%
1200.00%-107.08%-143.87%56.44%29.57%
2300.00%-162.64%21.96%67.88%188.93%
3400.00%96.15%10.40%nan%85.02%
4500.00%145.34%105.77%16.56%51.50%
5600.00%-133.69%56.29%139.29%-6.33%
6700.00%12.17%120.76%-0.20%162.78%
7800.00%35.45%103.75%-38.57%51.98%
8900.00%168.66%-132.60%142.90%-208.94%
91000.00%-12.98%63.15%-58.65%29.07%
" ], "text/plain": [ "" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.format(\"{:.2%}\")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001000nan-0.32-0.990810
12.000000-100-1.438713+0.560.295722
23.000000-2000.219565+0.681.889273
34.00000010000.104011+nan0.850229
45.00000010001.057737+0.170.515018
56.000000-1000.562861+1.39-0.063328
67.00000000001.207603-0.001.627796
78.00000000001.037528-0.390.519818
89.0000002000-1.325963+1.43-2.089354
910.000000-0000.631523-0.590.290720
" ], "text/plain": [ "" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.format({'B': \"{:0<4.0f}\", 'D': '{:+.2f}'})" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.000000±1.33nan-0.316280-0.990810
12.000000±1.07-1.4387130.5644170.295722
23.000000±1.630.2195650.6788051.889273
34.000000±0.960.104011nan0.850229
45.000000±1.451.0577370.1655620.515018
56.000000±1.340.5628611.392855-0.063328
67.000000±0.121.207603-0.0020401.627796
78.000000±0.351.037528-0.3856840.519818
89.000000±1.69-1.3259631.428984-2.089354
910.000000±0.130.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
0100.00%132.92%--31.63%-99.08%
1200.00%-107.08%-143.87%56.44%29.57%
2300.00%-162.64%21.96%67.88%188.93%
3400.00%96.15%10.40%-85.02%
4500.00%145.34%105.77%16.56%51.50%
5600.00%-133.69%56.29%139.29%-6.33%
6700.00%12.17%120.76%-0.20%162.78%
7800.00%35.45%103.75%-38.57%51.98%
8900.00%168.66%-132.60%142.90%-208.94%
91000.00%-12.98%63.15%-58.65%29.07%
" ], "text/plain": [ "" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.format(\"{:.2%}\", na_rep=\"-\")" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.highlight_null(null_color='red')" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import seaborn as sns\n", "\n", "cm = sns.light_palette(\"green\", as_cmap=True)\n", "\n", "df.style.background_gradient(cmap=cm)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
" ], "text/plain": [ "" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[:4].style.background_gradient(cmap='viridis')" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.highlight_max(axis=0)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.bar(subset=['A', 'B'], color='#d65f5f')" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
01.0000001.329212nan-0.316280-0.990810
12.000000-1.070816-1.4387130.5644170.295722
23.000000-1.6264040.2195650.6788051.889273
34.0000000.9615380.104011nan0.850229
45.0000001.4534251.0577370.1655620.515018
56.000000-1.3369360.5628611.392855-0.063328
67.0000000.1216681.207603-0.0020401.627796
78.0000000.3544931.037528-0.3856840.519818
89.0000001.686583-1.3259631.428984-2.089354
910.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.bar(subset=['A', 'B'], align='mid', color=['#d65f5f', '#5fba7d'])" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B C D E
1.0000001.329212nan-0.316280-0.990810
2.000000-1.070816-1.4387130.5644170.295722
3.000000-1.6264040.2195650.6788051.889273
4.0000000.9615380.104011nan0.850229
5.0000001.4534251.0577370.1655620.515018
6.000000-1.3369360.5628611.392855-0.063328
7.0000000.1216681.207603-0.0020401.627796
8.0000000.3544931.037528-0.3856840.519818
9.0000001.686583-1.3259631.428984-2.089354
10.000000-0.1298200.631523-0.5865380.290720
" ], "text/plain": [ "" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.hide_index()" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
A B E
01.0000001.329212-0.990810
12.000000-1.0708160.295722
23.000000-1.6264041.889273
34.0000000.9615380.850229
45.0000001.4534250.515018
56.000000-1.336936-0.063328
67.0000000.1216681.627796
78.0000000.3544930.519818
89.0000001.686583-2.089354
910.000000-0.1298200.290720
" ], "text/plain": [ "" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.style.hide_columns(['C','D'])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }