{ "cells": [ { "cell_type": "markdown", "id": "0a104cc0-91f0-4d63-ab0c-b6221a1859ec", "metadata": { "tags": [] }, "source": [ "# Actualización de campos --- 7:30 min\n", "\n", "* 7:30 min | Última modificación: Octubre 6, 2021 " ] }, { "cell_type": "code", "execution_count": 1, "id": "4e0387c9-7923-4548-ada5-733772ecf36f", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "pd.set_option(\"display.notebook_repr_html\", False)" ] }, { "cell_type": "markdown", "id": "773fffb6-c020-4816-9b2d-f76d9faf5531", "metadata": { "tags": [] }, "source": [ "## Método update()" ] }, { "cell_type": "code", "execution_count": 2, "id": "f8d4511d-4f8f-4415-b194-8fdf3bd23463", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data_1.csv\n" ] } ], "source": [ "%%writefile /tmp/data_1.csv\n", "clientId,name,bonus,costs\n", "10,Omar Y. Fletcher,0,9999\n", "11,Buffy W. Vincent,1,\n", "12,Mira N. Franklin,2,\n", "13,Ferris Q. Le,3,4" ] }, { "cell_type": "code", "execution_count": 3, "id": "ab2a77e1-d60b-41c1-969b-e8d8a92394d3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data_2.csv\n" ] } ], "source": [ "%%writefile /tmp/data_2.csv\n", "clientId,location,bonus,costs\n", "12,\"P.O. Box 445, 323 Cursus Rd.\",100,12\n", "13,Ap #791-3809 Eu Street,,13\n", "14,6715 Diam. Rd.,200," ] }, { "cell_type": "code", "execution_count": 4, "id": "f2713246-c926-400e-9bf7-a406abda7b06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ " name bonus costs\n", "clientId \n", "10 Omar Y. Fletcher 0 9999.0\n", "11 Buffy W. Vincent 1 NaN\n", "12 Mira N. Franklin 2 NaN\n", "13 Ferris Q. Le 3 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " location bonus costs\n", "clientId \n", "12 P.O. Box 445, 323 Cursus Rd. 100.0 12.0\n", "13 Ap #791-3809 Eu Street NaN 13.0\n", "14 6715 Diam. Rd. 200.0 NaN" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " name bonus costs\n", "clientId \n", "10 Omar Y. Fletcher 0.0 9999.0\n", "11 Buffy W. Vincent 1.0 NaN\n", "12 Mira N. Franklin 100.0 12.0\n", "13 Ferris Q. Le 3.0 13.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n", "df_1.set_index('clientId', inplace=True)\n", "\n", "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n", "df_2.set_index('clientId', inplace=True)\n", "\n", "\n", "display(\n", " df_1,\n", " \"\",\n", " df_2,\n", ")\n", "\n", "df_1.update(df_2)\n", "\n", "#\n", "# Update df_1.\n", "#\n", "# Note que costs para el primer cliente se \n", "# actualiza con el valor del segundo df\n", "#\n", "# Tambien se actualiza el ultimo registro\n", "# para el campo bonus\n", "#\n", "display(\n", " \"\",\n", " df_1\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "id": "62613416-4ef0-4d31-bc62-478e98ba7802", "metadata": {}, "outputs": [ { "data": { "text/plain": [ " name bonus costs\n", "clientId \n", "10 Omar Y. Fletcher 0 9999.0\n", "11 Buffy W. Vincent 1 NaN\n", "12 Mira N. Franklin 2 NaN\n", "13 Ferris Q. Le 3 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " location bonus costs\n", "clientId \n", "12 P.O. Box 445, 323 Cursus Rd. 100.0 12.0\n", "13 Ap #791-3809 Eu Street NaN 13.0\n", "14 6715 Diam. Rd. 200.0 NaN" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " location bonus costs\n", "clientId \n", "12 P.O. Box 445, 323 Cursus Rd. 2.0 12.0\n", "13 Ap #791-3809 Eu Street 3.0 4.0\n", "14 6715 Diam. Rd. 200.0 NaN" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n", "df_1.set_index('clientId', inplace=True)\n", "\n", "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n", "df_2.set_index('clientId', inplace=True)\n", "\n", "\n", "display(\n", " df_1,\n", " \"\",\n", " df_2,\n", ")\n", "\n", "df_2.update(df_1)\n", "\n", "#\n", "# Update df_1.\n", "#\n", "# Note que costs para el primer cliente se \n", "# actualiza con el valor del segundo df\n", "#\n", "# Tambien se actualiza el ultimo registro\n", "# para el campo bonus\n", "#\n", "display(\n", " \"\",\n", " df_2\n", ")" ] }, { "cell_type": "markdown", "id": "4e855b51-02e5-4989-8cf9-391c10bf0419", "metadata": {}, "source": [ "## Método combine() sobre columnas" ] }, { "cell_type": "code", "execution_count": 6, "id": "672ee6ab-6546-448d-a181-60f27b79a7b5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data_1.csv\n" ] } ], "source": [ "%%writefile /tmp/data_1.csv\n", "A,B\n", "0,4\n", "0,4" ] }, { "cell_type": "code", "execution_count": 7, "id": "0ce4c806-38f3-4070-9ced-3befbac4f902", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data_2.csv\n" ] } ], "source": [ "%%writefile /tmp/data_2.csv\n", "A,B\n", "1,3\n", "1,3" ] }, { "cell_type": "code", "execution_count": 8, "id": "42ae9e99-dcf0-4dfb-b9c2-570f0002da84", "metadata": {}, "outputs": [ { "data": { "text/plain": [ " A B\n", "0 0 4\n", "1 0 4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " A B\n", "0 1 3\n", "1 1 3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " A B\n", "0 0 3\n", "1 0 3" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n", "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n", "\n", "display(\n", " df_1,\n", " \"\",\n", " df_2,\n", " \"\"\n", ")\n", "\n", "#\n", "# Mínimo de la suma de columnas\n", "#\n", "take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2\n", "\n", "df_1 = df_1.combine(\n", " other=df_2, \n", " func=take_smaller,\n", ")\n", "\n", "display(df_1)" ] }, { "cell_type": "markdown", "id": "7c08c390-34cc-4da7-9f2e-6dd0945c55a5", "metadata": {}, "source": [ "## Método combine() sobre elementos" ] }, { "cell_type": "code", "execution_count": 9, "id": "fdecbe25-caa4-4d87-8405-db718bc34657", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data_1.csv\n" ] } ], "source": [ "%%writefile /tmp/data_1.csv\n", "A,B\n", "5,2\n", "0,4" ] }, { "cell_type": "code", "execution_count": 10, "id": "4aa2a0f6-7ed2-46bd-b960-70e4e591b09d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data_2.csv\n" ] } ], "source": [ "%%writefile /tmp/data_2.csv\n", "A,B\n", "1,3\n", "1,3" ] }, { "cell_type": "code", "execution_count": 11, "id": "f212dbd4-86a1-48a7-8e6e-42cdcc524512", "metadata": {}, "outputs": [ { "data": { "text/plain": [ " A B\n", "0 5 2\n", "1 0 4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " A B\n", "0 1 3\n", "1 1 3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " A B\n", "0 1 2\n", "1 0 3" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "\n", "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n", "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n", "\n", "display(\n", " df_1,\n", " \"\",\n", " df_2,\n", " \"\"\n", ")\n", "\n", "\n", "df_1 = df_1.combine(\n", " other=df_2, \n", " func=np.minimum,\n", ")\n", "\n", "display(df_1)" ] }, { "cell_type": "markdown", "id": "659afa98-260d-45b0-8214-3c8e09bdc084", "metadata": {}, "source": [ "## Método combine_first()" ] }, { "cell_type": "markdown", "id": "649468af-c1e1-4ae8-a192-3fe88a1ee84e", "metadata": {}, "source": [ "Combina dos datafrmames llenando los valores nulos del primer dataframe con los valores del segundo dataframe." ] }, { "cell_type": "code", "execution_count": 12, "id": "3a001bc0-a4c9-437a-a868-eb1e2bfa17ef", "metadata": {}, "outputs": [ { "data": { "text/plain": [ " A B\n", "0 NaN NaN\n", "1 0.0 4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " A B\n", "0 1 3\n", "1 1 3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "''" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ " A B\n", "0 1.0 3.0\n", "1 0.0 4.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})\n", "df_2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})\n", "result = df_1.combine_first(df_2)\n", "\n", "display(\n", " df_1,\n", " \"\",\n", " df_2,\n", " \"\",\n", " result\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 5 }