{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0a104cc0-91f0-4d63-ab0c-b6221a1859ec",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Actualización de campos --- 7:30 min\n",
    "\n",
    "* 7:30 min | Última modificación: Octubre 6, 2021 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4e0387c9-7923-4548-ada5-733772ecf36f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "pd.set_option(\"display.notebook_repr_html\", False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "773fffb6-c020-4816-9b2d-f76d9faf5531",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Método update()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f8d4511d-4f8f-4415-b194-8fdf3bd23463",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/data_1.csv\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/data_1.csv\n",
    "clientId,name,bonus,costs\n",
    "10,Omar Y. Fletcher,0,9999\n",
    "11,Buffy W. Vincent,1,\n",
    "12,Mira N. Franklin,2,\n",
    "13,Ferris Q. Le,3,4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ab2a77e1-d60b-41c1-969b-e8d8a92394d3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/data_2.csv\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/data_2.csv\n",
    "clientId,location,bonus,costs\n",
    "12,\"P.O. Box 445, 323 Cursus Rd.\",100,12\n",
    "13,Ap #791-3809 Eu Street,,13\n",
    "14,6715 Diam. Rd.,200,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f2713246-c926-400e-9bf7-a406abda7b06",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                      name  bonus   costs\n",
       "clientId                                 \n",
       "10        Omar Y. Fletcher      0  9999.0\n",
       "11        Buffy W. Vincent      1     NaN\n",
       "12        Mira N. Franklin      2     NaN\n",
       "13            Ferris Q. Le      3     4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "                              location  bonus  costs\n",
       "clientId                                            \n",
       "12        P.O. Box 445, 323 Cursus Rd.  100.0   12.0\n",
       "13              Ap #791-3809 Eu Street    NaN   13.0\n",
       "14                      6715 Diam. Rd.  200.0    NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "                      name  bonus   costs\n",
       "clientId                                 \n",
       "10        Omar Y. Fletcher    0.0  9999.0\n",
       "11        Buffy W. Vincent    1.0     NaN\n",
       "12        Mira N. Franklin  100.0    12.0\n",
       "13            Ferris Q. Le    3.0    13.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n",
    "df_1.set_index('clientId', inplace=True)\n",
    "\n",
    "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n",
    "df_2.set_index('clientId', inplace=True)\n",
    "\n",
    "\n",
    "display(\n",
    "    df_1,\n",
    "    \"\",\n",
    "    df_2,\n",
    ")\n",
    "\n",
    "df_1.update(df_2)\n",
    "\n",
    "#\n",
    "# Update df_1.\n",
    "#\n",
    "# Note que costs para el primer cliente se \n",
    "# actualiza con el valor del segundo df\n",
    "#\n",
    "# Tambien se actualiza el ultimo registro\n",
    "# para el campo bonus\n",
    "#\n",
    "display(\n",
    "    \"\",\n",
    "    df_1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "62613416-4ef0-4d31-bc62-478e98ba7802",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                      name  bonus   costs\n",
       "clientId                                 \n",
       "10        Omar Y. Fletcher      0  9999.0\n",
       "11        Buffy W. Vincent      1     NaN\n",
       "12        Mira N. Franklin      2     NaN\n",
       "13            Ferris Q. Le      3     4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "                              location  bonus  costs\n",
       "clientId                                            \n",
       "12        P.O. Box 445, 323 Cursus Rd.  100.0   12.0\n",
       "13              Ap #791-3809 Eu Street    NaN   13.0\n",
       "14                      6715 Diam. Rd.  200.0    NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "                              location  bonus  costs\n",
       "clientId                                            \n",
       "12        P.O. Box 445, 323 Cursus Rd.    2.0   12.0\n",
       "13              Ap #791-3809 Eu Street    3.0    4.0\n",
       "14                      6715 Diam. Rd.  200.0    NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n",
    "df_1.set_index('clientId', inplace=True)\n",
    "\n",
    "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n",
    "df_2.set_index('clientId', inplace=True)\n",
    "\n",
    "\n",
    "display(\n",
    "    df_1,\n",
    "    \"\",\n",
    "    df_2,\n",
    ")\n",
    "\n",
    "df_2.update(df_1)\n",
    "\n",
    "#\n",
    "# Update df_1.\n",
    "#\n",
    "# Note que costs para el primer cliente se \n",
    "# actualiza con el valor del segundo df\n",
    "#\n",
    "# Tambien se actualiza el ultimo registro\n",
    "# para el campo bonus\n",
    "#\n",
    "display(\n",
    "    \"\",\n",
    "    df_2\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e855b51-02e5-4989-8cf9-391c10bf0419",
   "metadata": {},
   "source": [
    "## Método combine() sobre columnas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "672ee6ab-6546-448d-a181-60f27b79a7b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/data_1.csv\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/data_1.csv\n",
    "A,B\n",
    "0,4\n",
    "0,4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0ce4c806-38f3-4070-9ced-3befbac4f902",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/data_2.csv\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/data_2.csv\n",
    "A,B\n",
    "1,3\n",
    "1,3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "42ae9e99-dcf0-4dfb-b9c2-570f0002da84",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  0  4\n",
       "1  0  4"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  1  3\n",
       "1  1  3"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  0  3\n",
       "1  0  3"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n",
    "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n",
    "\n",
    "display(\n",
    "    df_1,\n",
    "    \"\",\n",
    "    df_2,\n",
    "    \"\"\n",
    ")\n",
    "\n",
    "#\n",
    "# Mínimo de la suma de columnas\n",
    "#\n",
    "take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2\n",
    "\n",
    "df_1 = df_1.combine(\n",
    "    other=df_2, \n",
    "    func=take_smaller,\n",
    ")\n",
    "\n",
    "display(df_1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c08c390-34cc-4da7-9f2e-6dd0945c55a5",
   "metadata": {},
   "source": [
    "## Método combine() sobre elementos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fdecbe25-caa4-4d87-8405-db718bc34657",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/data_1.csv\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/data_1.csv\n",
    "A,B\n",
    "5,2\n",
    "0,4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "4aa2a0f6-7ed2-46bd-b960-70e4e591b09d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/data_2.csv\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/data_2.csv\n",
    "A,B\n",
    "1,3\n",
    "1,3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f212dbd4-86a1-48a7-8e6e-42cdcc524512",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  5  2\n",
       "1  0  4"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  1  3\n",
       "1  1  3"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  1  2\n",
       "1  0  3"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "df_1 = pd.read_csv(\"/tmp/data_1.csv\")\n",
    "df_2 = pd.read_csv(\"/tmp/data_2.csv\")\n",
    "\n",
    "display(\n",
    "    df_1,\n",
    "    \"\",\n",
    "    df_2,\n",
    "    \"\"\n",
    ")\n",
    "\n",
    "\n",
    "df_1 = df_1.combine(\n",
    "    other=df_2, \n",
    "    func=np.minimum,\n",
    ")\n",
    "\n",
    "display(df_1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "659afa98-260d-45b0-8214-3c8e09bdc084",
   "metadata": {},
   "source": [
    "## Método combine_first()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "649468af-c1e1-4ae8-a192-3fe88a1ee84e",
   "metadata": {},
   "source": [
    "Combina dos datafrmames llenando los valores nulos del primer dataframe con los valores del segundo dataframe."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "3a001bc0-a4c9-437a-a868-eb1e2bfa17ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "     A    B\n",
       "0  NaN  NaN\n",
       "1  0.0  4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "   A  B\n",
       "0  1  3\n",
       "1  1  3"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "''"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "     A    B\n",
       "0  1.0  3.0\n",
       "1  0.0  4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})\n",
    "df_2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})\n",
    "result = df_1.combine_first(df_2)\n",
    "\n",
    "display(\n",
    "    df_1,\n",
    "    \"\",\n",
    "    df_2,\n",
    "    \"\",\n",
    "    result\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}