{ "cells": [ { "cell_type": "markdown", "id": "c93b837c-4072-41e8-beab-f1dcb775f2b0", "metadata": { "tags": [] }, "source": [ "Verificación de rangos numéricos y de fecha --- 10:06 min\n", "===\n", "\n", "* 10:06 min | Última modificación: Octubre 14, 2021 | [YouTube](https://youtu.be/W22Kr9LGw6I)" ] }, { "cell_type": "markdown", "id": "cdbf76da-a6b0-40e9-8449-b918908cab1e", "metadata": {}, "source": [ "Rangos numéricos\n", "---" ] }, { "cell_type": "markdown", "id": "9c8c7d5a-006c-4ef2-8396-37a32adf6292", "metadata": {}, "source": [ "Opciones:\n", "\n", "* Borrado del registro.\n", "\n", "* Conversión a nulo.\n", "\n", "* Corrección al máximo o mínimo.\n", "\n", "* Imputación como si fuere un valor nulo.\n", "\n", "* Actualización a un valor preestablecido." ] }, { "cell_type": "code", "execution_count": 1, "id": "6783233f-dc00-472f-bcf1-3d0a747a92ce", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data.csv\n" ] } ], "source": [ "%%writefile /tmp/data.csv\n", "personId,rangecol\n", "1,1\n", "2,3\n", "3,2\n", "4,10\n", "5,0\n", "6,1\n", "7,10\n", "8,9" ] }, { "cell_type": "markdown", "id": "b787734e-b347-41a5-a8d8-67609cd8bf53", "metadata": {}, "source": [ "Los valores de la columna `rangecol` están restringidos al rango [1, 2, 3]." ] }, { "cell_type": "code", "execution_count": 2, "id": "c254af73-15fb-4ba7-8953-1066f436d26d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3 10\n", "4 0\n", "6 10\n", "7 9\n", "Name: rangecol, dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.read_csv('/tmp/data.csv')\n", "\n", "#\n", "# registros que no cumplen la restricción.\n", "#\n", "df[(df.rangecol < 1) | (df.rangecol > 3)].rangecol" ] }, { "cell_type": "code", "execution_count": 3, "id": "8d9475c3-27c6-448e-918f-b22b346f5244", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
personIdrangecol
011
123
232
343
451
561
673
783
\n", "
" ], "text/plain": [ " personId rangecol\n", "0 1 1\n", "1 2 3\n", "2 3 2\n", "3 4 3\n", "4 5 1\n", "5 6 1\n", "6 7 3\n", "7 8 3" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Se convierten valores > 3 a 3 y valoes < 1 a 1\n", "#\n", "df.rangecol[df.rangecol > 3] = 3\n", "df.rangecol[df.rangecol < 1] = 1\n", "df" ] }, { "cell_type": "code", "execution_count": 4, "id": "0c02892a-e65b-44c2-8167-5efea402cd69", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
personIdrangecol
011
123
232
561
\n", "
" ], "text/plain": [ " personId rangecol\n", "0 1 1\n", "1 2 3\n", "2 3 2\n", "5 6 1" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('/tmp/data.csv')\n", "\n", "#\n", "# Borrado de registros que no están en el rango\n", "# mediante selección\n", "#\n", "df = df[(df.rangecol >= 1) & (df.rangecol <= 3)]\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "id": "edc1c760-45f7-4420-8272-df3ce2d7cbc1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
personIdrangecol
011
123
232
561
\n", "
" ], "text/plain": [ " personId rangecol\n", "0 1 1\n", "1 2 3\n", "2 3 2\n", "5 6 1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"/tmp/data.csv\")\n", "\n", "#\n", "# Borrado de registros que no están en el rango\n", "# usando la función drop()\n", "#\n", "df.drop(\n", " df[(df.rangecol < 1) | (df.rangecol > 3)].index,\n", " inplace=True,\n", ")\n", "df" ] }, { "cell_type": "markdown", "id": "c8740f93-ff07-47a0-b213-67e2bb4e3bc4", "metadata": {}, "source": [ "Rangos de fecha\n", "---" ] }, { "cell_type": "code", "execution_count": 6, "id": "95795fe8-31d6-4066-957b-f03e0d1f73b6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting /tmp/data.csv\n" ] } ], "source": [ "%%writefile /tmp/data.csv\n", "eventId,eventDate\n", "1,2012-01-10\n", "2,1900-12-23\n", "3,2018-09-17\n", "4,2019-11-15\n", "5,2020-04-23\n", "6,2025-07-03\n", "7,2020-02-17\n", "8,2017-08-12\n", "9,2015-06-24" ] }, { "cell_type": "code", "execution_count": 7, "id": "e1624500-b65b-497e-bf10-6e9a9ea83873", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eventIdeventDate
012012-01-10
121900-12-23
232018-09-17
342019-11-15
452020-04-23
562025-07-03
672020-02-17
782017-08-12
892015-06-24
\n", "
" ], "text/plain": [ " eventId eventDate\n", "0 1 2012-01-10\n", "1 2 1900-12-23\n", "2 3 2018-09-17\n", "3 4 2019-11-15\n", "4 5 2020-04-23\n", "5 6 2025-07-03\n", "6 7 2020-02-17\n", "7 8 2017-08-12\n", "8 9 2015-06-24" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "eventId int64\n", "eventDate object\n", "dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = pd.read_csv('/tmp/data.csv')\n", "\n", "display(\n", " df,\n", " df.dtypes\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "id": "1f309d81-3b49-40e1-8b7d-b773cbad4d9e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eventIdeventDate
012012-01-10
121900-12-23
232018-09-17
342019-11-15
452020-04-23
562025-07-03
672020-02-17
782017-08-12
892015-06-24
\n", "
" ], "text/plain": [ " eventId eventDate\n", "0 1 2012-01-10\n", "1 2 1900-12-23\n", "2 3 2018-09-17\n", "3 4 2019-11-15\n", "4 5 2020-04-23\n", "5 6 2025-07-03\n", "6 7 2020-02-17\n", "7 8 2017-08-12\n", "8 9 2015-06-24" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "eventId int64\n", "eventDate datetime64[ns]\n", "dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#\n", "# Cambio del tipo de dato de 'eventDate' a datetime\n", "#\n", "df.eventDate = pd.to_datetime(df.eventDate)\n", "\n", "display(\n", " df,\n", " df.dtypes\n", ")" ] }, { "cell_type": "markdown", "id": "d1e07f45-1420-4f73-9bd0-b83fde78077a", "metadata": {}, "source": [ "Rango de fechas:\n", "\n", " 1950-01-01\n", " Fecha actual" ] }, { "cell_type": "code", "execution_count": 9, "id": "283e6386-b70c-4cfa-adb5-e4ce2f61c582", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Timestamp('2021-09-15 00:00:00')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import datetime as dt\n", "\n", "today = pd.to_datetime(dt.date.today())\n", "today" ] }, { "cell_type": "code", "execution_count": 10, "id": "1203d502-bd97-4de1-b1be-a4fffd2f8531", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eventIdeventDate
121900-12-23
562025-07-03
\n", "
" ], "text/plain": [ " eventId eventDate\n", "1 2 1900-12-23\n", "5 6 2025-07-03" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#\n", "# Verificación de restricciones\n", "#\n", "df[(df.eventDate < pd.to_datetime('1950-01-01')) | (df.eventDate > today)]" ] }, { "cell_type": "code", "execution_count": 11, "id": "e905ec02-4671-4a2b-be75-ce38efc36b0c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eventIdeventDate
012012-01-10
121950-01-01
232018-09-17
342019-11-15
452020-04-23
562021-09-15
672020-02-17
782017-08-12
892015-06-24
\n", "
" ], "text/plain": [ " eventId eventDate\n", "0 1 2012-01-10\n", "1 2 1950-01-01\n", "2 3 2018-09-17\n", "3 4 2019-11-15\n", "4 5 2020-04-23\n", "5 6 2021-09-15\n", "6 7 2020-02-17\n", "7 8 2017-08-12\n", "8 9 2015-06-24" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "eventId int64\n", "eventDate datetime64[ns]\n", "dtype: object" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.loc[\n", " df.eventDate < \"1950-01-01\",\n", " \"eventDate\",\n", "] = pd.to_datetime(\"1950-01-01\")\n", "\n", "\n", "df.loc[\n", " df.eventDate > today, \n", " \"eventDate\"\n", "] = today\n", "\n", "display(\n", " df,\n", " df.dtypes\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 5 }