{
"cells": [
{
"cell_type": "markdown",
"id": "148b56ac-a42a-40f7-9264-ae69e79a3fe3",
"metadata": {},
"source": [
"Importación de archivos usando Pandas --- 10:37 min\n",
"===\n",
"\n",
"* 10:37 min | Última modificación: Octubre 13, 2021 | [YouTube](https://youtu.be/Dmjcs0KmuRw)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b1828156-53bb-40cb-b6e6-dc9b91caa908",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5b97b539-4e06-474a-b555-eb411bdd35a2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cement | \n",
" slag | \n",
" ash | \n",
" water | \n",
" superplastic | \n",
" coarseagg | \n",
" fineagg | \n",
" age | \n",
" strength | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 162.0 | \n",
" 2.5 | \n",
" 1040.0 | \n",
" 676.0 | \n",
" 28 | \n",
" 79.99 | \n",
"
\n",
" \n",
" 1 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 162.0 | \n",
" 2.5 | \n",
" 1055.0 | \n",
" 676.0 | \n",
" 28 | \n",
" 61.89 | \n",
"
\n",
" \n",
" 2 | \n",
" 332.5 | \n",
" 142.5 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0 | \n",
" 932.0 | \n",
" 594.0 | \n",
" 270 | \n",
" 40.27 | \n",
"
\n",
" \n",
" 3 | \n",
" 332.5 | \n",
" 142.5 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0 | \n",
" 932.0 | \n",
" 594.0 | \n",
" 365 | \n",
" 41.05 | \n",
"
\n",
" \n",
" 4 | \n",
" 198.6 | \n",
" 132.4 | \n",
" 0.0 | \n",
" 192.0 | \n",
" 0.0 | \n",
" 978.4 | \n",
" 825.5 | \n",
" 360 | \n",
" 44.30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cement slag ash water superplastic coarseagg fineagg age strength\n",
"0 540.0 0.0 0.0 162.0 2.5 1040.0 676.0 28 79.99\n",
"1 540.0 0.0 0.0 162.0 2.5 1055.0 676.0 28 61.89\n",
"2 332.5 142.5 0.0 228.0 0.0 932.0 594.0 270 40.27\n",
"3 332.5 142.5 0.0 228.0 0.0 932.0 594.0 365 41.05\n",
"4 198.6 132.4 0.0 192.0 0.0 978.4 825.5 360 44.30"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Lectura del dataset completo\n",
"# =============================================================================\n",
"# Lectura de un archivo remoto.\n",
"#\n",
"remote_file = \"https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/concrete.csv\"\n",
"\n",
"dataset = pd.read_csv(\n",
" remote_file,\n",
" sep = ',', # separador de campos\n",
" thousands = None, # separador de miles para números\n",
" decimal = '.', # separador de decimales\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "33079ee9-6437-4c39-bbd0-33efcba2b0be",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cement | \n",
" slag | \n",
" ash | \n",
" water | \n",
" superplastic | \n",
" coarseagg | \n",
" fineagg | \n",
" age | \n",
" strength | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 162.0 | \n",
" 2.5 | \n",
" 1040.0 | \n",
" 676.0 | \n",
" 28 | \n",
" 79.99 | \n",
"
\n",
" \n",
" 1 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 162.0 | \n",
" 2.5 | \n",
" 1055.0 | \n",
" 676.0 | \n",
" 28 | \n",
" 61.89 | \n",
"
\n",
" \n",
" 2 | \n",
" 332.5 | \n",
" 142.5 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0 | \n",
" 932.0 | \n",
" 594.0 | \n",
" 270 | \n",
" 40.27 | \n",
"
\n",
" \n",
" 3 | \n",
" 332.5 | \n",
" 142.5 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0 | \n",
" 932.0 | \n",
" 594.0 | \n",
" 365 | \n",
" 41.05 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cement slag ash water superplastic coarseagg fineagg age strength\n",
"0 540.0 0.0 0.0 162.0 2.5 1040.0 676.0 28 79.99\n",
"1 540.0 0.0 0.0 162.0 2.5 1055.0 676.0 28 61.89\n",
"2 332.5 142.5 0.0 228.0 0.0 932.0 594.0 270 40.27\n",
"3 332.5 142.5 0.0 228.0 0.0 932.0 594.0 365 41.05"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Lectura de las primeras filas\n",
"# =============================================================================\n",
"#\n",
"# También tiene las opciones:\n",
"# * skiprows\n",
"# * skipfooter\n",
"#\n",
"dataset = pd.read_csv(\n",
" remote_file,\n",
" nrows=4,\n",
")\n",
"\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "da5a83f3-8495-46a7-ac24-b979db212f8b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 332.5 | \n",
" 142.5 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0.1 | \n",
" 932.0 | \n",
" 594.0 | \n",
" 270 | \n",
" 40.27 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 332.5 | \n",
" 142.5 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0 | \n",
" 932.0 | \n",
" 594.0 | \n",
" 365 | \n",
" 41.05 | \n",
"
\n",
" \n",
" 1 | \n",
" 198.6 | \n",
" 132.4 | \n",
" 0.0 | \n",
" 192.0 | \n",
" 0.0 | \n",
" 978.4 | \n",
" 825.5 | \n",
" 360 | \n",
" 44.30 | \n",
"
\n",
" \n",
" 2 | \n",
" 266.0 | \n",
" 114.0 | \n",
" 0.0 | \n",
" 228.0 | \n",
" 0.0 | \n",
" 932.0 | \n",
" 670.0 | \n",
" 90 | \n",
" 47.03 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 332.5 142.5 0.0 228.0 0.0.1 932.0 594.0 270 40.27\n",
"0 332.5 142.5 0.0 228.0 0.0 932.0 594.0 365 41.05\n",
"1 198.6 132.4 0.0 192.0 0.0 978.4 825.5 360 44.30\n",
"2 266.0 114.0 0.0 228.0 0.0 932.0 670.0 90 47.03"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
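],
"source": [
"#\n",
"# Especificación de filas que cumplen con una\n",
"# condición\n",
"# =============================================================================\n",
"# Nota: la función también omite la fila 0 (encabezado), por lo que la\n",
"# primera fila no omitida (la 3) se usa como nombres de columnas.\n",
"#\n",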
"dataset = pd.read_csv(\n",
" remote_file,\n",
" skiprows=lambda row_counter: row_counter < 3 or row_counter > 6,\n",
")\n",
"\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7d0f939d-2a40-4b9b-8ea0-866f81cebed7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cement | \n",
" ash | \n",
" superplastic | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 2.5 | \n",
"
\n",
" \n",
" 1 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 2.5 | \n",
"
\n",
" \n",
" 2 | \n",
" 332.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 332.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 198.6 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1025 | \n",
" 276.4 | \n",
" 90.3 | \n",
" 8.9 | \n",
"
\n",
" \n",
" 1026 | \n",
" 322.2 | \n",
" 115.6 | \n",
" 10.4 | \n",
"
\n",
" \n",
" 1027 | \n",
" 148.5 | \n",
" 108.6 | \n",
" 6.1 | \n",
"
\n",
" \n",
" 1028 | \n",
" 159.1 | \n",
" 0.0 | \n",
" 11.3 | \n",
"
\n",
" \n",
" 1029 | \n",
" 260.9 | \n",
" 78.3 | \n",
" 8.6 | \n",
"
\n",
" \n",
"
\n",
"
1030 rows × 3 columns
\n",
"
"
],
"text/plain": [
" cement ash superplastic\n",
"0 540.0 0.0 2.5\n",
"1 540.0 0.0 2.5\n",
"2 332.5 0.0 0.0\n",
"3 332.5 0.0 0.0\n",
"4 198.6 0.0 0.0\n",
"... ... ... ...\n",
"1025 276.4 90.3 8.9\n",
"1026 322.2 115.6 10.4\n",
"1027 148.5 108.6 6.1\n",
"1028 159.1 0.0 11.3\n",
"1029 260.9 78.3 8.6\n",
"\n",
"[1030 rows x 3 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Lectura de columnas seleccionadas por índice\n",
"# =============================================================================\n",
"# También puede especificarse con una función\n",
"# lambda como en el caso anterior.\n",
"#\n",
"dataset = pd.read_csv(\n",
" remote_file,\n",
" usecols=[0, 2, 4],\n",
")\n",
"\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a8abce47-8cd5-4048-bda3-524d159c2d38",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cement | \n",
" ash | \n",
" superplastic | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 2.5 | \n",
"
\n",
" \n",
" 1 | \n",
" 540.0 | \n",
" 0.0 | \n",
" 2.5 | \n",
"
\n",
" \n",
" 2 | \n",
" 332.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 332.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 198.6 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 1025 | \n",
" 276.4 | \n",
" 90.3 | \n",
" 8.9 | \n",
"
\n",
" \n",
" 1026 | \n",
" 322.2 | \n",
" 115.6 | \n",
" 10.4 | \n",
"
\n",
" \n",
" 1027 | \n",
" 148.5 | \n",
" 108.6 | \n",
" 6.1 | \n",
"
\n",
" \n",
" 1028 | \n",
" 159.1 | \n",
" 0.0 | \n",
" 11.3 | \n",
"
\n",
" \n",
" 1029 | \n",
" 260.9 | \n",
" 78.3 | \n",
" 8.6 | \n",
"
\n",
" \n",
"
\n",
"
1030 rows × 3 columns
\n",
"
"
],
"text/plain": [
" cement ash superplastic\n",
"0 540.0 0.0 2.5\n",
"1 540.0 0.0 2.5\n",
"2 332.5 0.0 0.0\n",
"3 332.5 0.0 0.0\n",
"4 198.6 0.0 0.0\n",
"... ... ... ...\n",
"1025 276.4 90.3 8.9\n",
"1026 322.2 115.6 10.4\n",
"1027 148.5 108.6 6.1\n",
"1028 159.1 0.0 11.3\n",
"1029 260.9 78.3 8.6\n",
"\n",
"[1030 rows x 3 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Lectura de columnas seleccionadas por nombre\n",
"# =============================================================================\n",
"# \n",
"dataset = pd.read_csv(\n",
" remote_file,\n",
" usecols=[\"cement\", \"ash\", \"superplastic\"],\n",
")\n",
"\n",
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b8b82699-7f64-4327-bcda-2795e0f7c5c8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"cement object\n",
"slag float64\n",
"ash float64\n",
"water float64\n",
"superplastic float64\n",
"coarseagg float64\n",
"fineagg float64\n",
"age int64\n",
"strength float64\n",
"dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Especificación del tipo de dato\n",
"# =============================================================================\n",
"#\n",
"dataset = pd.read_csv(\n",
" remote_file,\n",
" dtype={'cement': str},\n",
")\n",
"\n",
"dataset.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5dd8fc1c-ee65-4c94-8ef7-a14124ca1d73",
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Dataset sin header\n",
"# =============================================================================\n",
"#"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d238288b-d7e7-445f-a542-6e95216a25ef",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /tmp/data.csv\n"
]
}
],
"source": [
"%%writefile /tmp/data.csv\n",
"Isabelle I. Dotson,\"P.O. Box 477, 7357 Cras St.\",LamontzŽe,12144-86218,02.24.21,21\n",
"Erin F. Munoz,\"P.O. Box 737, 6015 Ligula St.\",Springfield,6373,10.15.20,21\n",
"Troy G. Harrell,3706 Ut St.,Herne,69259,06.28.21,23\n",
"Yen W. Summers,\"P.O. Box 939, 8016 Egestas Av.\",Elversele,93452,05.24.21,21\n",
"Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24\n",
"Yuri O. Head,\"P.O. Box 214, 231 Dapibus Ave\",Lac-Serent,4464,12.22.21,17\n",
"Ima A. Richard,\"Ap #496-7181 Ullamcorper, St.\",Lehrte,21449,09.12.21,22\n",
"Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21\n",
"Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15\n",
"Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19\n",
"Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23\n",
"Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14\n",
"Christian T. Mercer,\"P.O. Box 858, 1997 Elit, Avenue\",Guaymas,21574,10.20.20,19\n",
"Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25\n",
"Wilma I. Patton,\"P.O. Box 299, 7144 Orci Street\",Palermo,233311,12.27.20,17\n",
"Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21\n",
"Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19\n",
"May K. Wood,\"777-1067 Auctor, Ave\",Maastricht,754025,01.06.21,13\n",
"Beck Q. Monroe,\"P.O. Box 123, 9085 Lorem, Rd.\",Reading,27665,08.03.22,24\n",
"Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11\n",
"Serina X. Lang,\"P.O. Box 780, 2133 Rutrum Road\",Bromyard,Z7366,07.02.21,14\n",
"Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17\n",
"Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24\n",
"Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19\n",
"Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23\n",
"Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20\n",
"Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10\n",
"Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20\n",
"Serina B. Nash,\"P.O. Box 143, 5197 At Avenue\",San Costantino Calabro,77234,10.31.21,18\n",
"Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ef88bcc3-4985-40c9-9530-7c0b14548c4d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Isabelle I. Dotson | \n",
" P.O. Box 477, 7357 Cras St. | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 02.24.21 | \n",
" 21 | \n",
"
\n",
" \n",
" 1 | \n",
" Erin F. Munoz | \n",
" P.O. Box 737, 6015 Ligula St. | \n",
" Springfield | \n",
" 6373 | \n",
" 10.15.20 | \n",
" 21 | \n",
"
\n",
" \n",
" 2 | \n",
" Troy G. Harrell | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" 69259 | \n",
" 06.28.21 | \n",
" 23 | \n",
"
\n",
" \n",
" 3 | \n",
" Yen W. Summers | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" 05.24.21 | \n",
" 21 | \n",
"
\n",
" \n",
" 4 | \n",
" Jerome B. Carney | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" 20619 | \n",
" 09.23.20 | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City \\\n",
"0 Isabelle I. Dotson P.O. Box 477, 7357 Cras St. LamontzŽe \n",
"1 Erin F. Munoz P.O. Box 737, 6015 Ligula St. Springfield \n",
"2 Troy G. Harrell 3706 Ut St. Herne \n",
"3 Yen W. Summers P.O. Box 939, 8016 Egestas Av. Elversele \n",
"4 Jerome B. Carney 298-7004 Natoque St. Osimo \n",
"\n",
" ZipCode Date Score \n",
"0 12144-86218 02.24.21 21 \n",
"1 6373 10.15.20 21 \n",
"2 69259 06.28.21 23 \n",
"3 93452 05.24.21 21 \n",
"4 20619 09.23.20 24 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"column_names = \"Name,Address,City,ZipCode,Date,Score\".split(',')\n",
"\n",
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
" names=column_names,\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "3b89cff6-a5fb-4845-b958-490d9868397d",
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Uso de converters\n",
"# =============================================================================\n",
"#"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5e090404-a493-4eed-a436-8a562ab35777",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /tmp/data.csv\n"
]
}
],
"source": [
"%%writefile /tmp/data.csv\n",
"Name,Address,City,ZipCode,Date,Score\n",
"Isabelle I. Dotson,\"P.O. Box 477, 7357 Cras St.\",LamontzŽe,12144-86218,02.24.21,21\n",
"Erin F. Munoz,\"P.O. Box 737, 6015 Ligula St.\",Springfield,6373,10.15.20,21\n",
"Troy G. Harrell,3706 Ut St.,Herne,69259,06.28.21,23\n",
"Yen W. Summers,\"P.O. Box 939, 8016 Egestas Av.\",Elversele,93452,05.24.21,21\n",
"Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24\n",
"Yuri O. Head,\"P.O. Box 214, 231 Dapibus Ave\",Lac-Serent,4464,12.22.21,17\n",
"Ima A. Richard,\"Ap #496-7181 Ullamcorper, St.\",Lehrte,21449,09.12.21,22\n",
"Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21\n",
"Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15\n",
"Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19\n",
"Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23\n",
"Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14\n",
"Christian T. Mercer,\"P.O. Box 858, 1997 Elit, Avenue\",Guaymas,21574,10.20.20,19\n",
"Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25\n",
"Wilma I. Patton,\"P.O. Box 299, 7144 Orci Street\",Palermo,233311,12.27.20,17\n",
"Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21\n",
"Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19\n",
"May K. Wood,\"777-1067 Auctor, Ave\",Maastricht,754025,01.06.21,13\n",
"Beck Q. Monroe,\"P.O. Box 123, 9085 Lorem, Rd.\",Reading,27665,08.03.22,24\n",
"Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11\n",
"Serina X. Lang,\"P.O. Box 780, 2133 Rutrum Road\",Bromyard,Z7366,07.02.21,14\n",
"Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17\n",
"Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24\n",
"Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19\n",
"Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23\n",
"Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20\n",
"Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10\n",
"Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20\n",
"Serina B. Nash,\"P.O. Box 143, 5197 At Avenue\",San Costantino Calabro,77234,10.31.21,18\n",
"Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "64481f5f-55e2-45f1-8492-245645805576",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" ISABELLE I. DOTSON | \n",
" P.O. Box 477, 7357 Cras St. | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 02.24.21 | \n",
" 21 | \n",
"
\n",
" \n",
" 1 | \n",
" ERIN F. MUNOZ | \n",
" P.O. Box 737, 6015 Ligula St. | \n",
" Springfield | \n",
" 6373 | \n",
" 10.15.20 | \n",
" 21 | \n",
"
\n",
" \n",
" 2 | \n",
" TROY G. HARRELL | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" 69259 | \n",
" 06.28.21 | \n",
" 23 | \n",
"
\n",
" \n",
" 3 | \n",
" YEN W. SUMMERS | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" 05.24.21 | \n",
" 21 | \n",
"
\n",
" \n",
" 4 | \n",
" JEROME B. CARNEY | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" 20619 | \n",
" 09.23.20 | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City \\\n",
"0 ISABELLE I. DOTSON P.O. Box 477, 7357 Cras St. LamontzŽe \n",
"1 ERIN F. MUNOZ P.O. Box 737, 6015 Ligula St. Springfield \n",
"2 TROY G. HARRELL 3706 Ut St. Herne \n",
"3 YEN W. SUMMERS P.O. Box 939, 8016 Egestas Av. Elversele \n",
"4 JEROME B. CARNEY 298-7004 Natoque St. Osimo \n",
"\n",
" ZipCode Date Score \n",
"0 12144-86218 02.24.21 21 \n",
"1 6373 10.15.20 21 \n",
"2 69259 06.28.21 23 \n",
"3 93452 05.24.21 21 \n",
"4 20619 09.23.20 24 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
" converters = {\n",
" 'Name': lambda x: x.upper()\n",
" }\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "480a972e-ff50-46b6-b2cc-ed40345a097f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Isabelle I. Dotson | \n",
" P.O. Box 477, 7357 Cras St. | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 2021-02-24 | \n",
" 21 | \n",
"
\n",
" \n",
" 1 | \n",
" Erin F. Munoz | \n",
" P.O. Box 737, 6015 Ligula St. | \n",
" Springfield | \n",
" 6373 | \n",
" 2020-10-15 | \n",
" 21 | \n",
"
\n",
" \n",
" 2 | \n",
" Troy G. Harrell | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" 69259 | \n",
" 2021-06-28 | \n",
" 23 | \n",
"
\n",
" \n",
" 3 | \n",
" Yen W. Summers | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" 2021-05-24 | \n",
" 21 | \n",
"
\n",
" \n",
" 4 | \n",
" Jerome B. Carney | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" 20619 | \n",
" 2020-09-23 | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City \\\n",
"0 Isabelle I. Dotson P.O. Box 477, 7357 Cras St. LamontzŽe \n",
"1 Erin F. Munoz P.O. Box 737, 6015 Ligula St. Springfield \n",
"2 Troy G. Harrell 3706 Ut St. Herne \n",
"3 Yen W. Summers P.O. Box 939, 8016 Egestas Av. Elversele \n",
"4 Jerome B. Carney 298-7004 Natoque St. Osimo \n",
"\n",
" ZipCode Date Score \n",
"0 12144-86218 2021-02-24 21 \n",
"1 6373 2020-10-15 21 \n",
"2 69259 2021-06-28 23 \n",
"3 93452 2021-05-24 21 \n",
"4 20619 2020-09-23 24 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Campos con fecha\n",
"# =============================================================================\n",
"#\n",
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
" parse_dates=['Date'],\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d3206bdd-cd6f-4083-a5b6-731118fff9cd",
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Valores Faltantes\n",
"# =============================================================================\n",
"# En el siguiente archivo se eliminaron varios\n",
"# datos. En los faltantes aparece \"data,,data\"\n",
"#"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "80ee1668-90cf-4cff-be3a-1ce75bed054f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /tmp/data.csv\n"
]
}
],
"source": [
"%%writefile /tmp/data.csv\n",
"Name,Address,City,ZipCode,Date,Score\n",
"Isabelle I. Dotson,,LamontzŽe,12144-86218,02.24.21,21\n",
"Erin F. Munoz,\"P.O. Box 737, 6015 Ligula St.\",Springfield,6373,10.15.20,21\n",
"Troy G. Harrell,3706 Ut St.,Herne,69259,\n",
"Yen W. Summers,\"P.O. Box 939, 8016 Egestas Av.\",Elversele,93452,05.24.21,21\n",
"Jerome B. Carney,298-7004 Natoque St.,Osimo,,09.23.20,24\n",
"Yuri O. Head,\"P.O. Box 214, 231 Dapibus Ave\",Lac-Serent,4464,12.22.21,17\n",
"Ima A. Richard,\"Ap #496-7181 Ullamcorper, St.\",Lehrte,21449,09.12.21,22\n",
"Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21\n",
"Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15\n",
"Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19\n",
"Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23\n",
"Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14\n",
"Christian T. Mercer,\"P.O. Box 858, 1997 Elit, Avenue\",Guaymas,21574,10.20.20,19\n",
"Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25\n",
"Wilma I. Patton,\"P.O. Box 299, 7144 Orci Street\",Palermo,233311,12.27.20,17\n",
"Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21\n",
"Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19\n",
"May K. Wood,\"777-1067 Auctor, Ave\",Maastricht,754025,01.06.21,13\n",
"Beck Q. Monroe,\"P.O. Box 123, 9085 Lorem, Rd.\",Reading,27665,08.03.22,24\n",
"Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11\n",
"Serina X. Lang,\"P.O. Box 780, 2133 Rutrum Road\",Bromyard,Z7366,07.02.21,14\n",
"Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17\n",
"Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24\n",
"Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,\n",
"Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23\n",
"Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20\n",
"Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10\n",
"Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20\n",
"Serina B. Nash,\"P.O. Box 143, 5197 At Avenue\",San Costantino Calabro,77234,10.31.21,18\n",
"Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "3fae909b-a4ce-40ce-8563-817980a85cbc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Isabelle I. Dotson | \n",
" NaN | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 02.24.21 | \n",
" 21.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Erin F. Munoz | \n",
" P.O. Box 737, 6015 Ligula St. | \n",
" Springfield | \n",
" 6373 | \n",
" 10.15.20 | \n",
" 21.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Troy G. Harrell | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" 69259 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" Yen W. Summers | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" 05.24.21 | \n",
" 21.0 | \n",
"
\n",
" \n",
" 4 | \n",
" Jerome B. Carney | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" NaN | \n",
" 09.23.20 | \n",
" 24.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City \\\n",
"0 Isabelle I. Dotson NaN LamontzŽe \n",
"1 Erin F. Munoz P.O. Box 737, 6015 Ligula St. Springfield \n",
"2 Troy G. Harrell 3706 Ut St. Herne \n",
"3 Yen W. Summers P.O. Box 939, 8016 Egestas Av. Elversele \n",
"4 Jerome B. Carney 298-7004 Natoque St. Osimo \n",
"\n",
" ZipCode Date Score \n",
"0 12144-86218 02.24.21 21.0 \n",
"1 6373 10.15.20 21.0 \n",
"2 69259 NaN NaN \n",
"3 93452 05.24.21 21.0 \n",
"4 NaN 09.23.20 24.0 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a3f79f34-d8b3-40a8-a8cd-8566535f4e78",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Isabelle I. Dotson | \n",
" | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 02.24.21 | \n",
" 21 | \n",
"
\n",
" \n",
" 1 | \n",
" Erin F. Munoz | \n",
" P.O. Box 737, 6015 Ligula St. | \n",
" Springfield | \n",
" 6373 | \n",
" 10.15.20 | \n",
" 21 | \n",
"
\n",
" \n",
" 2 | \n",
" Troy G. Harrell | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" 69259 | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 3 | \n",
" Yen W. Summers | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" 05.24.21 | \n",
" 21 | \n",
"
\n",
" \n",
" 4 | \n",
" Jerome B. Carney | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" | \n",
" 09.23.20 | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City \\\n",
"0 Isabelle I. Dotson LamontzŽe \n",
"1 Erin F. Munoz P.O. Box 737, 6015 Ligula St. Springfield \n",
"2 Troy G. Harrell 3706 Ut St. Herne \n",
"3 Yen W. Summers P.O. Box 939, 8016 Egestas Av. Elversele \n",
"4 Jerome B. Carney 298-7004 Natoque St. Osimo \n",
"\n",
" ZipCode Date Score \n",
"0 12144-86218 02.24.21 21 \n",
"1 6373 10.15.20 21 \n",
"2 69259 \n",
"3 93452 05.24.21 21 \n",
"4 09.23.20 24 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# na_filter=False desactiva la detección de valores faltantes: los campos\n",
"# vacíos se leen como cadenas vacías en lugar de NaN\n",
"#\n",
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
" na_filter=False,\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4b0ae599-5343-466d-99ef-ded8fc9cbd72",
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Caracteres marcando nulos\n",
"# =============================================================================\n",
"# En el siguiente archivo los nulos están \n",
"# indicados con '?'\n",
"#"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "310fa855-0df2-410e-b51e-2f2a34e31928",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /tmp/data.csv\n"
]
}
],
"source": [
"%%writefile /tmp/data.csv\n",
"Name,Address,City,ZipCode,Date,Score\n",
"Isabelle I. Dotson,?,LamontzŽe,12144-86218,02.24.21,21\n",
"Erin F. Munoz,\"P.O. Box 737, 6015 Ligula St.\",Springfield,6373,10.15.20,21\n",
"Troy G. Harrell,3706 Ut St.,Herne,?,06.28.21,23\n",
"Yen W. Summers,\"P.O. Box 939, 8016 Egestas Av.\",Elversele,93452,?\n",
"Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24\n",
"Yuri O. Head,\"P.O. Box 214, 231 Dapibus Ave\",Lac-Serent,4464,12.22.21,17\n",
"Ima A. Richard,\"Ap #496-7181 Ullamcorper, St.\",Lehrte,21449,09.12.21,22\n",
"Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21\n",
"Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15\n",
"Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19\n",
"Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23\n",
"Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14\n",
"Christian T. Mercer,\"P.O. Box 858, 1997 Elit, Avenue\",Guaymas,21574,10.20.20,19\n",
"Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25\n",
"Wilma I. Patton,\"P.O. Box 299, 7144 Orci Street\",Palermo,233311,12.27.20,17\n",
"Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21\n",
"Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19\n",
"May K. Wood,\"777-1067 Auctor, Ave\",Maastricht,754025,01.06.21,13\n",
"Beck Q. Monroe,\"P.O. Box 123, 9085 Lorem, Rd.\",Reading,27665,08.03.22,24\n",
"Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11\n",
"Serina X. Lang,\"P.O. Box 780, 2133 Rutrum Road\",Bromyard,Z7366,07.02.21,14\n",
"Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17\n",
"Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24\n",
"Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19\n",
"Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23\n",
"Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20\n",
"Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10\n",
"Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20\n",
"Serina B. Nash,\"P.O. Box 143, 5197 At Avenue\",San Costantino Calabro,77234,10.31.21,18\n",
"Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "2018f504-61a6-464a-9278-587db6c3e779",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Isabelle I. Dotson | \n",
" NaN | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 02.24.21 | \n",
" 21.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Erin F. Munoz | \n",
" P.O. Box 737, 6015 Ligula St. | \n",
" Springfield | \n",
" 6373 | \n",
" 10.15.20 | \n",
" 21.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Troy G. Harrell | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" NaN | \n",
" 06.28.21 | \n",
" 23.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Yen W. Summers | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" Jerome B. Carney | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" 20619 | \n",
" 09.23.20 | \n",
" 24.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City \\\n",
"0 Isabelle I. Dotson NaN LamontzŽe \n",
"1 Erin F. Munoz P.O. Box 737, 6015 Ligula St. Springfield \n",
"2 Troy G. Harrell 3706 Ut St. Herne \n",
"3 Yen W. Summers P.O. Box 939, 8016 Egestas Av. Elversele \n",
"4 Jerome B. Carney 298-7004 Natoque St. Osimo \n",
"\n",
" ZipCode Date Score \n",
"0 12144-86218 02.24.21 21.0 \n",
"1 6373 10.15.20 21.0 \n",
"2 NaN 06.28.21 23.0 \n",
"3 93452 NaN NaN \n",
"4 20619 09.23.20 24.0 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# na_values indica las cadenas que deben interpretarse como valores\n",
"# faltantes (NaN); en este caso '?'\n",
"#\n",
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
" na_values='?',\n",
")\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "0e113783-7c1a-4ae5-ac55-294fbe0b5cb8",
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Líneas con errores\n",
"#"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "75f19c5a-f5fd-405a-bd08-9cf47336b969",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /tmp/data.csv\n"
]
}
],
"source": [
"%%writefile /tmp/data.csv\n",
"Name,Address,City,ZipCode,Date,Score\n",
"Isabelle I. Dotson,?,LamontzŽe,12144-86218,02.24.21,\n",
"Erin F. Munoz,\n",
"Troy G. Harrell,3706 Ut St.,Herne,?,06.28.21,23\n",
"Yen W. Summers,\"P.O. Box 939, 8016 Egestas Av.\",Elversele,93452,?\n",
"Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24\n",
"Yuri O. Head,\"P.O. Box 214, 231 Dapibus Ave\",Lac-Serent,4464,12.22.21,17\n",
"Ima A. Richard,\"Ap #496-7181 Ullamcorper, St.\",Lehrte,21449,09.12.21,22\n",
"Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21\n",
"Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15\n",
"Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19\n",
"Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23\n",
"Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14\n",
"Christian T. Mercer,\"P.O. Box 858, 1997 Elit, Avenue\",Guaymas,21574,10.20.20,19\n",
"Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25\n",
"Wilma I. Patton,\"P.O. Box 299, 7144 Orci Street\",Palermo,233311,12.27.20,17\n",
"Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21\n",
"Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19\n",
"May K. Wood,\"777-1067 Auctor, Ave\",Maastricht,754025,01.06.21,13\n",
"Beck Q. Monroe,\"P.O. Box 123, 9085 Lorem, Rd.\",Reading,27665,08.03.22,24\n",
"Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11\n",
"Serina X. Lang,\"P.O. Box 780, 2133 Rutrum Road\",Bromyard,Z7366,07.02.21,14\n",
"Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17\n",
"Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24\n",
"Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19\n",
"Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23\n",
"Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20\n",
"Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10\n",
"Owen Z. Odom,1659 Risus. Street,\n",
"Serina B. Nash,\"P.O. Box 143, 5197 At Avenue\",San Costantino Calabro,77234,10.31.21,18\n",
"Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "9569d564-7408-451f-bd22-4486389d17e1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Address | \n",
" City | \n",
" ZipCode | \n",
" Date | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Isabelle I. Dotson | \n",
" ? | \n",
" LamontzŽe | \n",
" 12144-86218 | \n",
" 02.24.21 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" Erin F. Munoz | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" Troy G. Harrell | \n",
" 3706 Ut St. | \n",
" Herne | \n",
" ? | \n",
" 06.28.21 | \n",
" 23.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Yen W. Summers | \n",
" P.O. Box 939, 8016 Egestas Av. | \n",
" Elversele | \n",
" 93452 | \n",
" ? | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" Jerome B. Carney | \n",
" 298-7004 Natoque St. | \n",
" Osimo | \n",
" 20619 | \n",
" 09.23.20 | \n",
" 24.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name Address City ZipCode \\\n",
"0 Isabelle I. Dotson ? LamontzŽe 12144-86218 \n",
"1 Erin F. Munoz NaN NaN NaN \n",
"2 Troy G. Harrell 3706 Ut St. Herne ? \n",
"3 Yen W. Summers P.O. Box 939, 8016 Egestas Av. Elversele 93452 \n",
"4 Jerome B. Carney 298-7004 Natoque St. Osimo 20619 \n",
"\n",
" Date Score \n",
"0 02.24.21 NaN \n",
"1 NaN NaN \n",
"2 06.28.21 23.0 \n",
"3 ? NaN \n",
"4 09.23.20 24.0 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
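"# error_bad_lines=False omite las líneas con demasiados campos en lugar de\n",
"# generar un error; warn_bad_lines=True emite una advertencia por cada línea\n",
"# omitida. Las líneas con menos campos no se descartan: se completan con NaN.\n",
"# Nota: desde pandas 1.3 ambos parámetros están obsoletos; el equivalente\n",
"# es on_bad_lines='warn'.\n",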
"#\n",
"dataset = pd.read_csv(\n",
" \"/tmp/data.csv\",\n",
" error_bad_lines=False,\n",
" warn_bad_lines=True,\n",
")\n",
"\n",
"\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "e4643a73-14f9-4c7a-9568-ed9bed93f397",
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Otros separadores\n",
"# ===============================================\n",
"# la opción sep=? permite especificar el tipo de\n",
"# separador\n",
"#"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}