Importación de archivos usando Pandas — 10:37 min
10:37 min | Última modificación: Octubre 13, 2021 | YouTube
[1]:
import pandas as pd
[2]:
#
# Read the complete dataset
# =============================================================================
# The CSV file is fetched from a remote URL. The separator options are
# spelled out in the call to show where each one is configured.
#
remote_file = "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/concrete.csv"
dataset = pd.read_csv(
    remote_file,
    sep=",",  # field separator
    thousands=None,  # thousands separator for numbers
    decimal=".",  # decimal separator
)
dataset.head(n=5)
[2]:
cement | slag | ash | water | superplastic | coarseagg | fineagg | age | strength | |
---|---|---|---|---|---|---|---|---|---|
0 | 540.0 | 0.0 | 0.0 | 162.0 | 2.5 | 1040.0 | 676.0 | 28 | 79.99 |
1 | 540.0 | 0.0 | 0.0 | 162.0 | 2.5 | 1055.0 | 676.0 | 28 | 61.89 |
2 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 270 | 40.27 |
3 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 365 | 41.05 |
4 | 198.6 | 132.4 | 0.0 | 192.0 | 0.0 | 978.4 | 825.5 | 360 | 44.30 |
[3]:
#
# Read only the first rows
# =============================================================================
# nrows limits how many data rows are loaded (4 here).
#
# Related options for restricting rows:
#   * skiprows
#   * skipfooter
#
dataset = pd.read_csv(remote_file, nrows=4)
dataset
[3]:
cement | slag | ash | water | superplastic | coarseagg | fineagg | age | strength | |
---|---|---|---|---|---|---|---|---|---|
0 | 540.0 | 0.0 | 0.0 | 162.0 | 2.5 | 1040.0 | 676.0 | 28 | 79.99 |
1 | 540.0 | 0.0 | 0.0 | 162.0 | 2.5 | 1055.0 | 676.0 | 28 | 61.89 |
2 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 270 | 40.27 |
3 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 365 | 41.05 |
[4]:
#
# Select the rows that satisfy a condition
# =============================================================================
# skiprows accepts a callable that receives the 0-based line number of the
# file and returns True when that line must be skipped.
#
# Line 0 holds the column names, so it is always kept. If it were skipped,
# the first surviving data line would be consumed as the header. With the
# header preserved, file lines 3 to 6 are loaded as data rows.
#
dataset = pd.read_csv(
    remote_file,
    skiprows=lambda row_counter: row_counter != 0
    and (row_counter < 3 or row_counter > 6),
)
dataset
[4]:
332.5 | 142.5 | 0.0 | 228.0 | 0.0.1 | 932.0 | 594.0 | 270 | 40.27 | |
---|---|---|---|---|---|---|---|---|---|
0 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 365 | 41.05 |
1 | 198.6 | 132.4 | 0.0 | 192.0 | 0.0 | 978.4 | 825.5 | 360 | 44.30 |
2 | 266.0 | 114.0 | 0.0 | 228.0 | 0.0 | 932.0 | 670.0 | 90 | 47.03 |
[5]:
#
# Read columns selected by position
# =============================================================================
# usecols takes the 0-based positions of the columns to load. It can also be
# given as a callable, like skiprows in the previous example.
#
dataset = pd.read_csv(remote_file, usecols=[0, 2, 4])
dataset
[5]:
cement | ash | superplastic | |
---|---|---|---|
0 | 540.0 | 0.0 | 2.5 |
1 | 540.0 | 0.0 | 2.5 |
2 | 332.5 | 0.0 | 0.0 |
3 | 332.5 | 0.0 | 0.0 |
4 | 198.6 | 0.0 | 0.0 |
... | ... | ... | ... |
1025 | 276.4 | 90.3 | 8.9 |
1026 | 322.2 | 115.6 | 10.4 |
1027 | 148.5 | 108.6 | 6.1 |
1028 | 159.1 | 0.0 | 11.3 |
1029 | 260.9 | 78.3 | 8.6 |
1030 rows × 3 columns
[6]:
#
# Read columns selected by name
# =============================================================================
# usecols also accepts the header names of the columns to load.
#
dataset = pd.read_csv(
    remote_file,
    usecols=[
        "cement",
        "ash",
        "superplastic",
    ],
)
dataset
[6]:
cement | ash | superplastic | |
---|---|---|---|
0 | 540.0 | 0.0 | 2.5 |
1 | 540.0 | 0.0 | 2.5 |
2 | 332.5 | 0.0 | 0.0 |
3 | 332.5 | 0.0 | 0.0 |
4 | 198.6 | 0.0 | 0.0 |
... | ... | ... | ... |
1025 | 276.4 | 90.3 | 8.9 |
1026 | 322.2 | 115.6 | 10.4 |
1027 | 148.5 | 108.6 | 6.1 |
1028 | 159.1 | 0.0 | 11.3 |
1029 | 260.9 | 78.3 | 8.6 |
1030 rows × 3 columns
[7]:
#
# Specify the data type of a column
# =============================================================================
# dtype maps column names to types. Here 'cement' is loaded as str, so it is
# reported as object in dataset.dtypes while the other columns are inferred.
#
dataset = pd.read_csv(remote_file, dtype=dict(cement=str))
dataset.dtypes
[7]:
cement object
slag float64
ash float64
water float64
superplastic float64
coarseagg float64
fineagg float64
age int64
strength float64
dtype: object
[8]:
#
# Dataset sin header
# =============================================================================
#
[9]:
%%writefile /tmp/data.csv
Isabelle I. Dotson,"P.O. Box 477, 7357 Cras St.",LamontzŽe,12144-86218,02.24.21,21
Erin F. Munoz,"P.O. Box 737, 6015 Ligula St.",Springfield,6373,10.15.20,21
Troy G. Harrell,3706 Ut St.,Herne,69259,06.28.21,23
Yen W. Summers,"P.O. Box 939, 8016 Egestas Av.",Elversele,93452,05.24.21,21
Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24
Yuri O. Head,"P.O. Box 214, 231 Dapibus Ave",Lac-Serent,4464,12.22.21,17
Ima A. Richard,"Ap #496-7181 Ullamcorper, St.",Lehrte,21449,09.12.21,22
Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21
Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15
Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19
Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23
Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14
Christian T. Mercer,"P.O. Box 858, 1997 Elit, Avenue",Guaymas,21574,10.20.20,19
Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25
Wilma I. Patton,"P.O. Box 299, 7144 Orci Street",Palermo,233311,12.27.20,17
Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21
Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19
May K. Wood,"777-1067 Auctor, Ave",Maastricht,754025,01.06.21,13
Beck Q. Monroe,"P.O. Box 123, 9085 Lorem, Rd.",Reading,27665,08.03.22,24
Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11
Serina X. Lang,"P.O. Box 780, 2133 Rutrum Road",Bromyard,Z7366,07.02.21,14
Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17
Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24
Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19
Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23
Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20
Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10
Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20
Serina B. Nash,"P.O. Box 143, 5197 At Avenue",San Costantino Calabro,77234,10.31.21,18
Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19
Overwriting /tmp/data.csv
[10]:
# The file has no header line, so the column names are supplied through
# the names option.
column_names = ["Name", "Address", "City", "ZipCode", "Date", "Score"]
dataset = pd.read_csv("/tmp/data.csv", names=column_names)
dataset.head(n=5)
[10]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | Isabelle I. Dotson | P.O. Box 477, 7357 Cras St. | LamontzŽe | 12144-86218 | 02.24.21 | 21 |
1 | Erin F. Munoz | P.O. Box 737, 6015 Ligula St. | Springfield | 6373 | 10.15.20 | 21 |
2 | Troy G. Harrell | 3706 Ut St. | Herne | 69259 | 06.28.21 | 23 |
3 | Yen W. Summers | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | 05.24.21 | 21 |
4 | Jerome B. Carney | 298-7004 Natoque St. | Osimo | 20619 | 09.23.20 | 24 |
[11]:
#
# Uso de converters
# =============================================================================
#
[12]:
%%writefile /tmp/data.csv
Name,Address,City,ZipCode,Date,Score
Isabelle I. Dotson,"P.O. Box 477, 7357 Cras St.",LamontzŽe,12144-86218,02.24.21,21
Erin F. Munoz,"P.O. Box 737, 6015 Ligula St.",Springfield,6373,10.15.20,21
Troy G. Harrell,3706 Ut St.,Herne,69259,06.28.21,23
Yen W. Summers,"P.O. Box 939, 8016 Egestas Av.",Elversele,93452,05.24.21,21
Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24
Yuri O. Head,"P.O. Box 214, 231 Dapibus Ave",Lac-Serent,4464,12.22.21,17
Ima A. Richard,"Ap #496-7181 Ullamcorper, St.",Lehrte,21449,09.12.21,22
Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21
Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15
Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19
Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23
Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14
Christian T. Mercer,"P.O. Box 858, 1997 Elit, Avenue",Guaymas,21574,10.20.20,19
Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25
Wilma I. Patton,"P.O. Box 299, 7144 Orci Street",Palermo,233311,12.27.20,17
Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21
Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19
May K. Wood,"777-1067 Auctor, Ave",Maastricht,754025,01.06.21,13
Beck Q. Monroe,"P.O. Box 123, 9085 Lorem, Rd.",Reading,27665,08.03.22,24
Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11
Serina X. Lang,"P.O. Box 780, 2133 Rutrum Road",Bromyard,Z7366,07.02.21,14
Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17
Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24
Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19
Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23
Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20
Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10
Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20
Serina B. Nash,"P.O. Box 143, 5197 At Avenue",San Costantino Calabro,77234,10.31.21,18
Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19
Overwriting /tmp/data.csv
[13]:
# converters maps a column name to a function applied to every value of that
# column while the file is read; here each 'Name' value is upper-cased.
dataset = pd.read_csv(
    "/tmp/data.csv",
    converters={"Name": str.upper},
)
dataset.head(n=5)
[13]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | ISABELLE I. DOTSON | P.O. Box 477, 7357 Cras St. | LamontzŽe | 12144-86218 | 02.24.21 | 21 |
1 | ERIN F. MUNOZ | P.O. Box 737, 6015 Ligula St. | Springfield | 6373 | 10.15.20 | 21 |
2 | TROY G. HARRELL | 3706 Ut St. | Herne | 69259 | 06.28.21 | 23 |
3 | YEN W. SUMMERS | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | 05.24.21 | 21 |
4 | JEROME B. CARNEY | 298-7004 Natoque St. | Osimo | 20619 | 09.23.20 | 24 |
[14]:
#
# Date fields
# =============================================================================
# parse_dates lists the columns to convert to dates while reading. The 'Date'
# values are stored as month.day.year text (e.g. 02.24.21 -> 2021-02-24).
#
dataset = pd.read_csv("/tmp/data.csv", parse_dates=["Date"])
dataset.head(n=5)
[14]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | Isabelle I. Dotson | P.O. Box 477, 7357 Cras St. | LamontzŽe | 12144-86218 | 2021-02-24 | 21 |
1 | Erin F. Munoz | P.O. Box 737, 6015 Ligula St. | Springfield | 6373 | 2020-10-15 | 21 |
2 | Troy G. Harrell | 3706 Ut St. | Herne | 69259 | 2021-06-28 | 23 |
3 | Yen W. Summers | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | 2021-05-24 | 21 |
4 | Jerome B. Carney | 298-7004 Natoque St. | Osimo | 20619 | 2020-09-23 | 24 |
[15]:
#
# Valores Faltantes
# =============================================================================
# En el siguiente archivo se eliminaron varios datos. Un campo
# faltante queda vacío entre dos comas, por ejemplo "dato,,dato".
#
[16]:
%%writefile /tmp/data.csv
Name,Address,City,ZipCode,Date,Score
Isabelle I. Dotson,,LamontzŽe,12144-86218,02.24.21,21
Erin F. Munoz,"P.O. Box 737, 6015 Ligula St.",Springfield,6373,10.15.20,21
Troy G. Harrell,3706 Ut St.,Herne,69259,
Yen W. Summers,"P.O. Box 939, 8016 Egestas Av.",Elversele,93452,05.24.21,21
Jerome B. Carney,298-7004 Natoque St.,Osimo,,09.23.20,24
Yuri O. Head,"P.O. Box 214, 231 Dapibus Ave",Lac-Serent,4464,12.22.21,17
Ima A. Richard,"Ap #496-7181 Ullamcorper, St.",Lehrte,21449,09.12.21,22
Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21
Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15
Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19
Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23
Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14
Christian T. Mercer,"P.O. Box 858, 1997 Elit, Avenue",Guaymas,21574,10.20.20,19
Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25
Wilma I. Patton,"P.O. Box 299, 7144 Orci Street",Palermo,233311,12.27.20,17
Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21
Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19
May K. Wood,"777-1067 Auctor, Ave",Maastricht,754025,01.06.21,13
Beck Q. Monroe,"P.O. Box 123, 9085 Lorem, Rd.",Reading,27665,08.03.22,24
Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11
Serina X. Lang,"P.O. Box 780, 2133 Rutrum Road",Bromyard,Z7366,07.02.21,14
Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17
Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24
Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,
Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23
Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20
Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10
Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20
Serina B. Nash,"P.O. Box 143, 5197 At Avenue",San Costantino Calabro,77234,10.31.21,18
Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19
Overwriting /tmp/data.csv
[17]:
# With the default options, empty fields are loaded as NaN. The 'Score'
# column is displayed as float (21.0) because it contains missing values.
dataset = pd.read_csv("/tmp/data.csv")
dataset.head(n=5)
[17]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | Isabelle I. Dotson | NaN | LamontzŽe | 12144-86218 | 02.24.21 | 21.0 |
1 | Erin F. Munoz | P.O. Box 737, 6015 Ligula St. | Springfield | 6373 | 10.15.20 | 21.0 |
2 | Troy G. Harrell | 3706 Ut St. | Herne | 69259 | NaN | NaN |
3 | Yen W. Summers | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | 05.24.21 | 21.0 |
4 | Jerome B. Carney | 298-7004 Natoque St. | Osimo | NaN | 09.23.20 | 24.0 |
[18]:
#
# na_filter controls whether missing values are detected. With
# na_filter=False the detection is turned off, so empty fields are kept
# as empty text instead of being converted to NaN.
#
dataset = pd.read_csv("/tmp/data.csv", na_filter=False)
dataset.head(n=5)
[18]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | Isabelle I. Dotson | LamontzŽe | 12144-86218 | 02.24.21 | 21 | |
1 | Erin F. Munoz | P.O. Box 737, 6015 Ligula St. | Springfield | 6373 | 10.15.20 | 21 |
2 | Troy G. Harrell | 3706 Ut St. | Herne | 69259 | ||
3 | Yen W. Summers | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | 05.24.21 | 21 |
4 | Jerome B. Carney | 298-7004 Natoque St. | Osimo | 09.23.20 | 24 |
[19]:
#
# Caracteres marcando nulos
# =============================================================================
# En el siguiente archivo los nulos están
# indicados con '?'
#
[20]:
%%writefile /tmp/data.csv
Name,Address,City,ZipCode,Date,Score
Isabelle I. Dotson,?,LamontzŽe,12144-86218,02.24.21,21
Erin F. Munoz,"P.O. Box 737, 6015 Ligula St.",Springfield,6373,10.15.20,21
Troy G. Harrell,3706 Ut St.,Herne,?,06.28.21,23
Yen W. Summers,"P.O. Box 939, 8016 Egestas Av.",Elversele,93452,?
Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24
Yuri O. Head,"P.O. Box 214, 231 Dapibus Ave",Lac-Serent,4464,12.22.21,17
Ima A. Richard,"Ap #496-7181 Ullamcorper, St.",Lehrte,21449,09.12.21,22
Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21
Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15
Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19
Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23
Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14
Christian T. Mercer,"P.O. Box 858, 1997 Elit, Avenue",Guaymas,21574,10.20.20,19
Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25
Wilma I. Patton,"P.O. Box 299, 7144 Orci Street",Palermo,233311,12.27.20,17
Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21
Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19
May K. Wood,"777-1067 Auctor, Ave",Maastricht,754025,01.06.21,13
Beck Q. Monroe,"P.O. Box 123, 9085 Lorem, Rd.",Reading,27665,08.03.22,24
Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11
Serina X. Lang,"P.O. Box 780, 2133 Rutrum Road",Bromyard,Z7366,07.02.21,14
Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17
Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24
Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19
Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23
Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20
Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10
Owen Z. Odom,1659 Risus. Street,Valuyki,12758,02.03.21,20
Serina B. Nash,"P.O. Box 143, 5197 At Avenue",San Costantino Calabro,77234,10.31.21,18
Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19
Overwriting /tmp/data.csv
[21]:
#
# na_values declares additional strings that must be read as missing
# values; here every '?' field is loaded as NaN.
#
dataset = pd.read_csv("/tmp/data.csv", na_values="?")
dataset.head(n=5)
[21]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | Isabelle I. Dotson | NaN | LamontzŽe | 12144-86218 | 02.24.21 | 21.0 |
1 | Erin F. Munoz | P.O. Box 737, 6015 Ligula St. | Springfield | 6373 | 10.15.20 | 21.0 |
2 | Troy G. Harrell | 3706 Ut St. | Herne | NaN | 06.28.21 | 23.0 |
3 | Yen W. Summers | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | NaN | NaN |
4 | Jerome B. Carney | 298-7004 Natoque St. | Osimo | 20619 | 09.23.20 | 24.0 |
[22]:
#
# Líneas con errores
#
[23]:
%%writefile /tmp/data.csv
Name,Address,City,ZipCode,Date,Score
Isabelle I. Dotson,?,LamontzŽe,12144-86218,02.24.21,
Erin F. Munoz,
Troy G. Harrell,3706 Ut St.,Herne,?,06.28.21,23
Yen W. Summers,"P.O. Box 939, 8016 Egestas Av.",Elversele,93452,?
Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24
Yuri O. Head,"P.O. Box 214, 231 Dapibus Ave",Lac-Serent,4464,12.22.21,17
Ima A. Richard,"Ap #496-7181 Ullamcorper, St.",Lehrte,21449,09.12.21,22
Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21
Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15
Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19
Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23
Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14
Christian T. Mercer,"P.O. Box 858, 1997 Elit, Avenue",Guaymas,21574,10.20.20,19
Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25
Wilma I. Patton,"P.O. Box 299, 7144 Orci Street",Palermo,233311,12.27.20,17
Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21
Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19
May K. Wood,"777-1067 Auctor, Ave",Maastricht,754025,01.06.21,13
Beck Q. Monroe,"P.O. Box 123, 9085 Lorem, Rd.",Reading,27665,08.03.22,24
Amanda A. Marks,638-3732 Fermentum Road,Sindelfingen,678968,11.20.20,11
Serina X. Lang,"P.O. Box 780, 2133 Rutrum Road",Bromyard,Z7366,07.02.21,14
Lionel L. Bartlett,Ap #400-1963 Pellentesque. Ave,Aparecida de Goiânia,744476,12.21.21,17
Florence Z. Oconnor,839 Cras Road,Etawah,15197,11.29.20,24
Chloe Y. Sears,361-5600 Per St.,Mission,0787 KM,08.08.22,19
Mariam N. Tyler,589-6397 Scelerisque Road,Pozzuolo del Friuli,T5G 6M8,05.12.21,23
Dana A. Jefferson,Ap #142-8392 Consequat Avenue,Khammam,49399,02.06.21,20
Nola N. Chan,Ap #878-314 Faucibus Rd.,Requínoa,1640,06.10.22,10
Owen Z. Odom,1659 Risus. Street,
Serina B. Nash,"P.O. Box 143, 5197 At Avenue",San Costantino Calabro,77234,10.31.21,18
Salvador I. Powell,356-7423 Semper Road,Cardiff,Z4197,05.28.22,19
Overwriting /tmp/data.csv
[24]:
#
# on_bad_lines="warn" skips every malformed line (a line with too many
# fields) and emits a warning for it instead of aborting the read.
# It replaces the pair error_bad_lines=False / warn_bad_lines=True, which
# was deprecated in pandas 1.3 and removed in pandas 2.0.
#
# Lines with fewer fields than the header (e.g. "Erin F. Munoz,") are not
# treated as bad lines: they are kept and their missing fields become NaN.
#
dataset = pd.read_csv(
    "/tmp/data.csv",
    on_bad_lines="warn",
)
dataset.head()
[24]:
Name | Address | City | ZipCode | Date | Score | |
---|---|---|---|---|---|---|
0 | Isabelle I. Dotson | ? | LamontzŽe | 12144-86218 | 02.24.21 | NaN |
1 | Erin F. Munoz | NaN | NaN | NaN | NaN | NaN |
2 | Troy G. Harrell | 3706 Ut St. | Herne | ? | 06.28.21 | 23.0 |
3 | Yen W. Summers | P.O. Box 939, 8016 Egestas Av. | Elversele | 93452 | ? | NaN |
4 | Jerome B. Carney | 298-7004 Natoque St. | Osimo | 20619 | 09.23.20 | 24.0 |
[25]:
#
# Otros separadores
# ===============================================
# la opción sep=? permite especificar el tipo de
# separador
#