Importación de archivos numéricos usando NumPy — 6:53 min

  • 6:53 min | Última modificación: 13 de octubre de 2021 | YouTube

[1]:
%%writefile /tmp/numpy_data.txt
7.318719402026263587e-02 1.775082190597712106e-01 9.205824952147805273e-01
4.062207718332084827e-01 6.691909950327930012e-01 4.691625868153078693e-01
6.963794427590925817e-01 1.380183788805662282e-01 8.086264398068685466e-01
5.893088349077085786e-01 3.899639085394711602e-01 8.655591960123206752e-01
Overwriting /tmp/numpy_data.txt
[2]:
import numpy as np
[3]:
#
# Reading the text file
# ===============================================
# No delimiter is given, so np.loadtxt splits the fields on whitespace and
# parses each one as a float.
#
numpy_data = np.loadtxt(fname="/tmp/numpy_data.txt")
numpy_data
[3]:
array([[0.07318719, 0.17750822, 0.9205825 ],
       [0.40622077, 0.669191  , 0.46916259],
       [0.69637944, 0.13801838, 0.80862644],
       [0.58930883, 0.38996391, 0.8655592 ]])
[4]:
#
# Number of values read (total element count across all rows and columns)
#
np.size(numpy_data)
[4]:
12
[5]:
%%writefile /tmp/comma_delimited_data.txt
1,2,3
4,5,6
7,8,9
10,11,12
Overwriting /tmp/comma_delimited_data.txt
[6]:
#
# Reading a comma-delimited file
# ===============================================
# The fields are separated by commas rather than whitespace, so the
# delimiter has to be passed explicitly.
#
numpy_data = np.loadtxt(fname="/tmp/comma_delimited_data.txt", delimiter=",")
numpy_data
[6]:
array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.],
       [ 7.,  8.,  9.],
       [10., 11., 12.]])
[7]:
#
# Selecting which columns to read
# ===============================================
# usecols takes zero-based column indices: (0, 2) keeps the first and third
# columns and drops the middle one.
#
numpy_data = np.loadtxt(
    fname="/tmp/comma_delimited_data.txt",
    usecols=(0, 2),
    delimiter=",",
)
numpy_data
[7]:
array([[ 1.,  3.],
       [ 4.,  6.],
       [ 7.,  9.],
       [10., 12.]])
[8]:
%%writefile /tmp/comma_delimited_data.txt
col_A,col_B,col_C
1,2,3
4,5,6
7,8,9
10,11,12
Overwriting /tmp/comma_delimited_data.txt
[9]:
#
# Reading a file with a header
# ===============================================
# skiprows=1 makes np.loadtxt ignore the first line of the file, which holds
# the column names instead of numbers.
#
numpy_data = np.loadtxt(
    fname="/tmp/comma_delimited_data.txt",
    skiprows=1,
    delimiter=",",
)
numpy_data
[9]:
array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.],
       [ 7.,  8.,  9.],
       [10., 11., 12.]])
[10]:
#
# Specifying the data type
# ===============================================
# dtype=np.uint makes np.loadtxt return unsigned integers instead of the
# default floats.
#
numpy_data = np.loadtxt(
    fname="/tmp/comma_delimited_data.txt",
    dtype=np.uint,
    skiprows=1,
    delimiter=",",
)
numpy_data
[10]:
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]], dtype=uint64)
[11]:
#
# Applying a function to a column
# ===============================================
#


def increase(id):
    """Parse a raw field as an integer and shift it up by 100.

    Passed to ``np.loadtxt`` through ``converters`` so that every value of
    the selected column is transformed while the file is being read.
    """
    parsed = int(id)
    return parsed + 100


# converters maps a zero-based column index to a callable: column 0 goes
# through `increase`, while the remaining columns are parsed as usual.
numpy_data = np.loadtxt(
    fname="/tmp/comma_delimited_data.txt",
    converters={0: increase},
    dtype=np.uint,
    skiprows=1,
    delimiter=",",
)

numpy_data
[11]:
array([[101,   2,   3],
       [104,   5,   6],
       [107,   8,   9],
       [110,  11,  12]], dtype=uint64)
[12]:
%%writefile /tmp/missing_data.txt
1,,3
4,5,6
7,8,
10,11,12
Overwriting /tmp/missing_data.txt
[13]:
#
# Reading a file with missing data
# ===============================================
# np.genfromtxt accepts empty fields; with the default float dtype they show
# up as nan in the resulting array.
#
numpy_data = np.genfromtxt(fname="/tmp/missing_data.txt", delimiter=",")
numpy_data
[13]:
array([[ 1., nan,  3.],
       [ 4.,  5.,  6.],
       [ 7.,  8., nan],
       [10., 11., 12.]])