{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Importación de archivos numéricos usando NumPy --- 6:53 min\n",
    "===\n",
    "\n",
    "* 6:53 min | Última modificación: Octubre 13, 2021 | [YouTube](https://youtu.be/3-jElszJ3nU)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/numpy_data.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/numpy_data.txt\n",
    "7.318719402026263587e-02 1.775082190597712106e-01 9.205824952147805273e-01\n",
    "4.062207718332084827e-01 6.691909950327930012e-01 4.691625868153078693e-01\n",
    "6.963794427590925817e-01 1.380183788805662282e-01 8.086264398068685466e-01\n",
    "5.893088349077085786e-01 3.899639085394711602e-01 8.655591960123206752e-01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.07318719, 0.17750822, 0.9205825 ],\n",
       " [0.40622077, 0.669191 , 0.46916259],\n",
       " [0.69637944, 0.13801838, 0.80862644],\n",
       " [0.58930883, 0.38996391, 0.8655592 ]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Read the text file written above\n",
    "# ===============================================\n",
    "#\n",
    "numpy_data = np.loadtxt(fname=\"/tmp/numpy_data.txt\")\n",
    "numpy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Number of values read\n",
    "#\n",
    "numpy_data.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/comma_delimited_data.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/comma_delimited_data.txt\n",
    "1,2,3\n",
    "4,5,6\n",
    "7,8,9\n",
    "10,11,12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1., 2., 3.],\n",
       " [ 4., 5., 6.],\n",
       " [ 7., 8., 9.],\n",
       " [10., 11., 12.]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Read a comma-delimited file\n",
    "# ===============================================\n",
    "#\n",
    "numpy_data = np.loadtxt(fname=\"/tmp/comma_delimited_data.txt\", delimiter=\",\")\n",
    "numpy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1., 3.],\n",
       " [ 4., 6.],\n",
       " [ 7., 9.],\n",
       " [10., 12.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Select which columns to read\n",
    "# ===============================================\n",
    "#\n",
    "numpy_data = np.loadtxt(\n",
    "    fname=\"/tmp/comma_delimited_data.txt\",\n",
    "    delimiter=\",\",\n",
    "    usecols=(0, 2),  # keep only the first and third columns\n",
    ")\n",
    "numpy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/comma_delimited_data.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/comma_delimited_data.txt\n",
    "col_A,col_B,col_C\n",
    "1,2,3\n",
    "4,5,6\n",
    "7,8,9\n",
    "10,11,12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1., 2., 3.],\n",
       " [ 4., 5., 6.],\n",
       " [ 7., 8., 9.],\n",
       " [10., 11., 12.]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Read a file that has a header row\n",
    "# ===============================================\n",
    "#\n",
    "numpy_data = np.loadtxt(\n",
    "    fname=\"/tmp/comma_delimited_data.txt\",\n",
    "    delimiter=\",\",\n",
    "    skiprows=1,  # skip the header line with the column names\n",
    ")\n",
    "numpy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1, 2, 3],\n",
       " [ 4, 5, 6],\n",
       " [ 7, 8, 9],\n",
       " [10, 11, 12]], dtype=uint64)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Specify the data type of the result\n",
    "# ===============================================\n",
    "#\n",
    "numpy_data = np.loadtxt(\n",
    "    fname=\"/tmp/comma_delimited_data.txt\",\n",
    "    delimiter=\",\",\n",
    "    skiprows=1,\n",
    "    dtype=np.uint,\n",
    ")\n",
    "numpy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[101, 2, 3],\n",
       " [104, 5, 6],\n",
       " [107, 8, 9],\n",
       " [110, 11, 12]], dtype=uint64)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Apply a function to a column while reading\n",
    "# ===============================================\n",
    "#\n",
    "\n",
    "\n",
    "def increase(id):\n",
    "    \"\"\"Return the integer value of ``id`` plus 100.\n",
    "\n",
    "    Passed below as the converter for column 0, so it is called once\n",
    "    per row with the raw field read from the file.\n",
    "    \"\"\"\n",
    "    parsed = int(id)\n",
    "    return parsed + 100\n",
    "\n",
    "\n",
    "numpy_data = np.loadtxt(\n",
    "    fname=\"/tmp/comma_delimited_data.txt\",\n",
    "    delimiter=\",\",\n",
    "    skiprows=1,\n",
    "    dtype=np.uint,\n",
    "    converters={0: increase},  # only column 0 goes through increase()\n",
    ")\n",
    "\n",
    "numpy_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /tmp/missing_data.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile /tmp/missing_data.txt\n",
    "1,,3\n",
    "4,5,6\n",
    "7,8,\n",
    "10,11,12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1., nan, 3.],\n",
       " [ 4., 5., 6.],\n",
       " [ 7., 8., nan],\n",
       " [10., 11., 12.]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#\n",
    "# Read a file with missing values\n",
    "# ===============================================\n",
    "#\n",
    "numpy_data = np.genfromtxt(fname=\"/tmp/missing_data.txt\", delimiter=\",\")\n",
    "numpy_data"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python3"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  },
  "nteract": {
   "version": "0.12.3"
  },
  "toc-autonumbering": false,
  "toc-showcode": false,
  "toc-showmarkdowntxt": false,
  "toc-showtags": false
 },
 "nbformat": 4,
 "nbformat_minor": 4
}