{ "cells": [ { "cell_type": "markdown", "id": "801723ff-42b4-4375-8600-1cd4357dc4f7", "metadata": {}, "source": [ "Importación de archivos CSV --- 8:17 min\n", "===\n", "\n", "* 8:17 min | Última modificación: Octubre 13, 2021 | [YouTube](https://youtu.be/_taGI112oFo)" ] }, { "cell_type": "code", "execution_count": 1, "id": "e7f88287-7ad8-4dce-ab3c-402e0cad8994", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing /tmp/data.csv\n" ] } ], "source": [ "%%writefile /tmp/data.csv\n", "Name,Address,City,ZipCode,Date,Score\n", "Isabelle I. Dotson,\"P.O. Box 477, 7357 Cras St.\",LamontzŽe,12144-86218,02.24.21,21\n", "Erin F. Munoz,\"P.O. Box 737, 6015 Ligula St.\",Springfield,6373,10.15.20,21\n", "Troy G. Harrell,3706 Ut St.,Herne,69259,06.28.21,23\n", "Yen W. Summers,\"P.O. Box 939, 8016 Egestas Av.\",Elversele,93452,05.24.21,21\n", "Jerome B. Carney,298-7004 Natoque St.,Osimo,20619,09.23.20,24\n", "Yuri O. Head,\"P.O. Box 214, 231 Dapibus Ave\",Lac-Serent,4464,12.22.21,17\n", "Ima A. Richard,\"Ap #496-7181 Ullamcorper, St.\",Lehrte,21449,09.12.21,22\n", "Eleanor Q. Guerrero,5425 Ornare St.,Arendonk,00636,02.15.22,21\n", "Stuart F. Daniels,719-8478 Nunc St.,Mataram,43168,06.20.21,15\n", "Cara G. Beach,648-517 Velit. Av.,Casciana Terme,78450,02.12.22,19\n", "Nolan E. Cortez,Ap #766-6998 Gravida Rd.,Manfredonia,09534,12.14.20,23\n", "Darius H. Beach,Ap #251-7151 Donec Av.,Valuyki,903927,10.31.21,14\n", "Christian T. Mercer,\"P.O. Box 858, 1997 Elit, Avenue\",Guaymas,21574,10.20.20,19\n", "Florence F. Gordon,543-831 Est. Rd.,Valcourt,6387,12.01.21,25\n", "Wilma I. Patton,\"P.O. Box 299, 7144 Orci Street\",Palermo,233311,12.27.20,17\n", "Rajah I. Jensen,Ap #782-6146 Turpis St.,Arquata del Tronto,15936,02.06.21,21\n", "Zoe U. Whitfield,Ap #200-7243 Sit St.,Chesapeake,W2A 0QO,12.01.21,19\n", "May K. Wood,\"777-1067 Auctor, Ave\",Maastricht,754025,01.06.21,13\n", "Beck Q. Monroe,\"P.O. 
# Import the standard-library csv module and display every attribute name it
# defines (reader/writer classes, dialects, quoting constants, and also the
# private and dunder names), as an overview of the API used in later cells.
import csv

dir(csv)
3, "id": "2436ead8-b97d-4571-8418-4c0864646d4c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Name', 'Address', 'City', 'ZipCode', 'Date', 'Score']\n", "['Isabelle I. Dotson', 'P.O. Box 477, 7357 Cras St.', 'LamontzŽe', '12144-86218', '02.24.21', '21']\n", "['Erin F. Munoz', 'P.O. Box 737, 6015 Ligula St.', 'Springfield', '6373', '10.15.20', '21']\n", "['Troy G. Harrell', '3706 Ut St.', 'Herne', '69259', '06.28.21', '23']\n", "['Yen W. Summers', 'P.O. Box 939, 8016 Egestas Av.', 'Elversele', '93452', '05.24.21', '21']\n", "['Jerome B. Carney', '298-7004 Natoque St.', 'Osimo', '20619', '09.23.20', '24']\n", "['Yuri O. Head', 'P.O. Box 214, 231 Dapibus Ave', 'Lac-Serent', '4464', '12.22.21', '17']\n", "['Ima A. Richard', 'Ap #496-7181 Ullamcorper, St.', 'Lehrte', '21449', '09.12.21', '22']\n", "['Eleanor Q. Guerrero', '5425 Ornare St.', 'Arendonk', '00636', '02.15.22', '21']\n", "['Stuart F. Daniels', '719-8478 Nunc St.', 'Mataram', '43168', '06.20.21', '15']\n", "['Cara G. Beach', '648-517 Velit. Av.', 'Casciana Terme', '78450', '02.12.22', '19']\n", "['Nolan E. Cortez', 'Ap #766-6998 Gravida Rd.', 'Manfredonia', '09534', '12.14.20', '23']\n", "['Darius H. Beach', 'Ap #251-7151 Donec Av.', 'Valuyki', '903927', '10.31.21', '14']\n", "['Christian T. Mercer', 'P.O. Box 858, 1997 Elit, Avenue', 'Guaymas', '21574', '10.20.20', '19']\n", "['Florence F. Gordon', '543-831 Est. Rd.', 'Valcourt', '6387', '12.01.21', '25']\n", "['Wilma I. Patton', 'P.O. Box 299, 7144 Orci Street', 'Palermo', '233311', '12.27.20', '17']\n", "['Rajah I. Jensen', 'Ap #782-6146 Turpis St.', 'Arquata del Tronto', '15936', '02.06.21', '21']\n", "['Zoe U. Whitfield', 'Ap #200-7243 Sit St.', 'Chesapeake', 'W2A 0QO', '12.01.21', '19']\n", "['May K. Wood', '777-1067 Auctor, Ave', 'Maastricht', '754025', '01.06.21', '13']\n", "['Beck Q. Monroe', 'P.O. Box 123, 9085 Lorem, Rd.', 'Reading', '27665', '08.03.22', '24']\n", "['Amanda A. 
#
# Comma-delimited file read with csv.reader()
# =============================================================================
#
# newline="" hands line-ending handling over to the csv module (needed for
# quoted fields that contain line breaks), and the explicit encoding keeps the
# non-ASCII city names intact regardless of the platform's default locale.
#
with open("/tmp/data.csv", "r", newline="", encoding="utf-8") as csv_file:

    #
    # csv.reader() returns an iterator that yields each line of the file
    # as a list of strings
    #
    csv_reader = csv.reader(
        csv_file,
        delimiter=",",
        quotechar='"',
    )

    #
    # The loop has to stay inside the with block: the reader pulls lines
    # lazily from the file, which is closed as soon as the block exits
    #
    for row in csv_reader:
        print(row)
#
# Comma-delimited file read with csv.DictReader()
# =============================================================================
#
# Column names used as the keys of every row dictionary.
fieldnames = [
    "Name",
    "Address",
    "City",
    "ZipCode",
    "Date",
    "Score",
]

# newline="" is what the csv module expects from the file object; the explicit
# encoding avoids depending on the platform's default locale.
with open("/tmp/data.csv", "r", newline="", encoding="utf-8") as csv_file:

    csv_dict_reader = csv.DictReader(
        csv_file,
        delimiter=",",
        quotechar='"',
        fieldnames=fieldnames,
    )

    #
    # When fieldnames is passed explicitly, DictReader treats the first line
    # of the file as data. This file does carry a header line, so it is
    # consumed here; otherwise it would be printed as "Name ----> Score".
    # The None default keeps an empty file from raising StopIteration.
    #
    next(csv_dict_reader, None)

    for row in csv_dict_reader:
        print(row["Name"] + " ----> " + row["Score"])
#
# Rows as namedtuple records
# =============================================================================
#
from collections import namedtuple

# Record type with one attribute per CSV column. It is named Record (not
# fieldnames) because it is a class, not a list of column names.
Record = namedtuple(
    "Record",
    "Name, Address, City, ZipCode, Date, Score",
)

# newline="" is what the csv module expects from the file object; the explicit
# encoding avoids depending on the platform's default locale.
with open("/tmp/data.csv", "r", newline="", encoding="utf-8") as csv_file:

    csv_reader = csv.reader(
        csv_file,
        delimiter=",",
        quotechar='"',
    )

    #
    # Discard the header line so it is not turned into a Record and printed
    # as "Name ----> Score". The None default tolerates an empty file.
    #
    next(csv_reader, None)

    #
    # Record._make builds one Record from each list of fields, so columns
    # can be accessed by attribute name instead of by position
    #
    for row in map(Record._make, csv_reader):
        print(row.Name + " ----> " + row.Score)
Harrell'), ('Address', '3706 Ut St.'), ('City', 'Herne'), ('ZipCode', '69259'), ('Date', '06.28.21'), ('Score', '23')])\n", "OrderedDict([('Name', 'Yen W. Summers'), ('Address', 'P.O. Box 939, 8016 Egestas Av.'), ('City', 'Elversele'), ('ZipCode', '93452'), ('Date', '05.24.21'), ('Score', '21')])\n", "OrderedDict([('Name', 'Jerome B. Carney'), ('Address', '298-7004 Natoque St.'), ('City', 'Osimo'), ('ZipCode', '20619'), ('Date', '09.23.20'), ('Score', '24')])\n", "OrderedDict([('Name', 'Yuri O. Head'), ('Address', 'P.O. Box 214, 231 Dapibus Ave'), ('City', 'Lac-Serent'), ('ZipCode', '4464'), ('Date', '12.22.21'), ('Score', '17')])\n", "OrderedDict([('Name', 'Ima A. Richard'), ('Address', 'Ap #496-7181 Ullamcorper, St.'), ('City', 'Lehrte'), ('ZipCode', '21449'), ('Date', '09.12.21'), ('Score', '22')])\n", "OrderedDict([('Name', 'Eleanor Q. Guerrero'), ('Address', '5425 Ornare St.'), ('City', 'Arendonk'), ('ZipCode', '00636'), ('Date', '02.15.22'), ('Score', '21')])\n", "OrderedDict([('Name', 'Stuart F. Daniels'), ('Address', '719-8478 Nunc St.'), ('City', 'Mataram'), ('ZipCode', '43168'), ('Date', '06.20.21'), ('Score', '15')])\n", "OrderedDict([('Name', 'Cara G. Beach'), ('Address', '648-517 Velit. Av.'), ('City', 'Casciana Terme'), ('ZipCode', '78450'), ('Date', '02.12.22'), ('Score', '19')])\n", "OrderedDict([('Name', 'Nolan E. Cortez'), ('Address', 'Ap #766-6998 Gravida Rd.'), ('City', 'Manfredonia'), ('ZipCode', '09534'), ('Date', '12.14.20'), ('Score', '23')])\n", "OrderedDict([('Name', 'Darius H. Beach'), ('Address', 'Ap #251-7151 Donec Av.'), ('City', 'Valuyki'), ('ZipCode', '903927'), ('Date', '10.31.21'), ('Score', '14')])\n", "OrderedDict([('Name', 'Christian T. Mercer'), ('Address', 'P.O. Box 858, 1997 Elit, Avenue'), ('City', 'Guaymas'), ('ZipCode', '21574'), ('Date', '10.20.20'), ('Score', '19')])\n", "OrderedDict([('Name', 'Florence F. Gordon'), ('Address', '543-831 Est. 
Rd.'), ('City', 'Valcourt'), ('ZipCode', '6387'), ('Date', '12.01.21'), ('Score', '25')])\n", "OrderedDict([('Name', 'Wilma I. Patton'), ('Address', 'P.O. Box 299, 7144 Orci Street'), ('City', 'Palermo'), ('ZipCode', '233311'), ('Date', '12.27.20'), ('Score', '17')])\n", "OrderedDict([('Name', 'Rajah I. Jensen'), ('Address', 'Ap #782-6146 Turpis St.'), ('City', 'Arquata del Tronto'), ('ZipCode', '15936'), ('Date', '02.06.21'), ('Score', '21')])\n", "OrderedDict([('Name', 'Zoe U. Whitfield'), ('Address', 'Ap #200-7243 Sit St.'), ('City', 'Chesapeake'), ('ZipCode', 'W2A 0QO'), ('Date', '12.01.21'), ('Score', '19')])\n", "OrderedDict([('Name', 'May K. Wood'), ('Address', '777-1067 Auctor, Ave'), ('City', 'Maastricht'), ('ZipCode', '754025'), ('Date', '01.06.21'), ('Score', '13')])\n", "OrderedDict([('Name', 'Beck Q. Monroe'), ('Address', 'P.O. Box 123, 9085 Lorem, Rd.'), ('City', 'Reading'), ('ZipCode', '27665'), ('Date', '08.03.22'), ('Score', '24')])\n", "OrderedDict([('Name', 'Amanda A. Marks'), ('Address', '638-3732 Fermentum Road'), ('City', 'Sindelfingen'), ('ZipCode', '678968'), ('Date', '11.20.20'), ('Score', '11')])\n", "OrderedDict([('Name', 'Serina X. Lang'), ('Address', 'P.O. Box 780, 2133 Rutrum Road'), ('City', 'Bromyard'), ('ZipCode', 'Z7366'), ('Date', '07.02.21'), ('Score', '14')])\n", "OrderedDict([('Name', 'Lionel L. Bartlett'), ('Address', 'Ap #400-1963 Pellentesque. Ave'), ('City', 'Aparecida de Goiânia'), ('ZipCode', '744476'), ('Date', '12.21.21'), ('Score', '17')])\n", "OrderedDict([('Name', 'Florence Z. Oconnor'), ('Address', '839 Cras Road'), ('City', 'Etawah'), ('ZipCode', '15197'), ('Date', '11.29.20'), ('Score', '24')])\n", "OrderedDict([('Name', 'Chloe Y. Sears'), ('Address', '361-5600 Per St.'), ('City', 'Mission'), ('ZipCode', '0787 KM'), ('Date', '08.08.22'), ('Score', '19')])\n", "OrderedDict([('Name', 'Mariam N. 
#
# Returning each record as a dictionary
# =============================================================================
#
from collections import namedtuple

# Record type with one attribute per CSV column. It is named Record (not
# fieldnames) because it is a class, not a list of column names.
Record = namedtuple(
    "Record",
    "Name, Address, City, ZipCode, Date, Score",
)

# newline="" is what the csv module expects from the file object; the explicit
# encoding avoids depending on the platform's default locale.
with open("/tmp/data.csv", "r", newline="", encoding="utf-8") as csv_file:

    csv_reader = csv.reader(
        csv_file,
        delimiter=",",
        quotechar='"',
    )

    #
    # Discard the header line so it is not converted into a record whose
    # values are just the column names. The None default tolerates an
    # empty file.
    #
    next(csv_reader, None)

    #
    # _asdict() converts each Record into a mapping of column name -> value
    #
    for row in map(Record._make, csv_reader):
        print(row._asdict())