Procesamiento de un archivo usando list comprehensions — 6:34 min
6:34 min | Última modificación: Octubre 5, 2021
[1]:
%%writefile /tmp/data.csv
Date,Year,CustomerID,Value
2013-01-12,2013,1,100
2014-05-12,2014,1,100
2013-02-25,2013,2,200
2013-04-04,2013,1,100
2013-06-21,2013,2,200
2014-05-18,2014,1,100
2014-06-23,2014,2,200
2013-02-28,2013,1,100
2013-08-02,2013,1,100
Overwriting /tmp/data.csv
[2]:
# Read the CSV file into a list of raw lines (each still ends in "\n").
# The with-block guarantees the file handle is closed as soon as the read
# finishes, instead of leaving it to the garbage collector.
with open("/tmp/data.csv", "r") as csv_file:
    x = csv_file.readlines()
x
[2]:
['Date,Year,CustomerID,Value\n',
'2013-01-12,2013,1,100\n',
'2014-05-12,2014,1,100\n',
'2013-02-25,2013,2,200\n',
'2013-04-04,2013,1,100\n',
'2013-06-21,2013,2,200\n',
'2014-05-18,2014,1,100\n',
'2014-06-23,2014,2,200\n',
'2013-02-28,2013,1,100\n',
'2013-08-02,2013,1,100\n']
[3]:
# Drop the newline characters from every line read from the file.
x = [line.replace("\n", "") for line in x]
x
[3]:
['Date,Year,CustomerID,Value',
'2013-01-12,2013,1,100',
'2014-05-12,2014,1,100',
'2013-02-25,2013,2,200',
'2013-04-04,2013,1,100',
'2013-06-21,2013,2,200',
'2014-05-18,2014,1,100',
'2014-06-23,2014,2,200',
'2013-02-28,2013,1,100',
'2013-08-02,2013,1,100']
[4]:
# Turn each comma-separated line into a list of string fields.
x = [line.split(",") for line in x]
x
[4]:
[['Date', 'Year', 'CustomerID', 'Value'],
['2013-01-12', '2013', '1', '100'],
['2014-05-12', '2014', '1', '100'],
['2013-02-25', '2013', '2', '200'],
['2013-04-04', '2013', '1', '100'],
['2013-06-21', '2013', '2', '200'],
['2014-05-18', '2014', '1', '100'],
['2014-06-23', '2014', '2', '200'],
['2013-02-28', '2013', '1', '100'],
['2013-08-02', '2013', '1', '100']]
[5]:
# Extract the Date field (column 0) of every data row; x[1:] skips the header.
[row[0] for row in x[1:]]
[5]:
['2013-01-12',
'2014-05-12',
'2013-02-25',
'2013-04-04',
'2013-06-21',
'2014-05-18',
'2014-06-23',
'2013-02-28',
'2013-08-02']
[6]:
# Split each Date string on "-" into its [year, month, day] parts.
[row[0].split("-") for row in x[1:]]
[6]:
[['2013', '01', '12'],
['2014', '05', '12'],
['2013', '02', '25'],
['2013', '04', '04'],
['2013', '06', '21'],
['2014', '05', '18'],
['2014', '06', '23'],
['2013', '02', '28'],
['2013', '08', '02']]
[7]:
# After splitting Date on "-", the month sits at index 1.
[row[0].split("-")[1] for row in x[1:]]
[7]:
['01', '05', '02', '04', '06', '05', '06', '02', '08']
[8]:
# Rebuild every data row with the month appended as a new trailing field.
# Only the slice x[1:] is reassigned, so the header row stays untouched.
x[1:] = [[*row, row[0].split("-")[1]] for row in x[1:]]
x
[8]:
[['Date', 'Year', 'CustomerID', 'Value'],
['2013-01-12', '2013', '1', '100', '01'],
['2014-05-12', '2014', '1', '100', '05'],
['2013-02-25', '2013', '2', '200', '02'],
['2013-04-04', '2013', '1', '100', '04'],
['2013-06-21', '2013', '2', '200', '06'],
['2014-05-18', '2014', '1', '100', '05'],
['2014-06-23', '2014', '2', '200', '06'],
['2013-02-28', '2013', '1', '100', '02'],
['2013-08-02', '2013', '1', '100', '08']]
[9]:
# Add the label for the new column to the header row (extended in place).
x[0] += ["Month"]
x
[9]:
[['Date', 'Year', 'CustomerID', 'Value', 'Month'],
['2013-01-12', '2013', '1', '100', '01'],
['2014-05-12', '2014', '1', '100', '05'],
['2013-02-25', '2013', '2', '200', '02'],
['2013-04-04', '2013', '1', '100', '04'],
['2013-06-21', '2013', '2', '200', '06'],
['2014-05-18', '2014', '1', '100', '05'],
['2014-06-23', '2014', '2', '200', '06'],
['2013-02-28', '2013', '1', '100', '02'],
['2013-08-02', '2013', '1', '100', '08']]
[10]:
# Keep only the rows whose Year field (column 1) is "2013". The header row is
# scanned as well, but its Year field is the text "Year", so it never matches.
[row for row in x if row[1] == "2013"]
[10]:
[['2013-01-12', '2013', '1', '100', '01'],
['2013-02-25', '2013', '2', '200', '02'],
['2013-04-04', '2013', '1', '100', '04'],
['2013-06-21', '2013', '2', '200', '06'],
['2013-02-28', '2013', '1', '100', '02'],
['2013-08-02', '2013', '1', '100', '08']]