Actualización de campos — 7:30 min
7:30 min | Última modificación: Octubre 6, 2021
[1]:
import pandas as pd
pd.set_option("display.notebook_repr_html", False)
Método update()
[2]:
%%writefile /tmp/data_1.csv
clientId,name,bonus,costs
10,Omar Y. Fletcher,0,9999
11,Buffy W. Vincent,1,
12,Mira N. Franklin,2,
13,Ferris Q. Le,3,4
Overwriting /tmp/data_1.csv
[3]:
%%writefile /tmp/data_2.csv
clientId,location,bonus,costs
12,"P.O. Box 445, 323 Cursus Rd.",100,12
13,Ap #791-3809 Eu Street,,13
14,6715 Diam. Rd.,200,
Overwriting /tmp/data_2.csv
[4]:
df_1 = pd.read_csv("/tmp/data_1.csv")
df_1.set_index('clientId', inplace=True)
df_2 = pd.read_csv("/tmp/data_2.csv")
df_2.set_index('clientId', inplace=True)
display(
df_1,
"",
df_2,
)
df_1.update(df_2)
#
# Update df_1.
#
# Note que costs para el primer cliente se
# actualiza con el valor del segundo df
#
# Tambien se actualiza el ultimo registro
# para el campo bonus
#
display(
"",
df_1
)
name bonus costs
clientId
10 Omar Y. Fletcher 0 9999.0
11 Buffy W. Vincent 1 NaN
12 Mira N. Franklin 2 NaN
13 Ferris Q. Le 3 4.0
''
location bonus costs
clientId
12 P.O. Box 445, 323 Cursus Rd. 100.0 12.0
13 Ap #791-3809 Eu Street NaN 13.0
14 6715 Diam. Rd. 200.0 NaN
''
name bonus costs
clientId
10 Omar Y. Fletcher 0.0 9999.0
11 Buffy W. Vincent 1.0 NaN
12 Mira N. Franklin 100.0 12.0
13 Ferris Q. Le 3.0 13.0
[5]:
df_1 = pd.read_csv("/tmp/data_1.csv")
df_1.set_index('clientId', inplace=True)
df_2 = pd.read_csv("/tmp/data_2.csv")
df_2.set_index('clientId', inplace=True)
display(
df_1,
"",
df_2,
)
df_2.update(df_1)
#
# Update df_1.
#
# Note que costs para el primer cliente se
# actualiza con el valor del segundo df
#
# Tambien se actualiza el ultimo registro
# para el campo bonus
#
display(
"",
df_2
)
name bonus costs
clientId
10 Omar Y. Fletcher 0 9999.0
11 Buffy W. Vincent 1 NaN
12 Mira N. Franklin 2 NaN
13 Ferris Q. Le 3 4.0
''
location bonus costs
clientId
12 P.O. Box 445, 323 Cursus Rd. 100.0 12.0
13 Ap #791-3809 Eu Street NaN 13.0
14 6715 Diam. Rd. 200.0 NaN
''
location bonus costs
clientId
12 P.O. Box 445, 323 Cursus Rd. 2.0 12.0
13 Ap #791-3809 Eu Street 3.0 4.0
14 6715 Diam. Rd. 200.0 NaN
Método combine() sobre columnas
[6]:
%%writefile /tmp/data_1.csv
A,B
0,4
0,4
Overwriting /tmp/data_1.csv
[7]:
%%writefile /tmp/data_2.csv
A,B
1,3
1,3
Overwriting /tmp/data_2.csv
[8]:
df_1 = pd.read_csv("/tmp/data_1.csv")
df_2 = pd.read_csv("/tmp/data_2.csv")
display(
df_1,
"",
df_2,
""
)
#
# Mínimo de la suma de columnas
#
take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
df_1 = df_1.combine(
other=df_2,
func=take_smaller,
)
display(df_1)
A B
0 0 4
1 0 4
''
A B
0 1 3
1 1 3
''
A B
0 0 3
1 0 3
Método combine() sobre elementos
[9]:
%%writefile /tmp/data_1.csv
A,B
5,2
0,4
Overwriting /tmp/data_1.csv
[10]:
%%writefile /tmp/data_2.csv
A,B
1,3
1,3
Overwriting /tmp/data_2.csv
[11]:
import numpy as np
df_1 = pd.read_csv("/tmp/data_1.csv")
df_2 = pd.read_csv("/tmp/data_2.csv")
display(
df_1,
"",
df_2,
""
)
df_1 = df_1.combine(
other=df_2,
func=np.minimum,
)
display(df_1)
A B
0 5 2
1 0 4
''
A B
0 1 3
1 1 3
''
A B
0 1 2
1 0 3
Método combine_first()
Combina dos datafrmames llenando los valores nulos del primer dataframe con los valores del segundo dataframe.
[12]:
df_1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})
df_2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
result = df_1.combine_first(df_2)
display(
df_1,
"",
df_2,
"",
result
)
A B
0 NaN NaN
1 0.0 4.0
''
A B
0 1 3
1 1 3
''
A B
0 1.0 3.0
1 0.0 4.0