Actualización de campos — 7:30 min

  • 7:30 min | Última modificación: Octubre 6, 2021

[1]:
import pandas as pd

pd.set_option("display.notebook_repr_html", False)

Método update()

[2]:
%%writefile /tmp/data_1.csv
clientId,name,bonus,costs
10,Omar Y. Fletcher,0,9999
11,Buffy W. Vincent,1,
12,Mira N. Franklin,2,
13,Ferris Q. Le,3,4
Overwriting /tmp/data_1.csv
[3]:
%%writefile /tmp/data_2.csv
clientId,location,bonus,costs
12,"P.O. Box 445, 323 Cursus Rd.",100,12
13,Ap #791-3809 Eu Street,,13
14,6715 Diam. Rd.,200,
Overwriting /tmp/data_2.csv
[4]:
df_1 = pd.read_csv("/tmp/data_1.csv")
df_1.set_index('clientId', inplace=True)

df_2 = pd.read_csv("/tmp/data_2.csv")
df_2.set_index('clientId', inplace=True)


display(
    df_1,
    "",
    df_2,
)

df_1.update(df_2)

#
# Update df_1.
#
# Note que costs para el primer cliente se
# actualiza con el valor del segundo df
#
# Tambien se actualiza el ultimo registro
# para el campo bonus
#
display(
    "",
    df_1
)
                      name  bonus   costs
clientId
10        Omar Y. Fletcher      0  9999.0
11        Buffy W. Vincent      1     NaN
12        Mira N. Franklin      2     NaN
13            Ferris Q. Le      3     4.0
''
                              location  bonus  costs
clientId
12        P.O. Box 445, 323 Cursus Rd.  100.0   12.0
13              Ap #791-3809 Eu Street    NaN   13.0
14                      6715 Diam. Rd.  200.0    NaN
''
                      name  bonus   costs
clientId
10        Omar Y. Fletcher    0.0  9999.0
11        Buffy W. Vincent    1.0     NaN
12        Mira N. Franklin  100.0    12.0
13            Ferris Q. Le    3.0    13.0
[5]:
df_1 = pd.read_csv("/tmp/data_1.csv")
df_1.set_index('clientId', inplace=True)

df_2 = pd.read_csv("/tmp/data_2.csv")
df_2.set_index('clientId', inplace=True)


display(
    df_1,
    "",
    df_2,
)

df_2.update(df_1)

#
# Update df_1.
#
# Note que costs para el primer cliente se
# actualiza con el valor del segundo df
#
# Tambien se actualiza el ultimo registro
# para el campo bonus
#
display(
    "",
    df_2
)
                      name  bonus   costs
clientId
10        Omar Y. Fletcher      0  9999.0
11        Buffy W. Vincent      1     NaN
12        Mira N. Franklin      2     NaN
13            Ferris Q. Le      3     4.0
''
                              location  bonus  costs
clientId
12        P.O. Box 445, 323 Cursus Rd.  100.0   12.0
13              Ap #791-3809 Eu Street    NaN   13.0
14                      6715 Diam. Rd.  200.0    NaN
''
                              location  bonus  costs
clientId
12        P.O. Box 445, 323 Cursus Rd.    2.0   12.0
13              Ap #791-3809 Eu Street    3.0    4.0
14                      6715 Diam. Rd.  200.0    NaN

Método combine() sobre columnas

[6]:
%%writefile /tmp/data_1.csv
A,B
0,4
0,4
Overwriting /tmp/data_1.csv
[7]:
%%writefile /tmp/data_2.csv
A,B
1,3
1,3
Overwriting /tmp/data_2.csv
[8]:
df_1 = pd.read_csv("/tmp/data_1.csv")
df_2 = pd.read_csv("/tmp/data_2.csv")

display(
    df_1,
    "",
    df_2,
    ""
)

#
# Mínimo de la suma de columnas
#
take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2

df_1 = df_1.combine(
    other=df_2,
    func=take_smaller,
)

display(df_1)
   A  B
0  0  4
1  0  4
''
   A  B
0  1  3
1  1  3
''
   A  B
0  0  3
1  0  3

Método combine() sobre elementos

[9]:
%%writefile /tmp/data_1.csv
A,B
5,2
0,4
Overwriting /tmp/data_1.csv
[10]:
%%writefile /tmp/data_2.csv
A,B
1,3
1,3
Overwriting /tmp/data_2.csv
[11]:
import numpy as np

df_1 = pd.read_csv("/tmp/data_1.csv")
df_2 = pd.read_csv("/tmp/data_2.csv")

display(
    df_1,
    "",
    df_2,
    ""
)


df_1 = df_1.combine(
    other=df_2,
    func=np.minimum,
)

display(df_1)
   A  B
0  5  2
1  0  4
''
   A  B
0  1  3
1  1  3
''
   A  B
0  1  2
1  0  3

Método combine_first()

Combina dos datafrmames llenando los valores nulos del primer dataframe con los valores del segundo dataframe.

[12]:
df_1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})
df_2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
result = df_1.combine_first(df_2)

display(
    df_1,
    "",
    df_2,
    "",
    result
)
     A    B
0  NaN  NaN
1  0.0  4.0
''
   A  B
0  1  3
1  1  3
''
     A    B
0  1.0  3.0
1  0.0  4.0