Diagramas de Dispersión en Matplotlib — 5:31 min

  • 5:31 min | Última modificación: 6 de octubre de 2021 | YouTube

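En este tutorial se presentan varios ejemplos de visualización de la relación entre dos variables numéricas (total_bill y tip del conjunto de datos tips) mediante diagramas de dispersión, separando las observaciones por categoría (smoker) con color, tamaño, forma o gráficas individuales.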

[1]:
import matplotlib.pyplot as plt

import seaborn as sns

Datos

[2]:
# Load the "tips" dataset through seaborn and keep one sub-frame per value of
# the `smoker` column; the plotting cells below read these two frames.
tips = sns.load_dataset("tips")
tips_smoker_Yes = tips.loc[tips["smoker"] == "Yes"]
tips_smoker_No = tips.loc[tips["smoker"] == "No"]

Separación por color

[3]:
# Scatter plot of tip vs. total_bill in which the two smoker groups are told
# apart by color only (same size, marker and transparency).
plt.figure(figsize=(6, 6))

for subset, color, name in (
    (tips_smoker_Yes, "tab:blue", "Yes"),
    (tips_smoker_No, "tab:orange", "No"),
):
    plt.scatter(
        x=subset.total_bill,
        y=subset.tip,
        c=color,
        s=100,
        marker="o",
        alpha=0.5,
        label=name,
    )

plt.xlabel("total_bill")
plt.ylabel("tip")
plt.legend()

# Keep only the left/bottom spines, drawn in gray; hide the top/right ones.
axes = plt.gca()
for side in ("left", "bottom"):
    axes.spines[side].set_color("gray")
for side in ("top", "right"):
    axes.spines[side].set_visible(False)

plt.show()
../../../_images/ciencia_datos_matplotlib_notebooks_1-10_scatter_6_0.png

Separación por tamaño

[4]:
# Scatter plot of tip vs. total_bill in which the two smoker groups share the
# same color and marker and are told apart by marker size: the "Yes" group
# uses large, more transparent markers and the "No" group small, more opaque
# ones.
plt.figure(figsize=(6, 6))

shared_style = {"c": "tab:blue", "marker": "o"}

plt.scatter(
    x=tips_smoker_Yes.total_bill,
    y=tips_smoker_Yes.tip,
    s=200,
    alpha=0.4,
    label="Yes",
    **shared_style,
)
plt.scatter(
    x=tips_smoker_No.total_bill,
    y=tips_smoker_No.tip,
    s=50,
    alpha=0.6,
    label="No",
    **shared_style,
)

plt.xlabel("total_bill")
plt.ylabel("tip")
plt.legend()

# Keep only the left/bottom spines, drawn in gray; hide the top/right ones.
axes = plt.gca()
for side in ("left", "bottom"):
    axes.spines[side].set_color("gray")
for side in ("top", "right"):
    axes.spines[side].set_visible(False)

plt.show()
../../../_images/ciencia_datos_matplotlib_notebooks_1-10_scatter_8_0.png

Separación por forma

[5]:
# Scatter plot of tip vs. total_bill in which the two smoker groups share the
# same color and size and are told apart by marker shape (circle vs. square).
# Marker reference:
# https://matplotlib.org/stable/api/markers_api.html#module-matplotlib.markers

plt.figure(figsize=(6, 6))

for subset, shape, opacity, name in (
    (tips_smoker_Yes, "o", 0.4, "Yes"),
    (tips_smoker_No, "s", 0.6, "No"),
):
    plt.scatter(
        x=subset.total_bill,
        y=subset.tip,
        c="tab:blue",
        s=100,
        marker=shape,
        alpha=opacity,
        label=name,
    )

plt.xlabel("total_bill")
plt.ylabel("tip")
plt.legend()

# Keep only the left/bottom spines, drawn in gray; hide the top/right ones.
axes = plt.gca()
for side in ("left", "bottom"):
    axes.spines[side].set_color("gray")
for side in ("top", "right"):
    axes.spines[side].set_visible(False)

plt.show()
../../../_images/ciencia_datos_matplotlib_notebooks_1-10_scatter_10_0.png

Gráficas individuales por categoría

[6]:
# Two side-by-side scatter plots of tip vs. total_bill, one panel per smoker
# category. The panels share the y axis so tip values are directly comparable.
# Drawing goes through the explicit Axes objects rather than pyplot's implicit
# "current axes", so each call states which panel it affects.
plt.figure(figsize=(12, 6))

#
# Left panel
#
ax_left = plt.subplot(121)

ax_left.scatter(
    x=tips_smoker_Yes.total_bill,
    y=tips_smoker_Yes.tip,
    c="tab:blue",
    s=100,
    marker="o",
    alpha=0.4,
)
ax_left.set_title("smoker=Yes")
ax_left.set_xlabel("total_bill")
ax_left.set_ylabel("tip")

ax_left.spines["left"].set_color("gray")
ax_left.spines["bottom"].set_color("gray")
ax_left.spines["top"].set_visible(False)
ax_left.spines["right"].set_visible(False)

#
# Right panel
#
ax_right = plt.subplot(122, sharey=ax_left)
# The y axis is shared with the left panel, so its tick labels are hidden here.
plt.setp(ax_right.get_yticklabels(), visible=False)
plt.subplots_adjust(wspace=0.05, hspace=0.1)

ax_right.scatter(
    x=tips_smoker_No.total_bill,
    y=tips_smoker_No.tip,
    c="tab:blue",
    s=100,
    marker="o",
    alpha=0.4,
)

ax_right.set_title("smoker=No")
ax_right.set_xlabel("total_bill")
ax_right.spines["left"].set_color("gray")
ax_right.spines["bottom"].set_color("gray")
ax_right.spines["top"].set_visible(False)
ax_right.spines["right"].set_visible(False)

# Display the figure explicitly, as the other cells do, so the plot also
# appears when this code runs outside an inline notebook backend.
plt.show()
../../../_images/ciencia_datos_matplotlib_notebooks_1-10_scatter_12_0.png