Diagramas de Dispersión en Matplotlib — 5:31 min
5:31 min | Última modificación: 6 de octubre de 2021 | YouTube
En este tutorial se presentan varios ejemplos de visualización de relaciones entre variables mediante diagramas de dispersión.
[1]:
import matplotlib.pyplot as plt
import seaborn as sns
Datos
[2]:
# Load the "tips" dataset through seaborn, then keep one frame per value of
# the `smoker` column so each group can be plotted separately.
tips = sns.load_dataset("tips")
tips_smoker_Yes = tips.loc[tips["smoker"] == "Yes"]
tips_smoker_No = tips.loc[tips["smoker"] == "No"]
Separación por color
[3]:
# Draw both smoker groups on a single axes, distinguishing them by color only.
plt.figure(figsize=(6, 6))
for group, group_color, group_label in (
    (tips_smoker_Yes, "tab:blue", "Yes"),
    (tips_smoker_No, "tab:orange", "No"),
):
    plt.scatter(
        x=group.total_bill,
        y=group.tip,
        c=group_color,
        s=100,
        marker="o",
        alpha=0.5,
        label=group_label,
    )
plt.xlabel("total_bill")
plt.ylabel("tip")
plt.legend()

# Gray out the left/bottom spines and hide the top/right ones.
current_axes = plt.gca()
for side in ("left", "bottom"):
    current_axes.spines[side].set_color("gray")
for side in ("top", "right"):
    current_axes.spines[side].set_visible(False)
plt.show()
Separación por tamaño
[4]:
# Draw both smoker groups in the same color, distinguishing them by marker
# size (the smaller markers are also slightly more opaque).
plt.figure(figsize=(6, 6))
for group, marker_size, opacity, group_label in (
    (tips_smoker_Yes, 200, 0.4, "Yes"),
    (tips_smoker_No, 50, 0.6, "No"),
):
    plt.scatter(
        x=group.total_bill,
        y=group.tip,
        c="tab:blue",
        s=marker_size,
        marker="o",
        alpha=opacity,
        label=group_label,
    )
plt.xlabel("total_bill")
plt.ylabel("tip")
plt.legend()

# Gray out the left/bottom spines and hide the top/right ones.
current_axes = plt.gca()
for side in ("left", "bottom"):
    current_axes.spines[side].set_color("gray")
for side in ("top", "right"):
    current_axes.spines[side].set_visible(False)
plt.show()
Separación por forma
[5]:
# Draw both smoker groups in the same color and size, distinguishing them by
# marker shape (circle vs. square).
# Marker reference:
# https://matplotlib.org/stable/api/markers_api.html#module-matplotlib.markers
plt.figure(figsize=(6, 6))
for group, marker_shape, opacity, group_label in (
    (tips_smoker_Yes, "o", 0.4, "Yes"),
    (tips_smoker_No, "s", 0.6, "No"),
):
    plt.scatter(
        x=group.total_bill,
        y=group.tip,
        c="tab:blue",
        s=100,
        marker=marker_shape,
        alpha=opacity,
        label=group_label,
    )
plt.xlabel("total_bill")
plt.ylabel("tip")
plt.legend()

# Gray out the left/bottom spines and hide the top/right ones.
current_axes = plt.gca()
for side in ("left", "bottom"):
    current_axes.spines[side].set_color("gray")
for side in ("top", "right"):
    current_axes.spines[side].set_visible(False)
plt.show()
Gráficas individuales por categoría
[6]:
# One panel per smoker group, side by side, sharing the y axis.
plt.figure(figsize=(12, 6))

# Marker appearance common to both panels.
marker_style = dict(c="tab:blue", s=100, marker="o", alpha=0.4)

#
# Left panel: smoker == "Yes"
#
ax_left = plt.subplot(121)
plt.scatter(
    x=tips_smoker_Yes.total_bill,
    y=tips_smoker_Yes.tip,
    **marker_style,
)
ax_left.set_title("smoker=Yes")
ax_left.set_xlabel("total_bill")
ax_left.set_ylabel("tip")
for side in ("left", "bottom"):
    ax_left.spines[side].set_color("gray")
for side in ("top", "right"):
    ax_left.spines[side].set_visible(False)

#
# Right panel: smoker == "No"
#
ax_right = plt.subplot(122, sharey=ax_left)
# The y axis is shared with the left panel, so its tick labels are hidden
# here and the horizontal gap between panels is tightened.
plt.setp(ax_right.get_yticklabels(), visible=False)
plt.subplots_adjust(wspace=0.05, hspace=0.1)
plt.scatter(
    x=tips_smoker_No.total_bill,
    y=tips_smoker_No.tip,
    **marker_style,
)
ax_right.set_title("smoker=No")
ax_right.set_xlabel("total_bill")
for side in ("left", "bottom"):
    ax_right.spines[side].set_color("gray")
for side in ("top", "right"):
    ax_right.spines[side].set_visible(False)