Transformación de datos en SQLite3 — 7:44 min

  • 7:44 min | Última modificación: Octubre 14, 2021 | YouTube

Preparación

Se crean dos cellmagics. La cellmagic %%sqlite3 permite ejecutar el contenido de la celda en el motor de búsqueda cuando la celda contiene una sola sentencia; el cellmagic retorna el resultado de la ejecución del comando SQL.

La cellmagic %%sqlite3script permite ejecutar scripts conformados por varios comandos SQL. No retorna resultados

[1]:
import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
[2]:
from IPython.core.magic import register_cell_magic, register_line_magic


@register_cell_magic
def sqlite3(line, cell):
    cell = [
        l for l in cell.split("\n") if l[0:2] != "--" and l[0:2] not in ["--", "##"]
    ]
    cell = "\n".join(cell)
    return cur.execute(cell).fetchall()


@register_cell_magic
def sqlite3script(line, cell):
    cell = [l for l in cell.split("\n") if len(l) >= 2 and l[0:2] not in ["--", "##"]]
    cell = "\n".join(cell)
    conn.executescript(cell)
    conn.commit()


@register_line_magic
def sql(line):
    line = [
        l for l in line.split("\n") if l[0:2] != "--" and l[0:2] not in ["--", "##"]
    ]
    line = "\n".join(line)
    return cur.execute(line).fetchall()
[3]:
%%sqlite3script

DROP TABLE IF EXISTS persons;

CREATE TABLE persons (
    id INT,
    firstname VARCHAR(10),
    surname VARCHAR(10),
    birthday DATE,
    color VARCHAR(10),
    quantity INT
);

INSERT INTO persons VALUES
    (1,"Vivian","Hamilton","1971-07-08","green",1),
    (2,"Karen","Holcomb","1974-05-23","green",4),
    (3,"Cody","Garrett","1973-04-22","orange",1),
    (4,"Roth","Fry","1975-01-29","black",1),
    (5,"Zoe","Conway","1974-07-03","blue",2),
    (6,"Gretchen","Kinney","1974-10-18","viole",1),
    (7,"Driscoll","Klein","1970-10-05","blue",5),
    (8,"Karyn","Diaz","1969-02-24","red",1),
    (9,"Merritt","Guy","1974-10-17","indigo",4),
    (10,"Kylan","Sexton","1975-02-28","black",4),
    (11,"Jordan","Estes","1969-12-07","indigo",4),
    (12,"Hope","Coffey","1973-12-24","green",5),
    (13,"Vivian","Crane","1970-08-27","gray",5),
    (14,"Clio","Noel","1972-12-12","red",5),
    (15,"Hope","Silva","1970-07-01","blue",5),
    (16,"Ayanna","Jarvis","1974-02-11","orange",5),
    (17,"Chanda","Boyer","1973-04-01","green",4),
    (18,"Chadwick","Knight","1973-04-29","yellow",1);

Strings

LENGTH()

[4]:
%sql SELECT surname, LENGTH(surname) FROM persons LIMIT 5;
[4]:
[('Hamilton', 8), ('Holcomb', 7), ('Garrett', 7), ('Fry', 3), ('Conway', 6)]

UPPER(), LOWER()

[5]:
%sql SELECT surname, UPPER(surname), LOWER(surname) FROM persons LIMIT 5;
[5]:
[('Hamilton', 'HAMILTON', 'hamilton'),
 ('Holcomb', 'HOLCOMB', 'holcomb'),
 ('Garrett', 'GARRETT', 'garrett'),
 ('Fry', 'FRY', 'fry'),
 ('Conway', 'CONWAY', 'conway')]
[6]:
#
# Condicional para strings
#
%sql SELECT surname, surname BETWEEN 'c' AND 'k' FROM persons LIMIT 5;
[6]:
[('Hamilton', 0), ('Holcomb', 0), ('Garrett', 0), ('Fry', 0), ('Conway', 0)]

LIKE

La función LIKE permite buscar patrones específicos dentro de cadenas de caracteres.

> LIKE 'a%'   : Encuentra los valores que inician con 'a'
> LIKE '%a'   : Encuentra los valores que terminan con 'a'
> LIKE '%a%'  : Encuentra los valores que contengan 'a' en cualquier posición
> LIKE '_a%'  : Encuentra los valores que contengan 'a' en la segunda posición
> LIKE 'a_%_%': Encuentra los valores que inician con 'a' y tienen al menos 3 caracteres
> LIKE 'a%o'  : Encuentra los valores que inician con 'a' y terminan con 'o'
[7]:
%sql SELECT * FROM persons WHERE color LIKE 'b%';
[7]:
[(4, 'Roth', 'Fry', '1975-01-29', 'black', 1),
 (5, 'Zoe', 'Conway', '1974-07-03', 'blue', 2),
 (7, 'Driscoll', 'Klein', '1970-10-05', 'blue', 5),
 (10, 'Kylan', 'Sexton', '1975-02-28', 'black', 4),
 (15, 'Hope', 'Silva', '1970-07-01', 'blue', 5)]
[8]:
%sql SELECT * FROM persons WHERE color NOT LIKE 'b%';
[8]:
[(1, 'Vivian', 'Hamilton', '1971-07-08', 'green', 1),
 (2, 'Karen', 'Holcomb', '1974-05-23', 'green', 4),
 (3, 'Cody', 'Garrett', '1973-04-22', 'orange', 1),
 (6, 'Gretchen', 'Kinney', '1974-10-18', 'viole', 1),
 (8, 'Karyn', 'Diaz', '1969-02-24', 'red', 1),
 (9, 'Merritt', 'Guy', '1974-10-17', 'indigo', 4),
 (11, 'Jordan', 'Estes', '1969-12-07', 'indigo', 4),
 (12, 'Hope', 'Coffey', '1973-12-24', 'green', 5),
 (13, 'Vivian', 'Crane', '1970-08-27', 'gray', 5),
 (14, 'Clio', 'Noel', '1972-12-12', 'red', 5),
 (16, 'Ayanna', 'Jarvis', '1974-02-11', 'orange', 5),
 (17, 'Chanda', 'Boyer', '1973-04-01', 'green', 4),
 (18, 'Chadwick', 'Knight', '1973-04-29', 'yellow', 1)]
[9]:
#
# Conectores lógicos (AND).
#
%sql SELECT * FROM persons WHERE color = 'blue' AND firstname LIKE 'Z%';
[9]:
[(5, 'Zoe', 'Conway', '1974-07-03', 'blue', 2)]
[10]:
#
# Conectores lógicos (OR).
#
%sql SELECT * FROM persons WHERE color = 'blue' OR firstname LIKE 'K%';
[10]:
[(2, 'Karen', 'Holcomb', '1974-05-23', 'green', 4),
 (5, 'Zoe', 'Conway', '1974-07-03', 'blue', 2),
 (7, 'Driscoll', 'Klein', '1970-10-05', 'blue', 5),
 (8, 'Karyn', 'Diaz', '1969-02-24', 'red', 1),
 (10, 'Kylan', 'Sexton', '1975-02-28', 'black', 4),
 (15, 'Hope', 'Silva', '1970-07-01', 'blue', 5)]
[11]:
#
# Condicional para campos string
#
%sql SELECT * FROM persons WHERE color IN ('blue','black');
[11]:
[(4, 'Roth', 'Fry', '1975-01-29', 'black', 1),
 (5, 'Zoe', 'Conway', '1974-07-03', 'blue', 2),
 (7, 'Driscoll', 'Klein', '1970-10-05', 'blue', 5),
 (10, 'Kylan', 'Sexton', '1975-02-28', 'black', 4),
 (15, 'Hope', 'Silva', '1970-07-01', 'blue', 5)]
[12]:
#
# Condicional para campos string
#
%sql SELECT * FROM persons WHERE color NOT IN ('blue','black');
[12]:
[(1, 'Vivian', 'Hamilton', '1971-07-08', 'green', 1),
 (2, 'Karen', 'Holcomb', '1974-05-23', 'green', 4),
 (3, 'Cody', 'Garrett', '1973-04-22', 'orange', 1),
 (6, 'Gretchen', 'Kinney', '1974-10-18', 'viole', 1),
 (8, 'Karyn', 'Diaz', '1969-02-24', 'red', 1),
 (9, 'Merritt', 'Guy', '1974-10-17', 'indigo', 4),
 (11, 'Jordan', 'Estes', '1969-12-07', 'indigo', 4),
 (12, 'Hope', 'Coffey', '1973-12-24', 'green', 5),
 (13, 'Vivian', 'Crane', '1970-08-27', 'gray', 5),
 (14, 'Clio', 'Noel', '1972-12-12', 'red', 5),
 (16, 'Ayanna', 'Jarvis', '1974-02-11', 'orange', 5),
 (17, 'Chanda', 'Boyer', '1973-04-01', 'green', 4),
 (18, 'Chadwick', 'Knight', '1973-04-29', 'yellow', 1)]

ORDER BY

[13]:
%sql SELECT * FROM persons ORDER BY color;
[13]:
[(4, 'Roth', 'Fry', '1975-01-29', 'black', 1),
 (10, 'Kylan', 'Sexton', '1975-02-28', 'black', 4),
 (5, 'Zoe', 'Conway', '1974-07-03', 'blue', 2),
 (7, 'Driscoll', 'Klein', '1970-10-05', 'blue', 5),
 (15, 'Hope', 'Silva', '1970-07-01', 'blue', 5),
 (13, 'Vivian', 'Crane', '1970-08-27', 'gray', 5),
 (1, 'Vivian', 'Hamilton', '1971-07-08', 'green', 1),
 (2, 'Karen', 'Holcomb', '1974-05-23', 'green', 4),
 (12, 'Hope', 'Coffey', '1973-12-24', 'green', 5),
 (17, 'Chanda', 'Boyer', '1973-04-01', 'green', 4),
 (9, 'Merritt', 'Guy', '1974-10-17', 'indigo', 4),
 (11, 'Jordan', 'Estes', '1969-12-07', 'indigo', 4),
 (3, 'Cody', 'Garrett', '1973-04-22', 'orange', 1),
 (16, 'Ayanna', 'Jarvis', '1974-02-11', 'orange', 5),
 (8, 'Karyn', 'Diaz', '1969-02-24', 'red', 1),
 (14, 'Clio', 'Noel', '1972-12-12', 'red', 5),
 (6, 'Gretchen', 'Kinney', '1974-10-18', 'viole', 1),
 (18, 'Chadwick', 'Knight', '1973-04-29', 'yellow', 1)]

GROUP BY

COUNT()

[14]:
#
# Cuenta cuantos registros hay en la tabla.
#
%sql SELECT COUNT(*) FROM persons;
[14]:
[(18,)]

MAX(), MIN(), SUM(), AVG()

[15]:
%%sqlite3
SELECT
    color,
    MAX(quantity),
    MIN(quantity),
    SUM(quantity),
    AVG(quantity)
FROM
    persons
GROUP BY
    color;
[15]:
[('black', 4, 1, 5, 2.5),
 ('blue', 5, 2, 12, 4.0),
 ('gray', 5, 5, 5, 5.0),
 ('green', 5, 1, 14, 3.5),
 ('indigo', 4, 4, 8, 4.0),
 ('orange', 5, 1, 6, 3.0),
 ('red', 5, 1, 6, 3.0),
 ('viole', 1, 1, 1, 1.0),
 ('yellow', 1, 1, 1, 1.0)]
[16]:
%%sqlite3
SELECT
    color,
    quantity
FROM
    persons
WHERE
    quantity = (SELECT MIN(quantity) FROM persons);
[16]:
[('green', 1),
 ('orange', 1),
 ('black', 1),
 ('viole', 1),
 ('red', 1),
 ('yellow', 1)]

[17]:
## Cierre de la conexión
conn.close()