{
"cells": [
{
"cell_type": "markdown",
"id": "9bdb1960-15f6-4273-9c17-671608373885",
"metadata": {},
"source": [
"Importación de archivos XML --- 5:29 min\n",
"===\n",
"\n",
"* 5:29 min | Última modificación: Octubre 13, 2021 | [YouTube](https://youtu.be/mPm-fwomas0)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "72feeac4-e01a-46e3-aecb-9f399d0e724c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /tmp/core-site.xml\n"
]
}
],
"source": [
"%%writefile /tmp/core-site.xml\n",
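"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
"<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n",
"<configuration>\n",
"    <property>\n",
"        <name>fs.defaultFS</name>\n",
"        <value>hdfs://0.0.0.0:9000</value>\n",
"    </property>\n",
"</configuration>"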
]
},
{
"cell_type": "markdown",
"id": "09db2f00-1cd8-4236-861d-f5fa4d6c3ea2",
"metadata": {},
"source": [
"Interpretación\n",
" \n",
" {\n",
" \"root\": {\n",
" \"tag\": \"configuration\",\n",
" \"children\": [\n",
" {\n",
" \"tag\": \"property\",\n",
" \"children\": [\n",
" {\n",
" \"tag\": \"name\",\n",
" \"text\": \"fs.defaultFS\",\n",
" },\n",
" {\n",
" \"tag\": \"value\",\n",
" \"text\": \"hdfs://0.0.0.0:9000\",\n",
" }\n",
" ]\n",
" }\n",
" ]\n",
" }\n",
" }\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2ca262f9-cfdf-41b2-aa5e-c3c3586ff62b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'configuration'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Ingestión del archivo\n",
"# ===============================================\n",
"#\n",
"import xml.etree.ElementTree as ET\n",
"\n",
"tree = ET.parse(\"/tmp/core-site.xml\")\n",
"root = tree.getroot()\n",
"root.tag"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "46c55c80-a5b4-42f2-8e6f-841d842d826a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Element 'property' at 0x7f2d3c1b4e58>]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# List the direct children of the root node.\n",
"# Element.getchildren() is deprecated and was removed in Python 3.9;\n",
"# an Element is iterable over its children, so list(root) is the\n",
"# supported equivalent and also works on older interpreters.\n",
"#\n",
"list(root)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b616042e-b7f7-4f72-a053-41021f22ea13",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Number of direct children of the root node.\n",
"# len() works on the Element itself; Element.getchildren() is\n",
"# deprecated and was removed in Python 3.9.\n",
"#\n",
"len(root)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "03dfb0ff-7ab7-4d57-925e-278a4e2de6d9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'property'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# tag del primer nodo hijo\n",
"#\n",
"root[0].tag"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8adc78e4-3b0a-474c-a994-240aad04a3fe",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'name'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Primer tag del primer hijo\n",
"#\n",
"root[0][0].tag"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8dd82ad1-e7c7-4495-915b-e8bd24b81520",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'fs.defaultFS'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Contenido de la propiedad text\n",
"# del primer hijo\n",
"#\n",
"root[0][0].text"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c01c809a-1a61-406d-9563-d83c623e01b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'hdfs://0.0.0.0:9000'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#\n",
"# Contenido de la propiedad text\n",
"# del segundo hijo\n",
"#\n",
"root[0][1].text"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}