nested XML to dict/DataFrame
Project description
#as dataframe
#pip install a-pandas-ex-xml2df
#### UPDATE 08.12.2022 - xpath / snippets
from a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df
import pandas as pd
pd_add_read_xml_files()
df=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml',add_xpath_and_snippet=True)
print(df[1:6].to_string())
aa_all_keys aa_value aa_file aa_xpath aa_snippet
1 (artist, 0, album, 0, description, link) http://en.wikipedia.org/wiki/The_King_of_Limbs https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/description <description link="http://en.wikipedia.org/wiki/The_King_of_Limbs">
2 (artist, 0, album, 0, song, 0, length) 5:15 https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[1] <song length="5:15" title="Bloom"/>
3 (artist, 0, album, 0, song, 0, title) Bloom https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[1] <song length="5:15" title="Bloom"/>
4 (artist, 0, album, 0, song, 1, length) 4:41 https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[2] <song length="4:41" title="Morning Mr Magpie"/>
5 (artist, 0, album, 0, song, 1, title) Morning Mr Magpie https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[2] <song length="4:41" title="Morning Mr Magpie"/>
from a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df
import pandas as pd
pd_add_read_xml_files()
df=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')
pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')
Out[4]:
aa_all_keys aa_value
level_0 level_1 level_2 level_3 level_4 level_5 level_6
artist 0 album 0.0 description description NaN (artist, 0, album, 0, description, description) \n\tThe King of Limbs is the eighth studio alb...
link NaN (artist, 0, album, 0, description, link) http://en.wikipedia.org/wiki/The_King_of_Limbs
song 0 length (artist, 0, album, 0, song, 0, length) 5:15
title (artist, 0, album, 0, song, 0, title) Bloom
1 length (artist, 0, album, 0, song, 1, length) 4:41
... ...
1 album 1.0 song 9 title (artist, 1, album, 1, song, 9, title) Magic Doors
10 length (artist, 1, album, 1, song, 10, length) 5:45
title (artist, 1, album, 1, song, 10, title) Threads
title NaN NaN (artist, 1, album, 1, title) Third
name NaN NaN NaN NaN (artist, 1, name) Portishead
[98 rows x 2 columns]
#dataframe and dict
xmlfileorstrin11 = r"C:\Users\Gamer\Documents\Downloads\00000001_untouched.xml"
link='https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml'
uu1=xml_to_dict(xmlfileorstrin11)
uu11=xml_to_df(xmlfileorstrin11)
with open(xmlfileorstrin11,encoding='utf-8') as f:
    xmlfileorstring = f.read()
uu2=xml_to_dict(xmlfileorstring)
uu22=xml_to_df(xmlfileorstring)
uu3=xml_to_dict(link)
uu33=xml_to_df(link)
uu1
Out[12]:
{'folder': 'data',
'filename': '00000001_untouched.png',
'path': None,
'source': {'database': 'Unknown'},
'size': {'width': 1920, 'height': 1080, 'depth': 3},
'segmented': 0,
'object': [{'name': 'search_bar',
'pose': 'Unspecified',
'truncated': 0,
'occluded': 0,
'difficult': 0,
'bndbox': {'xmin': 753, 'ymin': 8, 'xmax': 1172, 'ymax': 52}},
{'name': 'home_text',
'pose': 'Unspecified',
'truncated': 0,
'occluded': 0,
'difficult': 0,
'bndbox': {'xmin': 42, 'ymin': 5, 'xmax': 158, 'ymax': 55}},
{'name': 'add_friends',
'pose': 'Unspecified',
'truncated': 0,
'occluded': 0,
'difficult': 0,
'bndbox': {'xmin': 44, 'ymin': 185, 'xmax': 152, 'ymax': 310}}]}
uu11
Out[14]:
aa_all_keys aa_value
level_0 level_1 level_2 level_3
filename NaN NaN NaN (filename,) 00000001_untouched.png
folder NaN NaN NaN (folder,) data
object 0 bndbox xmax (object, 0, bndbox, xmax) 1172
xmin (object, 0, bndbox, xmin) 753
ymax (object, 0, bndbox, ymax) 52
ymin (object, 0, bndbox, ymin) 8
difficult NaN (object, 0, difficult) 0
name NaN (object, 0, name) search_bar
occluded NaN (object, 0, occluded) 0
pose NaN (object, 0, pose) Unspecified
truncated NaN (object, 0, truncated) 0
1 bndbox xmax (object, 1, bndbox, xmax) 158
xmin (object, 1, bndbox, xmin) 42
ymax (object, 1, bndbox, ymax) 55
ymin (object, 1, bndbox, ymin) 5
difficult NaN (object, 1, difficult) 0
name NaN (object, 1, name) home_text
occluded NaN (object, 1, occluded) 0
pose NaN (object, 1, pose) Unspecified
truncated NaN (object, 1, truncated) 0
2 bndbox xmax (object, 2, bndbox, xmax) 152
xmin (object, 2, bndbox, xmin) 44
ymax (object, 2, bndbox, ymax) 310
ymin (object, 2, bndbox, ymin) 185
difficult NaN (object, 2, difficult) 0
name NaN (object, 2, name) add_friends
occluded NaN (object, 2, occluded) 0
pose NaN (object, 2, pose) Unspecified
truncated NaN (object, 2, truncated) 0
path NaN NaN NaN (path,) None
segmented NaN NaN NaN (segmented,) 0
size depth NaN NaN (size, depth) 3
height NaN NaN (size, height) 1080
width NaN NaN (size, width) 1920
source database NaN NaN (source, database) Unknown
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
File details
Details for the file a_pandas_ex_xml2df-0.13.tar.gz.
File metadata
- Download URL: a_pandas_ex_xml2df-0.13.tar.gz
- Upload date:
- Size: 7.4 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.9.13
File hashes
Algorithm | Hash digest |
---|---|
SHA256 | 1d4fe4cccb792cb74a69530911b095ab658e3a87f882989698275dbda5ec9bed |
MD5 | 4d5398abea2b1d581adfce784e4f4342 |
BLAKE2b-256 | ce345e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189 |
File details
Details for the file a_pandas_ex_xml2df-0.13-py3-none-any.whl.
File metadata
- Download URL: a_pandas_ex_xml2df-0.13-py3-none-any.whl
- Upload date:
- Size: 8.3 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.9.13
File hashes
Algorithm | Hash digest |
---|---|
SHA256 | c3b453eef024d881647fcb794fde5faa136219f115ebf18764abad95e63aafc8 |
MD5 | 74007d6bd9cd2efa98320a52c1cf903b |
BLAKE2b-256 | 1edca15037d6e68249cbbcd70a7e15e2287efb50b960516ddb245eae4b0d983e |