Convert XML documents to dict and easily search for and retrieve the data they contain.
Project description
from xmlmanip import XMLSchema, SearchableList
string = """
<breakfast_menu>
<food tag="waffles">
<name>Belgian Waffles</name>
<price>$5.95</price>
<description>
Two of our famous Belgian Waffles with plenty of real maple syrup
</description>
<calories>650</calories>
</food>
<food tag="waffles">
<name >Strawberry Belgian Waffles</name>
<price>$7.95</price>
<description>
Light Belgian waffles covered with strawberries and whipped cream
</description>
<calories>900</calories>
</food>
<food tag="waffles">
<name>Berry-Berry Belgian Waffles</name>
<price>$8.95</price>
<description>
Belgian waffles covered with assorted fresh berries and whipped cream
</description>
<calories>900</calories>
</food>
<food tag="toast">
<name>French Toast</name>
<price>$4.50</price>
<description>
Thick slices made from our homemade sourdough bread
</description>
<calories>600</calories>
</food>
<food tag="classic">
<name>Homestyle Breakfast</name>
<price>$6.95</price>
<description>
Two eggs, bacon or sausage, toast, and our ever-popular hash browns
</description>
<calories>950</calories>
</food>
</breakfast_menu>
"""
Pass your XML string to XMLSchema to convert it to a dict (the dict conversion is handled by xmltodict: https://github.com/martinblech/xmltodict).
schema = XMLSchema(string)
schema
XMLSchema([('breakfast_menu', OrderedDict([('food', [OrderedDict([('@tag', 'waffles'), ('name', 'Belgian Waffles'), ('price', '$5.95'), ('description', 'Two of our famous Belgian Waffles with plenty of real maple syrup'), ('calories', '650')]), OrderedDict([('@tag', 'waffles'), ('name', 'Strawberry Belgian Waffles'), ('price', '$7.95'), ('description', 'Light Belgian waffles covered with strawberries and whipped cream'), ('calories', '900')]), OrderedDict([('@tag', 'waffles'), ('name', 'Berry-Berry Belgian Waffles'), ('price', '$8.95'), ('description', 'Belgian waffles covered with assorted fresh berries and whipped cream'), ('calories', '900')]), OrderedDict([('@tag', 'toast'), ('name', 'French Toast'), ('price', '$4.50'), ('description', 'Thick slices made from our homemade sourdough bread'), ('calories', '600')]), OrderedDict([('@tag', 'classic'), ('name', 'Homestyle Breakfast'), ('price', '$6.95'), ('description', 'Two eggs, bacon or sausage, toast, and our ever-popular hash browns'), ('calories', '950')])])]))])
Use .search() to search for data of interest.
schema.search(name="Homestyle Breakfast")
[SchemaInnerDict([('@tag', 'classic'), ('name', 'Homestyle Breakfast'), ('price', '$6.95'), ('description', 'Two eggs, bacon or sausage, toast, and our ever-popular hash browns'), ('calories', '950')])]
The SearchableList class also lets you easily search through lists of dicts.
example_list = [{"thing": 1, "other_thing": 2}, {"thing": 2, "other_thing": 2}]
searchable_list = SearchableList(example_list)
print(searchable_list.search(thing__ne=2)) # thing != 2
print(searchable_list.search(other_thing=2))
[{'thing': 1, 'other_thing': 2}]
[{'thing': 1, 'other_thing': 2}, {'thing': 2, 'other_thing': 2}]
Use .locate() if you are interested in the “path” to your data of interest and .retrieve() to get an object from its “path.”
schema.locate(name="Homestyle Breakfast")
['__breakfast_menu__food__4__name']
schema.retrieve('__breakfast_menu__food__4__name')
'Homestyle Breakfast'
schema.retrieve('__breakfast_menu__food__4')
SchemaInnerDict([('@tag', 'classic'), ('name', 'Homestyle Breakfast'), ('price', '$6.95'), ('description', 'Two eggs, bacon or sausage, toast, and our ever-popular hash browns'), ('calories', '950')])
You have access to all of the standard comparison methods.
paths = schema.locate(name__contains="Waffles")
paths
['__breakfast_menu__food__0__name', '__breakfast_menu__food__1__name', '__breakfast_menu__food__2__name']
schema.search(name__contains="Waffles")
[SchemaInnerDict([('@tag', 'waffles'), ('name', 'Belgian Waffles'), ('price', '$5.95'), ('description', 'Two of our famous Belgian Waffles with plenty of real maple syrup'), ('calories', '650')]), SchemaInnerDict([('@tag', 'waffles'), ('name', 'Berry-Berry Belgian Waffles'), ('price', '$8.95'), ('description', 'Belgian waffles covered with assorted fresh berries and whipped cream'), ('calories', '900')]), SchemaInnerDict([('@tag', 'waffles'), ('name', 'Strawberry Belgian Waffles'), ('price', '$7.95'), ('description', 'Light Belgian waffles covered with strawberries and whipped cream'), ('calories', '900')])]
schema.search(calories__lt="700")
[SchemaInnerDict([('@tag', 'toast'), ('name', 'French Toast'), ('price', '$4.50'), ('description', 'Thick slices made from our homemade sourdough bread'), ('calories', '600')]), SchemaInnerDict([('@tag', 'waffles'), ('name', 'Belgian Waffles'), ('price', '$5.95'), ('description', 'Two of our famous Belgian Waffles with plenty of real maple syrup'), ('calories', '650')])]
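Warning: all values are compared as strings, which may produce unexpected results. For example, "600" < "70" is true in a string comparison, so the two searches below return the same items.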
schema.search(calories__lt="700") == schema.search(calories__lt="70")
True
Unfortunately, some attributes cannot be accessed via keyword arguments, because their names (such as "@tag") are not valid Python identifiers.
schema.search(@tag__ne="waffles")
  File "<ipython-input-13-da95e3095c41>", line 1
    schema.search(@tag__ne="waffles")
                  ^
SyntaxError: invalid syntax
You will need to pass the desired attribute and comparison method as strings in this case.
schema.search('@tag', 'waffles') # default comparison is __eq__
[SchemaInnerDict([('@tag', 'waffles'), ('name', 'Belgian Waffles'), ('price', '$5.95'), ('description', 'Two of our famous Belgian Waffles with plenty of real maple syrup'), ('calories', '650')]), SchemaInnerDict([('@tag', 'waffles'), ('name', 'Strawberry Belgian Waffles'), ('price', '$7.95'), ('description', 'Light Belgian waffles covered with strawberries and whipped cream'), ('calories', '900')]), SchemaInnerDict([('@tag', 'waffles'), ('name', 'Berry-Berry Belgian Waffles'), ('price', '$8.95'), ('description', 'Belgian waffles covered with assorted fresh berries and whipped cream'), ('calories', '900')])]
schema.search('@tag', 'waffles', comparison='ne')
[SchemaInnerDict([('@tag', 'classic'), ('name', 'Homestyle Breakfast'), ('price', '$6.95'), ('description', 'Two eggs, bacon or sausage, toast, and our ever-popular hash browns'), ('calories', '950')]), SchemaInnerDict([('@tag', 'toast'), ('name', 'French Toast'), ('price', '$4.50'), ('description', 'Thick slices made from our homemade sourdough bread'), ('calories', '600')])]
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.