Python Wrapper for Wikipedia
Project description
Wikipedia API
This package provides a Python API for accessing Wikipedia.
Installation
pip3 install wikipedia-api
Usage
import wikipedia
# Extract data in Wiki format
wiki_wiki = wikipedia.Wikipedia('en')
page_py = wiki_wiki.article('Python_(programming_language)')
print("Page - Exists: %s" % page_py.exists())
# Page - Exists: True
print("Page - Id: %s" % page_py.pageid)
# Page - Id: 23862
print("Page - Title: %s" % page_py.title)
# Page - Title: Python (programming language)
print("Page - Summary: %s" % page_py.summary[0:60])
# Page - Summary: Python is a widely used high-level programming language for
def print_sections(sections, level=0):
    for s in sections:
        print("%s: %s - %s" % ("*" * (level + 1), s.title, s.text[0:40]))
        print_sections(s.sections, level + 1)
print_sections(page_py.sections)
# *: History - Python was conceived in the late 1980s,
# *: Features and philosophy - Python is a multi-paradigm programming l
# *: Syntax and semantics - Python is meant to be an easily readable
# **: Indentation - Python uses whitespace indentation, rath
# **: Statements and control flow - Python's statements include (among other
# **: Expressions - Some Python expressions are similar to l
# ...
section_py = page_py.section_by_title('Features and philosophy')
print("Section - Title: %s" % section_py.title)
# Section - Title: Features and philosophy
print("Section - Text: %s" % section_py.text[0:60])
# Section - Text: Python is a multi-paradigm programming language. Object-orie
# Now let's extract texts with HTML tags
wiki_html = wikipedia.Wikipedia(
language='cs',
extract_format=wikipedia.ExtractFormat.HTML
)
page_ostrava = wiki_html.article('Ostrava')
print("Page - Exists: %s" % page_ostrava.exists())
# Page - Exists: True
print("Page - Id: %s" % page_ostrava.pageid)
# Page - Id: 7667
print("Page - Title: %s" % page_ostrava.title)
# Page - Title: Ostrava
print("Page - Summary: %s" % page_ostrava.summary[0:60])
# Page - Summary: <p><b>Ostrava</b> (polsky <span lang="pl" title="polština" x
print_sections(page_ostrava.sections)
# *: Znak a logo -
# **: Heraldický znak - <p>Městský znak je blasonován: <i>V modr
# **: Marketingové logo - <p>V roce 2008 bylo představeno nové log
# *: Historie - <dl><dd><i>Související informace nalezne
# **: Zemské hranice - <p>Zemské hranice zde tvoří řeky Odra a
# *: Obyvatelstvo - <ul class="gallery mw-gallery-traditiona
section_ostrava = page_ostrava.section_by_title('Heraldický znak')
print("Section - Title: %s" % section_ostrava.title)
# Section - Title: Heraldický znak
print("Section - Text: %s" % section_ostrava.text[0:60])
# Section - Text: <p>Městský znak je blasonován: <i>V modrém štítě na zeleném
page_nonexisting = wiki_wiki.article('Wikipedia-API-FooBar')
print("Page - Exists: %s" % page_nonexisting.exists())
# Page - Exists: False
print("Page - Id: %s" % page_nonexisting.pageid)
# Page - Id: -1
print("Page - Title: %s" % page_nonexisting.title)
# Page - Title: Wikipedia-API-FooBar
print("Page - Summary: %s" % page_nonexisting.summary[0:60])
# Page - Summary:
External Links
Changelog
0.2.0
Use properties instead of functions
Added support for property Info
0.1.6
Support for extracting texts with HTML markup
Added initial version of unit tests
0.1.4
It’s possible to extract summary and sections of the page
Added support for property Extracts
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Wikipedia-API-0.2.0.tar.gz
(8.1 kB
view details)
File details
Details for the file Wikipedia-API-0.2.0.tar.gz.
File metadata
- Download URL: Wikipedia-API-0.2.0.tar.gz
- Upload date:
- Size: 8.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
File hashes
Algorithm | Hash digest
---|---
SHA256 | 98a7a10d9de38f19bc050440b466a17f72684bb0c60c625107c41c0dac614c85
MD5 | 92c1172be319d9935047a19a78b527ea
BLAKE2b-256 | 96210162f032ac44c4f780b8d72fe24466b1b456596138a0aa705b520d5ceed9