Skip to main content

No project description provided

Project description

# path_to_mordor

A package for organizing web scraping, built on bs4 (Beautiful Soup 4).

A simple example:

<!-- language: lang-py -->
"""
The module contains the rules of scraping.
"""
from ptm import Frodo
from ptm.path_actions import gpagins, gpages, gresults
from ptm.result_actions import KeyRealtionships, gvalues, gtexts, gattrs
from treasy import BookSearchDB
from treasy.session import create_session
from smithy.preparing import prepare

# Site root and starting page; both are passed to Frodo in run().
# NOTE(review): "RESORCE" looks like a misspelling of "RESOURCE"; it is kept
# because run() refers to the constant by this name.
RESORCE = 'http://www.labirint-bookstore.ru'
START_PAGE = 'http://www.labirint-bookstore.ru/books'

def result_proccessing(result_set):
    """
    Prepare one scraped result and save it to the book database.

    The result set is passed to ``prepare`` together with the source
    descriptor (store name and URL), and the prepared record is printed.
    Its ``'description'`` value is split on ``'||'``; when that yields more
    than two parts, the description is replaced by the third part with
    surrounding whitespace stripped. The record is then handed to
    ``BookSearchDB.update_book``.
    """
    # The database wrapper is created first, before the result is prepared.
    storage = BookSearchDB(create_session())
    source = {'name': 'labirint-bookstore', 'url': 'http://www.labirint-bookstore.ru'}
    book = prepare(result_set, source)
    print(book)
    parts = book['description'].split('||')
    if len(parts) >= 3:
        book['description'] = parts[2].strip()
    storage.update_book(book)

# Russian labels paired with result field names.
# Presumably the labels are matched against text scraped from the book page;
# confirm against ptm.result_actions.KeyRealtionships.
KEY_RELATIONSHIPS = KeyRealtionships(
    {
        'ISBN:': 'isbn_numbers',
        'Издательство:': 'publisher',       # "Publisher:"
        'Автор:': 'authors',                # "Author:"
        'Переводчик:': 'translators',       # "Translator:"
        'Иллюстратор': 'illustrators',      # "Illustrator" (no trailing colon)
        'Серия:': 'series',                 # "Series:"
        'Жанр:': 'genres',                  # "Genre:"
        'Год выпуска:': 'year',             # "Year of release:"
        'Тип обложки:': 'cover_format',     # "Cover type:"
        'Страниц:': 'page_number',          # "Pages:"
        # NOTE(review): 'weidth' looks like a typo of 'weight'; left as-is
        # because it is a runtime key -- confirm against the consumer.
        'Масса:': 'weidth',                 # "Weight:"
        'Размеры:': 'size',                 # "Dimensions:"
        'Иллюстратор:': 'illustrators',     # "Illustrator:" (with colon)
    }
)

# Result map given to gresults() in PATH: each key is paired with the action
# that extracts its value from a book page.
RESULTS = {
    # Texts from the left and right info columns, combined by gvalues().
    KEY_RELATIONSHIPS: gvalues(
        gtexts('div', attrs={'class': 'book-info-left'}),
        gtexts('div', attrs={'class': 'book-info-right'}),
    ),
    # The "||" separator is the one result_proccessing() later splits on.
    'description': gtexts(
        'div',
        attrs={'id': 'bigcard-description'},
        separator="||",
    ),
    # The title is read from the cover image's "alt" attribute.
    'title': gattrs(
        'img',
        attrs={'class': 'img-cover-book'},
        target_attribute='alt',
    ),
}

# Nested mapping handed to Frodo in run().
# Presumably: paginate the listing ("?page=" from 1 to 10), follow the
# 'books-name' entries, then collect RESULTS and pass them to
# result_proccessing -- confirm against ptm.path_actions.
PATH = {
    gpagins(pagin_template='?page=', start_page_number=1, finish_page_number=10): {
        gpages('div', attrs={'class': 'books-name'}): gresults(
            result_proccessing,
            result_map=RESULTS,
        ),
    },
}

def run(rucksack):
    """
    Start the scraping run.

    Builds a ``Frodo`` from the module-level RESORCE, START_PAGE and PATH
    together with the given ``rucksack``, then calls its ``run`` method.
    """
    Frodo(RESORCE, START_PAGE, PATH, rucksack).run()

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

ptm-0.0.1a4.tar.gz (12.6 kB view hashes)

Uploaded Source

Built Distribution

ptm-0.0.1a4-py3-none-any.whl (21.4 kB view hashes)

Uploaded Python 3

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page