No project description provided
Project description
#path_to_mordor
A package for organizing web scraping, based on bs4.
Simple example.
<!-- language: lang-py -->
"""
The module contains the rules of scraping.
"""
from ptm import Frodo
from ptm.path_actions import gpagins, gpages, gresults
from ptm.result_actions import KeyRealtionships, gvalues, gtexts, gattrs
from treasy import BookSearchDB
from treasy.session import create_session
from smithy.preparing import prepare
RESORCE = 'http://www.labirint-bookstore.ru'
START_PAGE = 'http://www.labirint-bookstore.ru/books'
def result_proccessing(result_set):
    """
    Prepare one scraped result and hand it to ``BookSearchDB``.

    ``result_set`` is passed to ``prepare`` together with a descriptor of
    the source store. The prepared record is printed; when its
    description splits on ``'||'`` into more than two parts, the
    description is replaced by the stripped third part. The record is
    then given to ``update_book``.
    """
    source = {'name': 'labirint-bookstore', 'url': 'http://www.labirint-bookstore.ru'}
    storage = BookSearchDB(create_session())
    record = prepare(result_set, source)
    print(record)
    parts = record['description'].split('||')
    if len(parts) >= 3:
        record['description'] = parts[2].strip()
    # NOTE(review): the original indentation was lost; the update is assumed
    # to run for every record, not only when the description was trimmed.
    storage.update_book(record)
# Maps label strings (keys) to result field names (values). The object is
# used as a key of RESULTS below.
# NOTE(review): 'Иллюстратор' is listed both without and with a trailing
# colon, and both map to 'illustrators' — presumably deliberate; confirm.
# NOTE(review): 'weidth' looks like a misspelling of 'weight'; it is left
# as-is because it is a runtime key — confirm against the consumer.
KEY_RELATIONSHIPS = KeyRealtionships({
    'ISBN:': 'isbn_numbers',
    'Издательство:': 'publisher',
    'Автор:': 'authors',
    'Переводчик:': 'translators',
    'Иллюстратор': 'illustrators',
    'Серия:': 'series',
    'Жанр:': 'genres',
    'Год выпуска:': 'year',
    'Тип обложки:': 'cover_format',
    'Страниц:': 'page_number',
    'Масса:': 'weidth',
    'Размеры:': 'size',
    'Иллюстратор:': 'illustrators'})
# Result map handed to gresults() in PATH.
# - KEY_RELATIONSHIPS is associated with gvalues() over two gtexts()
#   selectors (the 'book-info-left' and 'book-info-right' divs) — presumably
#   labels on the left and values on the right; confirm against ptm.
# - 'description' is requested with the "||" separator, which
#   result_proccessing later splits on.
# - 'title' is taken from the 'alt' attribute of the 'img-cover-book' image.
RESULTS = {
    KEY_RELATIONSHIPS: gvalues(gtexts('div', attrs={'class': 'book-info-left'}),
                               gtexts('div', attrs={'class': 'book-info-right'})),
    'description': gtexts('div', attrs={'id': 'bigcard-description'}, separator="||"),
    'title': gattrs('img', attrs={'class': 'img-cover-book'}, target_attribute='alt')
}
# Nested traversal description passed to Frodo in run():
# gpagins() (template '?page=', page numbers 1 and 10 as start/finish)
#   -> gpages() selecting 'books-name' divs
#     -> gresults() wiring result_proccessing to the RESULTS map.
PATH = {
    gpagins(pagin_template='?page=', start_page_number=1, finish_page_number=10): {
        gpages('div', attrs={'class': 'books-name'}):
            gresults(result_proccessing, result_map=RESULTS)
    }
}
def run(rucksack):
    """
    Start the scraping travel.

    Builds a ``Frodo`` from ``RESORCE``, ``START_PAGE``, ``PATH`` and the
    given ``rucksack``, then calls its ``run`` method.
    """
    Frodo(RESORCE, START_PAGE, PATH, rucksack).run()
A package for organizing web scraping, based on bs4.
Simple example.
<!-- language: lang-py -->
"""
The module contains the rules of scraping.
"""
from ptm import Frodo
from ptm.path_actions import gpagins, gpages, gresults
from ptm.result_actions import KeyRealtionships, gvalues, gtexts, gattrs
from treasy import BookSearchDB
from treasy.session import create_session
from smithy.preparing import prepare
RESORCE = 'http://www.labirint-bookstore.ru'
START_PAGE = 'http://www.labirint-bookstore.ru/books'
def result_proccessing(result_set):
    """
    Prepare one scraped result and hand it to ``BookSearchDB``.

    ``result_set`` is passed to ``prepare`` together with a descriptor of
    the source store. The prepared record is printed; when its
    description splits on ``'||'`` into more than two parts, the
    description is replaced by the stripped third part. The record is
    then given to ``update_book``.
    """
    source = {'name': 'labirint-bookstore', 'url': 'http://www.labirint-bookstore.ru'}
    storage = BookSearchDB(create_session())
    record = prepare(result_set, source)
    print(record)
    parts = record['description'].split('||')
    if len(parts) >= 3:
        record['description'] = parts[2].strip()
    # NOTE(review): the original indentation was lost; the update is assumed
    # to run for every record, not only when the description was trimmed.
    storage.update_book(record)
# Maps label strings (keys) to result field names (values). The object is
# used as a key of RESULTS below.
# NOTE(review): 'Иллюстратор' is listed both without and with a trailing
# colon, and both map to 'illustrators' — presumably deliberate; confirm.
# NOTE(review): 'weidth' looks like a misspelling of 'weight'; it is left
# as-is because it is a runtime key — confirm against the consumer.
KEY_RELATIONSHIPS = KeyRealtionships({
    'ISBN:': 'isbn_numbers',
    'Издательство:': 'publisher',
    'Автор:': 'authors',
    'Переводчик:': 'translators',
    'Иллюстратор': 'illustrators',
    'Серия:': 'series',
    'Жанр:': 'genres',
    'Год выпуска:': 'year',
    'Тип обложки:': 'cover_format',
    'Страниц:': 'page_number',
    'Масса:': 'weidth',
    'Размеры:': 'size',
    'Иллюстратор:': 'illustrators'})
# Result map handed to gresults() in PATH.
# - KEY_RELATIONSHIPS is associated with gvalues() over two gtexts()
#   selectors (the 'book-info-left' and 'book-info-right' divs) — presumably
#   labels on the left and values on the right; confirm against ptm.
# - 'description' is requested with the "||" separator, which
#   result_proccessing later splits on.
# - 'title' is taken from the 'alt' attribute of the 'img-cover-book' image.
RESULTS = {
    KEY_RELATIONSHIPS: gvalues(gtexts('div', attrs={'class': 'book-info-left'}),
                               gtexts('div', attrs={'class': 'book-info-right'})),
    'description': gtexts('div', attrs={'id': 'bigcard-description'}, separator="||"),
    'title': gattrs('img', attrs={'class': 'img-cover-book'}, target_attribute='alt')
}
# Nested traversal description passed to Frodo in run():
# gpagins() (template '?page=', page numbers 1 and 10 as start/finish)
#   -> gpages() selecting 'books-name' divs
#     -> gresults() wiring result_proccessing to the RESULTS map.
PATH = {
    gpagins(pagin_template='?page=', start_page_number=1, finish_page_number=10): {
        gpages('div', attrs={'class': 'books-name'}):
            gresults(result_proccessing, result_map=RESULTS)
    }
}
def run(rucksack):
    """
    Start the scraping travel.

    Builds a ``Frodo`` from ``RESORCE``, ``START_PAGE``, ``PATH`` and the
    given ``rucksack``, then calls its ``run`` method.
    """
    Frodo(RESORCE, START_PAGE, PATH, rucksack).run()
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
ptm-0.0.1a4.tar.gz
(12.6 kB
view details)
Built Distribution
ptm-0.0.1a4-py3-none-any.whl
(21.4 kB
view details)
File details
Details for the file ptm-0.0.1a4.tar.gz
.
File metadata
- Download URL: ptm-0.0.1a4.tar.gz
- Upload date:
- Size: 12.6 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 8faeb4b4dab5fadb71c34f4b943567ce131572ea0dc333329fa1eb6889ef68df |
|
MD5 | c57baf0f113eb275e939f058f976d9d1 |
|
BLAKE2b-256 | 76abde343801648f0031d0ac389c9f6ab4f6e09d4d494879d7e26183c38c941d |
File details
Details for the file ptm-0.0.1a4-py3-none-any.whl
.
File metadata
- Download URL: ptm-0.0.1a4-py3-none-any.whl
- Upload date:
- Size: 21.4 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 830b2f5df5da65891afe00b04d4ee1b9a18c2104d696e96fab661d709b725ba7 |
|
MD5 | f033e9e3077ef5c0aca25141a0da5729 |
|
BLAKE2b-256 | 0b6794f3310ff758c9d0344b3f855813a99c80a21726453827bfdda0e9f96781 |