Python download library
Project description
Download Boss
Python download library
1. Installation
pip install download_boss
2. Usage
2.1 HttpClient
Simple request
from requests import Request
from download_boss.client.request.RequestEnvelope import RequestEnvelope
from download_boss.client.HttpClient import HttpClient
request = RequestEnvelope(
Request(method='POST', url='https://httpbin.org/anything/hello', json={"hello": "world"},
{'verify': False, 'timeout': 10})
)
response = HttpClient().download(request)
print(response.text)
Retry based on HTTP status codes
from requests import Request
from download_boss.client.request.RequestEnvelope import RequestEnvelope
from download_boss.client.HttpClient import HttpClient
from download_boss.error.ClientRetriable import ClientRetriable
request = RequestEnvelope(
Request(method='GET', url='https://httpbin.org/anything/hello')
)
client = HttpClient(throwRetriableStatusCodeRanges=[401, range(500,599)])
while True:
try:
response = client.download(request)
print(response.text)
break
except ClientRetriable:
continue
Kerberos authentication:
from requests import Request
from requests_kerberos import HTTPKerberosAuth, OPTIONAL
from download_boss.client.request.RequestEnvelope import RequestEnvelope
from download_boss.client.HttpClient import HttpClient
request = RequestEnvelope(
Request(method='POST', url='https://httpbin.org/anything/kerb', auth=HTTPKerberosAuth(mutual_authentication=OPTIONAL))
)
response = HttpClient().download(request)
2.2. RetryWrapper
Retry automatically some HTTP status codes
from requests import Request
from download_boss.client.request.RequestEnvelope import RequestEnvelope
from download_boss.client.HttpClient import HttpClient
from download_boss.wrapper.RetryWrapper import RetryWrapper
from download_boss.error.RetriesExhausted import RetriesExhausted
request = RequestEnvelope(
Request(method='GET', url='https://httpbin.org/status/500')
)
client = HttpClient(throwRetriableStatusCodeRanges=[401, range(500,599)])
client = RetryWrapper(client, count=1, catchRetriableStatusCodeRanges=[range(500,599)])
try:
response = client.download(request)
except RetriesExhausted:
print("Retries exhausted!")
"""
2024-12-03 11:51:10,085 [ INFO] HttpClient.py :: download() - Requesting: GET https://httpbin.org/status/500
2024-12-03 11:51:10,485 [ INFO] RetryWrapper.py :: download() - Retrying... GET https://httpbin.org/status/500
2024-12-03 11:52:10,485 [ INFO] HttpClient.py :: download() - Requesting: GET https://httpbin.org/status/500
Retries exhausted!
"""
2.3. DelayWrapper
Delay download calls by 2-5 seconds
from requests import Request
from download_boss.client.request.RequestEnvelope import RequestEnvelope
from download_boss.client.HttpClient import HttpClient
from download_boss.wrapper.RetryWrapper import RetryWrapper
from download_boss.wrapper.DelayWrapper import DelayWrapper
from download_boss.error.RetriesExhausted import RetriesExhausted
client = HttpClient(throwRetriableStatusCodeRanges=[401, range(500,599)])
client = RetryWrapper(client, count=1, catchRetriableStatusCodeRanges=[range(500,599)])
client = DelayWrapper(client, length=2, maxLength=5)
requests = [
RequestEnvelope( Request(method='GET', url='https://httpbin.org/anything/one') ),
RequestEnvelope( Request(method='GET', url='https://httpbin.org/anything/two') ),
RequestEnvelope( Request(method='GET', url='https://httpbin.org/anything/three') )
]
for r in requests:
response = client.download(r)
"""
2024-12-03 12:00:28,804 [ INFO] DelayWrapper.py :: download() - Delaying by 3s ... GET https://httpbin.org/anything/one
2024-12-03 12:00:31,805 [ INFO] HttpClient.py :: download() - Requesting: GET https://httpbin.org/anything/one
2024-12-03 12:00:32,206 [ INFO] DelayWrapper.py :: download() - Delaying by 2s ... GET https://httpbin.org/anything/two
2024-12-03 12:00:34,208 [ INFO] HttpClient.py :: download() - Requesting: GET https://httpbin.org/anything/two
2024-12-03 12:00:34,827 [ INFO] DelayWrapper.py :: download() - Delaying by 5s ... GET https://httpbin.org/anything/three
2024-12-03 12:00:39,830 [ INFO] HttpClient.py :: download() - Requesting: GET https://httpbin.org/anything/three
"""
2.4. FileCacheWrapper
from os.path import join, dirname
from requests import Request
from download_boss.client.request.RequestEnvelope import RequestEnvelope
from download_boss.client.HttpClient import HttpClient
from download_boss.wrapper.RetryWrapper import RetryWrapper
from download_boss.wrapper.DelayWrapper import DelayWrapper
from download_boss.wrapper.FileCacheWrapper import FileCacheWrapper
cacheFolderPath = join(dirname(__file__), "cache")
cacheLength = 60*60*24 # 1 day
client = HttpClient(throwRetriableStatusCodeRanges=[401, range(500,599)])
client = RetryWrapper(client, count=1, catchRetriableStatusCodeRanges=[range(500,599)])
client = DelayWrapper(client, length=2, maxLength=5)
client = FileCacheWrapper(client, cacheFolderPath, cacheLength)
requests = [
RequestEnvelope( Request(method='GET', url='https://httpbin.org/anything/one') ),
RequestEnvelope( Request(method='GET', url='https://httpbin.org/anything/one') ),
RequestEnvelope( Request(method='GET', url='https://httpbin.org/anything/one') )
]
for r in requests:
response = client.download(r)
"""
2024-12-03 13:26:24,921 [ INFO] FileCacheWrapper.py :: _getCache() - Cache miss: GET https://httpbin.org/anything/one
2024-12-03 13:26:24,921 [ INFO] DelayWrapper.py :: download() - Delaying by 3s ... GET https://httpbin.org/anything/one
2024-12-03 13:26:27,923 [ INFO] HttpClient.py :: download() - Requesting: GET https://httpbin.org/anything/one
2024-12-03 13:26:27,956 [DEBUG] connectionpool.py :: _new_conn() - Starting new HTTPS connection (1): httpbin.org:443
2024-12-03 13:26:29,256 [DEBUG] connectionpool.py :: _make_request() - https://httpbin.org:443 "GET /anything/one HTTP/11" 200 370
2024-12-03 13:26:29,257 [DEBUG] FileCacheWrapper.py :: _getCache() - Cache found: GET https://httpbin.org/anything/one
2024-12-03 13:26:29,263 [DEBUG] FileCacheWrapper.py :: _getCache() - Cache found: GET https://httpbin.org/anything/one
"""
3. Contribute
3.1. Install locally
Install pip/python.
Clone the project.
Create virtual env:
# Install virtualenv module
pip install --upgrade virtualenv
cd <PROJECT_ROOT>
# Create venv in your project
python -m venv venv
# Activate your virtual environment (Windows)
.\venv\Scripts\activate
+
# Activate your virtual environment (Linux)
chmod +x venv/bin/activate
source venv/bin/activate
Install project dependencies:
pip install -r requirements.txt
Install module locally as editable
pip install -e .
3.2. Testing
# Run test suite (Windows)
.\wtests.bat
# Run test suite (Linux)
./tests.sh
3.3. Release (automated)
Git add/commit/push to GitHub. The GitHub action will automatically publish the new version to PyPi.
3.4. Release (manual)
Install dependencies
pip install --upgrade setuptools wheel build twine
Build the package (wheel and sdist)
python -m build
Ensure .pypirc
in user folder is correct, then upload
python -m twine upload dist/*