Extract email addresses from a given URL.
Project description
Extract emails from a given website
Requirements
Python >= 3.6
requests
selenium
Installation
pip install extract_emails
Usage
With default browsers
# Example: open a ChromeBrowser, point an EmailExtractor at the site with
# depth=2, then print each result followed by its dict form.
from extract_emails import EmailExtractor
from extract_emails.browsers import ChromeBrowser

with ChromeBrowser() as chrome:
    extractor = EmailExtractor("http://www.tomatinos.com/", chrome, depth=2)
    for found in extractor.get_emails():
        print(found)
        print(found.as_dict())

# Output shown for this example:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Example: same flow as the Chrome variant, but the pages are fetched through
# a RequestsBrowser instead.
from extract_emails import EmailExtractor
from extract_emails.browsers import RequestsBrowser

with RequestsBrowser() as http_browser:
    extractor = EmailExtractor("http://www.tomatinos.com/", http_browser, depth=2)
    for found in extractor.get_emails():
        print(found)
        print(found.as_dict())

# Output shown for this example:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
With custom browser
# Example: plug a user-defined browser into EmailExtractor by subclassing
# BrowserInterface and wrapping a Selenium Firefox driver.
from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface
from selenium import webdriver
from selenium.webdriver.firefox.options import Options


class FirefoxBrowser(BrowserInterface):
    """Browser implementation backed by a Selenium Firefox driver."""

    def __init__(self):
        """Start a Firefox driver using the geckodriver at a fixed local path."""
        firefox_options = Options()
        # NOTE(review): newer Selenium releases replace `executable_path` with
        # a Service object -- confirm against the installed Selenium version.
        self._driver = webdriver.Firefox(
            executable_path="/home/di/geckodriver",
            options=firefox_options,
        )

    def close(self):
        """Quit the underlying Firefox driver."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Load *url* in the driver and return the page source it reports."""
        driver = self._driver
        driver.get(url)
        return driver.page_source


# The `with` support is presumably inherited from BrowserInterface; this class
# defines no __enter__/__exit__ of its own.
with FirefoxBrowser() as firefox:
    extractor = EmailExtractor("http://www.tomatinos.com/", firefox, depth=2)
    for found in extractor.get_emails():
        print(found)
        print(found.as_dict())

# Output shown for this example:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
extract_emails-4.1.0.tar.gz
(15.0 kB, view hashes)
Built Distribution
Close
Hashes for extract_emails-4.1.0-py2.py3-none-any.whl
| Algorithm | Hash digest |
|---|---|
| SHA256 | 47f2edf4ca0733aa0c7e55847926af06ba18620e31910fd8ae02afb1c7484fc2 |
| MD5 | 9e3785b2f50c2b5726edd229346317bb |
| BLAKE2b-256 | 2361d64b6d31ea432edccc2062ff3549518c325b1428fdf28e467c7cb23cf900 |