Extract email addresses from a given URL.
Project description
Extract emails from a given website
Requirements
Python >= 3.6
requests
selenium
Installation
pip install extract_emails
Usage
With default browsers
# Example: collect emails from a site using the bundled ChromeBrowser.
from extract_emails import EmailExtractor
from extract_emails.browsers import ChromeBrowser

# The browser is used as a context manager; results are consumed inside the
# block so the browser is still available while they are read.
with ChromeBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Example: collect emails from a site using the bundled RequestsBrowser.
from extract_emails import EmailExtractor
from extract_emails.browsers import RequestsBrowser

# The browser is used as a context manager; results are consumed inside the
# block so the browser is still available while they are read.
with RequestsBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
With custom browser
# Example: plug a custom Selenium-driven Firefox browser into EmailExtractor.
from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface
from selenium import webdriver
from selenium.webdriver.firefox.options import Options


class FirefoxBrowser(BrowserInterface):
    """Custom browser that fetches page sources through Selenium's Firefox driver."""

    def __init__(self):
        """Start a Firefox WebDriver session with default options."""
        ff_options = Options()
        # NOTE(review): the geckodriver path is machine-specific; replace it
        # with your own. Newer Selenium releases may expect a Service object
        # instead of `executable_path` -- confirm against the installed version.
        self._driver = webdriver.Firefox(
            options=ff_options,
            executable_path="/home/di/geckodriver",
        )

    def close(self):
        """Quit the underlying WebDriver session."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Load *url* in the driver and return the resulting page source."""
        self._driver.get(url)
        return self._driver.page_source


# The custom browser is used as a context manager, like the bundled ones;
# results are consumed inside the block while the driver is still running.
with FirefoxBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
extract_emails-4.0.1.tar.gz
(13.7 kB, view hashes)
Built Distribution
Close
Hashes for extract_emails-4.0.1-py2.py3-none-any.whl
Algorithm | Hash digest |
---|---|
SHA256 | e8050d7a3b25ed67465a4aefba229fd4d3cdb8baddb7ff0b4ea027fe00e8aa2e |
MD5 | 53dfcfa44cd569dcf2f889100f557c3f |
BLAKE2b-256 | 400aab2154a7c6237287187c74d58c9725b012a5e5d31fe576e9cc77004bf7c4 |