Extract email addresses from a given URL.
Project description
Extract emails from a given website
Requirements
Python >= 3.6
requests
selenium
Installation
pip install extract_emails
Usage
With default browsers
from extract_emails import EmailExtractor
from extract_emails.browsers import ChromeBrowser

# Use the browser as a context manager and consume the results while it is open.
with ChromeBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
from extract_emails import EmailExtractor
from extract_emails.browsers import RequestsBrowser

# Use the browser as a context manager and consume the results while it is open.
with RequestsBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
With custom browser
from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface
from selenium import webdriver
from selenium.webdriver.firefox.options import Options


class FirefoxBrowser(BrowserInterface):
    """Custom browser that fetches pages through Selenium's Firefox driver."""

    def __init__(self):
        """Create the Firefox driver with default options."""
        ff_options = Options()
        self._driver = webdriver.Firefox(
            options=ff_options,
            # Machine-specific path to the geckodriver binary; adjust as needed.
            executable_path="/home/di/geckodriver",
        )

    def close(self):
        """Quit the underlying Firefox driver."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Navigate to *url* and return the page source reported by the driver."""
        self._driver.get(url)
        return self._driver.page_source


# NOTE(review): `with` support presumably comes from BrowserInterface — confirm.
with FirefoxBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
extract_emails-4.0.3.tar.gz
(14.5 kB, view hashes)
Built Distribution
Close
Hashes for extract_emails-4.0.3-py2.py3-none-any.whl
| Algorithm | Hash digest |
|---|---|
| SHA256 | c7c0f558134970f83ef1573338de468efff6b24c7846032d3e0ddffb9fcb3ace |
| MD5 | 6506a87adecf9bb7ef5a810c0bdbef6f |
| BLAKE2b-256 | b2aeab0304fc0985c89c16e3aa2f420c9fc20d09a3a483744adc866000fa5aa8 |