Web-scraping / Automated Firefox for Python
Project description
PyScrape provides selenium, requests and BeautifulSoup4. It was developed for web-scraping and automated use of Firefox.
Typical usage often looks like this:
# scrapeTest.py
# Example script: scripted browser actions for scraping websites.
# ---------------------------------------------------------------
from debug import *
import scrapewd

debug(True)

# A scrape "recipe": an ordered list of browser steps.  Each step is a
# dict with a human-readable "name", an "action" keyword
# (URL / CLICK / SET / GET) and the XPath "path" it targets; SET steps
# additionally carry the "value" to type in.
Google = [
    {"name": "Google",
     "action": "URL",
     "path": "https://www.google.co.uk"},
    {"name": "sign in",
     "action": "CLICK",
     "path": "html/body/div[1]/div[3]/div[1]/div[1]/div[1]/div[2]/div[3]/div/a"},
    {"name": "email",
     "action": "SET",
     "value": "EMAIL",
     "path": "html/body/div[1]/div[2]/div[2]/form/input[10]"},
    {"name": "password",
     "action": "SET",
     "value": "PASSWORD",
     "path": "html/body/div[1]/div[2]/div[2]/form/input[11]"},
    {"name": "submit",
     "action": "CLICK",
     "path": "html/body/div[1]/div[2]/div[2]/form/input[12]"},
    {"name": "OpenApps",
     "action": "CLICK",
     "path": "html/body/div[1]/div[3]/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/a"},
    {"name": "OpenMail",
     "action": "CLICK",
     "path": "html/body/div[1]/div[3]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]/ul[1]/li[7]/a/span[1]"},
    {"name": "GetRecentMail",
     "action": "GET",
     # NOTE(review): this XPath starts with a space in the original —
     # looks accidental, preserved as-is; confirm against scrapewd.
     "path": " html/body/div[5]/div[2]/div/div[2]/div[1]/div[1]/div[1]/div[2]/div/div/div[2]/div/div[1]/div[1]/div/div[1]/div/div/div[1]/span/a"},
    {"name": "OpenSignOut",
     "action": "CLICK",
     "path": "html/body/div[5]/div[2]/div/div[1]/div[3]/div[1]/div[1]/div[1]/div[2]/div[5]/div[1]/a/span"},
    {"name": "SignOut",
     "action": "CLICK",
     "path": "html/body/div[5]/div[2]/div/div[1]/div[3]/div[1]/div[1]/div[1]/div[2]/div[5]/div[2]/div[3]/div[2]/a"},
]

debug(True)
scrapewd.close()