A simple web scraper that reads gethomebase.com's schedule and updates Google Calendar.
Project description
├── README.md
├── __pycache__
│   └── config.cpython-312.pyc
├── events.db
├── requirements.txt
├── setup.py
└── src
    └── homebase_calendar_sync
        ├── __init__.py
        ├── __main__.py
        ├── __pycache__
        │   ├── config.cpython-312.pyc
        │   └── homebase_calendar_sync.cpython-312.pyc
        ├── config.py
        ├── db
        │   ├── __pycache__
        │   │   └── models.cpython-312.pyc
        │   └── models.py
        ├── google_client
        │   ├── __pycache__
        │   │   ├── auth.cpython-312.pyc
        │   │   ├── drive_types.cpython-312.pyc
        │   │   └── google_client.cpython-312.pyc
        │   ├── auth.py
        │   ├── drive_types.py
        │   └── google_client.py
        └── homebase_calendar_sync.py
from setuptools import setup, find_packages


def _read_text(path):
    """Return the UTF-8 text of *path*, closing the file handle afterwards."""
    with open(path, encoding='utf-8') as handle:
        return handle.read()


def _read_requirements(path):
    """Return the requirement specifiers listed in *path*.

    Blank lines and ``#`` comment lines are skipped so they do not end up as
    invalid entries in ``install_requires``.
    """
    stripped_lines = (line.strip() for line in _read_text(path).splitlines())
    return [line for line in stripped_lines if line and not line.startswith('#')]


# NOTE(review): the project tree places the package under ``src/``; confirm
# that a bare ``find_packages()`` really discovers it from the project root.
setup(
    name='homebase_calendar_sync',
    version='0.1.0',
    author='David Midlo',
    author_email='dmidlo@gmail.com',
    # Double-quoted because the text contains an apostrophe; inside a
    # single-quoted literal it terminated the string and broke the file.
    description="A simple web scraper that reads gethomebase.com's schedule and updates google calendar.",
    long_description=_read_text('README.md'),
    long_description_content_type='text/markdown',
    url='https://github.com/dmidlo/homebase_calendar_sync',  # Update this to your project's URL
    packages=find_packages(),
    install_requires=_read_requirements('requirements.txt'),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    # The package sources nest double quotes inside double-quoted f-strings
    # (PEP 701), which only parses on Python 3.12 and newer.
    python_requires='>=3.12',
)
import os
import json
import httpx
from bs4 import BeautifulSoup
import pendulum
from pathlib import Path
from dotenv import load_dotenv
from rich import print
import hashlib
import config
from db.models import setup_database, connect_database
from google_client.auth import Metadata
from google_client.google_client import GoogleClient
# Load environment variables from the ``.env`` file three directories above
# this module before any of the settings below are read.
DOTENV_BASE_DIR = Path(__file__).parent.parent.parent
load_dotenv(DOTENV_BASE_DIR / ".env")

# Homebase credentials and the employee whose shifts are synchronised.
# Every variable is mandatory: a missing one raises KeyError at import time.
HOMEBASE_USERNAME = os.environ["CC_HOMEBASE_USERNAME"]
HOMEBASE_PASSWORD = os.environ["CC_HOMEBASE_PASSWORD"]
EMPLOYEE_FIRSTNAME = os.environ["CC_HOMEBASE_EMPLOYEE_FIRSTNAME"]
EMPLOYEE_LASTNAME = os.environ["CC_HOMEBASE_EMPLOYEE_LASTNAME"]

# Requested date window; the scraper accepts the literal "today" or a string
# that ``pendulum.parse`` understands.
START_DATE = os.environ["CC_HOMEBASE_START_DATE"]
END_DATE = os.environ["CC_HOMEBASE_END_DATE"]

# Boolean flag: true only for one of the listed spellings (case-insensitive).
LOOKAHEAD = os.environ["CC_HOMEBASE_LOOKAHEAD"].lower() in (
    "true",
    "1",
    "t",
    "y",
    "yes",
)
# Number of days added to the end date when LOOKAHEAD is enabled.
LOOKAHEAD_DAYS = int(os.environ["CC_HOMEBASE_DAYS_LOOKAHEAD"])
class HomebaseScheduleScraper:
    """Log in to Homebase, download the schedule and extract one employee's shifts.

    All network traffic happens inside ``__init__``. The HTTP client is closed
    before the constructor returns, including when a step fails part-way.
    Afterwards the instance only holds already-downloaded data.
    """

    def __init__(
        self, username, password, first_name, last_name, start_date, end_date
    ) -> None:
        """Authenticate, fetch the schedule and pre-compute the employee's shifts.

        Args:
            username: Homebase account login.
            password: Homebase account password.
            first_name: Employee first name, matched case-insensitively.
            last_name: Employee last name, matched case-insensitively.
            start_date: ``"today"`` or a string accepted by ``pendulum.parse``.
            end_date: ``"today"`` or a string accepted by ``pendulum.parse``.

        Raises:
            RuntimeError: If the schedule could not be downloaded.
        """
        self.username = username
        self.password = password
        self.employee_first_name = first_name
        self.employee_last_name = last_name
        self.start_date, self.end_date = self.initialize_date_range(
            start_date, end_date
        )
        self.login_url = "https://app.joinhomebase.com/accounts/sign-in"
        self.base_schedule_url = (
            "https://app.joinhomebase.com/api/fe/schedule_builder/schedule?"
        )
        self.client = httpx.Client()
        try:
            self.login_payload = {
                "authenticity_token": self.get_authenticity_token(),
                "account[login]": username,
                "account[password]": password,
                "account[remember_me]": 0,
            }
            self.login()
            calendar_text = self.get_calendar_json()
            if calendar_text is None:
                # Fail with a clear message instead of letting json.loads(None)
                # raise an unrelated-looking TypeError.
                raise RuntimeError("Could not download the Homebase schedule.")
            self.calendar_json = json.loads(calendar_text)
            self.employee_id = self.get_employee_id()
            self.employee_jobs = self.get_employee_jobs()
            self.employee_shifts = self.get_employee_shifts()
            self.employee_shifts_in_range = self.filter_shifts_by_date()
        finally:
            # Release the connection pool even when a step above raised.
            self.close()

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def get_login_form(self):
        """Fetch the sign-in page and return its POST ``<form>``.

        Returns ``None`` (after printing the status code) when the page does
        not answer with HTTP 200, or when the page contains no such form.
        """
        response = self.client.get(self.login_url)
        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status Code: {response.status_code}")
            return None
        html_content = BeautifulSoup(response.text, "html.parser")
        return html_content.find("form", method="post")

    def get_authenticity_token(self):
        """Return the hidden ``authenticity_token`` value of the login form.

        Returns ``None`` (after printing a message) when either the form or
        the hidden input is missing.
        """
        login_form = self.get_login_form()
        input_element = None
        if login_form is not None:
            input_element = login_form.find(
                "input", attrs={"name": "authenticity_token", "type": "hidden"}
            )
        if input_element is None:
            print("No input element with `name='authenticity_token'` found.")
            return None
        return input_element.get("value")

    def login(self):
        """POST the login payload and report the resulting status code."""
        response = self.client.post(self.login_url, data=self.login_payload)
        if response.status_code == 200:
            print(f"Homebase Login Successful. Status Code: {response.status_code}")
        else:
            print(f"Homebase Login failed. Status Code: {response.status_code}")

    def get_schedule_route(self):
        """Build, print and return the schedule URL for the configured date range."""
        route = f"{self.base_schedule_url}end_date={self.end_date.to_date_string()}&start_date={self.start_date.to_date_string()}"
        print(route)
        return route

    def get_calendar_json(self):
        """Download the schedule and return the raw response text.

        Returns ``None`` (after printing the status code) on a non-200 answer.
        """
        response = self.client.get(self.get_schedule_route())
        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status Code: {response.status_code}")
            return None
        return response.text

    def get_employee_id(self):
        """Return the id of the ``user`` record matching the employee's name.

        The comparison is case-insensitive. Returns ``None`` when no record
        matches.
        """
        wanted_first = self.employee_first_name.lower()
        wanted_last = self.employee_last_name.lower()
        for record in self.calendar_json["included"]:
            if record["type"] != "user":
                continue
            attributes = record["attributes"]
            if (
                str(attributes["firstName"]).lower() == wanted_first
                and str(attributes["lastName"]).lower() == wanted_last
            ):
                return record["id"]
        return None

    def get_employee_jobs(self) -> list:
        """Return the ids of all ``job`` records that belong to the employee."""
        return [
            record["id"]
            for record in self.calendar_json["included"]
            if record["type"] == "job"
            and record["relationships"]["user"]["data"]["id"] == self.employee_id
        ]

    def get_employee_shifts(self) -> list:
        """Return all ``shift`` records owned by one of the employee's jobs.

        A list is returned rather than a generator so the result can be
        iterated more than once.
        """
        return [
            record
            for record in self.calendar_json["included"]
            if record["type"] == "shift"
            and record["relationships"]["owner"]["data"]["id"] in self.employee_jobs
        ]

    def initialize_date_range(self, start_date, end_date):
        """Turn the configured start/end strings into a ``(start, end)`` pair.

        ``"today"`` selects the current day; any other value is handed to
        ``pendulum.parse``. The start is moved to the beginning of its day and
        the end to the end of its day. When the module-level ``LOOKAHEAD``
        flag is set, the start is moved to the start of its week and the end
        is pushed ``LOOKAHEAD_DAYS`` days forward and then to the end of that
        week.
        """
        if start_date == "today":
            start = pendulum.now().start_of("day")
        else:
            start = pendulum.parse(start_date).start_of("day")
        if end_date == "today":
            end = pendulum.now().end_of("day")
        else:
            end = pendulum.parse(end_date).end_of("day")
        if LOOKAHEAD:
            start = start.start_of("week")
            end = end.add(days=LOOKAHEAD_DAYS).end_of("week")
        return start, end

    def filter_shifts_by_date(self) -> list:
        """Return the employee's shifts whose ``startAt`` lies in the date range.

        Both range ends are inclusive. A list is returned so that
        ``get_employee_shifts_json`` can be called repeatedly.
        """
        return [
            shift
            for shift in self.employee_shifts
            if self.start_date
            <= pendulum.parse(shift["attributes"]["startAt"])
            <= self.end_date
        ]

    def get_employee_shifts_json(self) -> str:
        """Serialise the in-range shifts to a JSON array string.

        Each element carries ``shiftId``, ``firstName``, ``lastName``,
        ``jobRole``, ``shiftDate``, ``startTime`` and ``endTime``.
        """
        shifts = []
        for record in self.employee_shifts_in_range:
            attributes = record["attributes"]
            # Parse the start once; it supplies both the date and the time.
            starts_at = pendulum.parse(attributes["startAt"])
            ends_at = pendulum.parse(attributes["endAt"])
            shifts.append(
                {
                    "shiftId": record["id"],
                    "firstName": self.employee_first_name,
                    "lastName": self.employee_last_name,
                    "jobRole": attributes["roleName"],
                    "shiftDate": starts_at.to_date_string(),
                    "startTime": starts_at.to_time_string(),
                    "endTime": ends_at.to_time_string(),
                }
            )
        return json.dumps(shifts)
class HomebaseCalendarSync:
    """Mirror an employee's Homebase shifts into the primary Google calendar.

    Construction authenticates against Google, prepares the local database,
    scrapes Homebase and downloads the primary calendar's events. Calling the
    instance then reconciles the local ``events`` and ``shifts`` tables and
    creates calendar events for shifts that are not on the calendar yet.
    """

    # Prefix of ``source.title`` on calendar events created from a shift; the
    # full title has the form ``homebaseShiftId-<shift id>``.
    _SHIFT_SOURCE_PREFIX = "homebaseShiftId"

    def __init__(self) -> None:
        """Set up the Google client and database, then load remote data."""
        config.META = Metadata.metadata_singleton_factory()
        config.META.check_for_client_secret_and_import()
        config.GOOGLE = GoogleClient()
        setup_database()
        self.scraper = HomebaseScheduleScraper(
            HOMEBASE_USERNAME,
            HOMEBASE_PASSWORD,
            EMPLOYEE_FIRSTNAME,
            EMPLOYEE_LASTNAME,
            START_DATE,
            END_DATE,
        )
        self.primary_calendar = config.GOOGLE.get_primary_calendar()
        self.primary_calendar_events = config.GOOGLE.get_calendar_events(
            self.primary_calendar["id"]
        )
        self.remote_homebase_shifts = json.loads(
            self.scraper.get_employee_shifts_json()
        )

    def __call__(self):
        """Run the full synchronisation and close the database afterwards."""
        try:
            self.update_events_db_from_remote()
            self.add_homebase_shifts()
        finally:
            # Close the connection even when a sync step raised. The guard
            # keeps a failed connect from masking the original error.
            database = getattr(config, "DB", None)
            if database is not None:
                database.close()

    def get_event_hash(self, event: dict) -> str:
        """Return the SHA-512 hex digest of *event* serialised as JSON.

        Keys are sorted before hashing, so key order does not affect the
        result.
        """
        event_str = json.dumps(event, sort_keys=True)
        return hashlib.sha512(event_str.encode("utf-8")).hexdigest()

    @classmethod
    def _parse_homebase_shift_id(cls, event: dict):
        """Return the shift id encoded in *event*'s source title, else ``None``.

        Only the first ``-`` separates the prefix from the id, so ids that
        themselves contain ``-`` are returned in full. Events without a
        ``source`` or without a ``title`` yield ``None``.
        """
        source = event.get("source")
        if not source:
            return None
        title = source.get("title") or ""
        prefix, separator, shift_id = title.partition("-")
        if separator and prefix == cls._SHIFT_SOURCE_PREFIX:
            return shift_id
        return None

    @staticmethod
    def _build_calendar_event(shift: dict, timezone_name: str) -> dict:
        """Translate one scraped shift into a Google Calendar event body."""
        start = pendulum.parse(
            f"{shift['shiftDate']} {shift['startTime']}", tz=timezone_name
        )
        end = pendulum.parse(
            f"{shift['shiftDate']} {shift['endTime']}", tz=timezone_name
        )
        return {
            "summary": f"Homebase - {shift['jobRole']}",
            "description": f"{shift['firstName']} {shift['lastName']}",
            "start": {
                "dateTime": start.to_iso8601_string(),
                "timeZone": timezone_name,
            },
            "end": {
                "dateTime": end.to_iso8601_string(),
                "timeZone": timezone_name,
            },
            "source": {
                "title": f"homebaseShiftId-{shift['shiftId']}",
                "url": "https://app.joinhomebase.com/",
            },
        }

    def update_events_db_from_remote(self):
        """Make the local ``events`` table match the downloaded calendar events.

        New events are inserted, events whose hash changed are updated, and
        rows whose event id is no longer present remotely are deleted.
        """
        connect_database()
        remote_events = set()
        for event in self.primary_calendar_events:
            event_id = event["id"]
            event_hash = self.get_event_hash(event)
            remote_events.add(event_id)

            homebase_shift_id = self._parse_homebase_shift_id(event)
            from_homebase = 0 if homebase_shift_id is None else 1  # 0/1 - False/True

            config.DB_CURSOR.execute(
                "SELECT hash FROM events WHERE event_id = ?", (event_id,)
            )
            row = config.DB_CURSOR.fetchone()
            if row is None:
                config.DB_CURSOR.execute(
                    "INSERT INTO events (event_id, hash, from_homebase, homebase_shift_id) VALUES (?, ?, ?, ?)",
                    (event_id, event_hash, from_homebase, homebase_shift_id),
                )
            elif row[0] != event_hash:
                config.DB_CURSOR.execute(
                    "UPDATE events SET hash = ? WHERE event_id = ?",
                    (event_hash, event_id),
                )
                print(f"Event updated: {event_id}")
        config.DB.commit()

        # Prune local events to match remote.
        config.DB_CURSOR.execute("SELECT event_id FROM events")
        local_events = {row[0] for row in config.DB_CURSOR.fetchall()}
        for event_id in local_events - remote_events:
            config.DB_CURSOR.execute(
                "DELETE FROM events WHERE event_id = ?", (event_id,)
            )
            print(f"Event deleted: {event_id}")
        config.DB.commit()

    def get_homebase_events(self) -> set:
        """Return the shift ids of calendar events that originate from Homebase."""
        homebase_events = set()
        for event in self.primary_calendar_events:
            shift_id = self._parse_homebase_shift_id(event)
            if shift_id is not None:
                homebase_events.add(shift_id)
        return homebase_events

    def add_homebase_shifts(self):
        """Record scraped shifts locally and create missing calendar events.

        For each scraped shift:
          * unknown shift: store its hash, then create a calendar event unless
            one already exists in the ``events`` table or on the calendar;
          * known shift with a different hash: store the new hash;
          * known, unchanged shift: create a calendar event if none exists.

        Afterwards shifts that are no longer reported by Homebase are removed
        from the ``shifts`` table and the ``events`` table is re-synchronised.
        """
        connect_database()
        remote_shifts = {shift["shiftId"] for shift in self.remote_homebase_shifts}
        homebase_events = self.get_homebase_events()
        # The local time zone does not change between shifts; look it up once.
        timezone_name = pendulum.now().timezone_name
        calendar_id = self.primary_calendar["id"]

        for shift in self.remote_homebase_shifts:
            shift_id = shift["shiftId"]
            shift_hash = self.get_event_hash(shift)
            event = self._build_calendar_event(shift, timezone_name)
            # Ids parsed from event titles are strings, so compare as strings.
            on_calendar = str(shift_id) in homebase_events

            config.DB_CURSOR.execute(
                "SELECT hash FROM shifts WHERE homebase_shift_id = ?",
                (shift_id,),
            )
            row = config.DB_CURSOR.fetchone()

            # NOTE(review): branch nesting reconstructed so that the hash
            # comparison applies to the ``shifts`` row - confirm against the
            # original layout.
            if row is None:
                config.DB_CURSOR.execute(
                    "INSERT INTO shifts (homebase_shift_id, hash) VALUES (?, ?)",
                    (shift_id, shift_hash),
                )
                print(f"New shift added: {shift_hash}")
                config.DB.commit()
                config.DB_CURSOR.execute(
                    "SELECT hash FROM events WHERE homebase_shift_id = ?",
                    (shift_id,),
                )
                known_event = config.DB_CURSOR.fetchone()
                if known_event is None and not on_calendar:
                    config.GOOGLE.create_new_event(calendar_id, event)
            elif row[0] != shift_hash:
                config.DB_CURSOR.execute(
                    "UPDATE shifts SET hash = ? WHERE homebase_shift_id = ?",
                    (shift_hash, shift_id),
                )
                print(f"Shift updated: {shift_hash}")
                # TODO: for CRUD operations, this is where integration code for UPDATES to
                # TODO homebase's shift times would be processed.
                config.DB.commit()
            elif not on_calendar:
                config.GOOGLE.create_new_event(calendar_id, event)

        # Prune local shifts to match remote.
        config.DB_CURSOR.execute("SELECT homebase_shift_id FROM shifts")
        local_shifts = {row[0] for row in config.DB_CURSOR.fetchall()}
        for stale_shift_id in local_shifts - remote_shifts:
            config.DB_CURSOR.execute(
                "DELETE FROM shifts WHERE homebase_shift_id = ?", (stale_shift_id,)
            )
            print(f"Shift deleted: {stale_shift_id}")
        config.DB.commit()
        # NOTE(review): this re-sync works from the event list downloaded in
        # __init__, so events created above are not part of it - confirm that
        # is intended.
        self.update_events_db_from_remote()
def main():
    """Build the synchroniser and run it once."""
    HomebaseCalendarSync()()


if __name__ == "__main__":
    main()
====================
Complete my setup.py file with an entry point on `def main`,
and use twine and keyring for deployment.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
File details
Details for the file homebase_calendar_sync-0.1.0.tar.gz.
File metadata
- Download URL: homebase_calendar_sync-0.1.0.tar.gz
- Upload date:
- Size: 7.3 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.1.0 CPython/3.12.3
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 23744f0bca2842845df5bc89b8d854f62500ab5f4c889c1e67709bc8174678a4 |
| MD5 | 522d95dcf98571f3f041f059b4b97eee |
| BLAKE2b-256 | 210fa54bb30367c0d77002c127329b4b6aa4fd4c65fe22bdde2044d7165d8916 |
File details
Details for the file homebase_calendar_sync-0.1.0-py3-none-any.whl.
File metadata
- Download URL: homebase_calendar_sync-0.1.0-py3-none-any.whl
- Upload date:
- Size: 9.7 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.1.0 CPython/3.12.3
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | dff6c61c0864b7889a327f7c2c80f2fcdc62e9fde763ffc36c50f3b512f03b4c |
| MD5 | 985bc90e70d414e31a5980848b88fa8e |
| BLAKE2b-256 | 03aaf83047545ab47b3378f8ad2870fe98a61a56baf882485cdd2233afaaf686 |