Skip to main content

A simple web scraper that reads gethomebase.com's schedule and updates Google Calendar.

Project description

├── README.md
├── __pycache__
│   └── config.cpython-312.pyc
├── events.db
├── requirements.txt
├── setup.py
└── src
    └── homebase_calendar_sync
        ├── __init__.py
        ├── __main__.py
        ├── __pycache__
        │   ├── config.cpython-312.pyc
        │   └── homebase_calendar_sync.cpython-312.pyc
        ├── config.py
        ├── db
        │   ├── __pycache__
        │   │   └── models.cpython-312.pyc
        │   └── models.py
        ├── google_client
        │   ├── __pycache__
        │   │   ├── auth.cpython-312.pyc
        │   │   ├── drive_types.cpython-312.pyc
        │   │   └── google_client.cpython-312.pyc
        │   ├── auth.py
        │   ├── drive_types.py
        │   └── google_client.py
        └── homebase_calendar_sync.py
from setuptools import setup, find_packages

# Read the packaging inputs with context managers so the file handles are
# closed deterministically rather than left for the garbage collector.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

with open('requirements.txt', encoding='utf-8') as requirements_file:
    # Blank lines and comment lines are not requirement specifiers; drop them.
    install_requires = [
        line.strip()
        for line in requirements_file
        if line.strip() and not line.strip().startswith('#')
    ]

setup(
    name='homebase_calendar_sync',
    version='0.1.0',
    author='David Midlo',
    author_email='dmidlo@gmail.com',
    # Double-quoted on purpose: the text contains an apostrophe, which would
    # terminate a single-quoted literal early and make this file unparsable.
    description="A simple web scraper that reads gethomebase.com's schedule and updates google calendar.",
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/dmidlo/homebase_calendar_sync',  # Update this to your project's URL
    packages=find_packages(),
    install_requires=install_requires,
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
import os
import json
import httpx
from bs4 import BeautifulSoup
import pendulum
from pathlib import Path
from dotenv import load_dotenv
from rich import print
import hashlib

import config
from db.models import setup_database, connect_database
from google_client.auth import Metadata
from google_client.google_client import GoogleClient

# The `.env` file is looked up three directory levels above this module.
DOTENV_BASE_DIR = Path(__file__).parent.parent.parent
load_dotenv(DOTENV_BASE_DIR / ".env")

# Every setting below is mandatory: indexing `os.environ` raises KeyError at
# import time when a variable is missing.
HOMEBASE_USERNAME, HOMEBASE_PASSWORD = (
    os.environ["CC_HOMEBASE_USERNAME"],
    os.environ["CC_HOMEBASE_PASSWORD"],
)
EMPLOYEE_FIRSTNAME, EMPLOYEE_LASTNAME = (
    os.environ["CC_HOMEBASE_EMPLOYEE_FIRSTNAME"],
    os.environ["CC_HOMEBASE_EMPLOYEE_LASTNAME"],
)
START_DATE, END_DATE = (
    os.environ["CC_HOMEBASE_START_DATE"],
    os.environ["CC_HOMEBASE_END_DATE"],
)

# Boolean flag: any of the listed spellings (case-insensitive) means True.
LOOKAHEAD = os.environ["CC_HOMEBASE_LOOKAHEAD"].lower() in [
    "true",
    "1",
    "t",
    "y",
    "yes",
]
# Number of days added to the end date when LOOKAHEAD is enabled.
LOOKAHEAD_DAYS = int(os.environ["CC_HOMEBASE_DAYS_LOOKAHEAD"])


class HomebaseScheduleScraper:
    """Log in to Homebase and collect one employee's shifts for a date range.

    All network work happens in ``__init__``: the login form is fetched, the
    credentials are posted, the schedule JSON is downloaded, and the HTTP
    client is closed again. Afterwards the instance only holds parsed data.

    Args:
        username: Homebase account login.
        password: Homebase account password.
        first_name: Employee first name, matched case-insensitively.
        last_name: Employee last name, matched case-insensitively.
        start_date: ``"today"`` or a date string accepted by ``pendulum.parse``.
        end_date: ``"today"`` or a date string accepted by ``pendulum.parse``.
    """

    def __init__(
        self, username, password, first_name, last_name, start_date, end_date
    ) -> None:
        self.username = username
        self.password = password
        self.start_date, self.end_date = self.initialize_date_range(
            start_date, end_date
        )
        self.login_url = "https://app.joinhomebase.com/accounts/sign-in"
        self.base_schedule_url = (
            "https://app.joinhomebase.com/api/fe/schedule_builder/schedule?"
        )
        self.client = httpx.Client()
        try:
            self.login_payload = {
                "authenticity_token": self.get_authenticity_token(),
                "account[login]": username,
                "account[password]": password,
                "account[remember_me]": 0,
            }
            self.login()
            # NOTE: get_calendar_json() returns None on a non-200 response, in
            # which case json.loads raises TypeError here.
            self.calendar_json = json.loads(self.get_calendar_json())
            self.employee_first_name = first_name
            self.employee_last_name = last_name
            self.employee_id = self.get_employee_id()
            self.employee_jobs = self.get_employee_jobs()
            self.employee_shifts = self.get_employee_shifts()
            self.employee_shifts_in_range = self.filter_shifts_by_date()
        finally:
            # Release the connection pool even when a step above fails.
            self.close()

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def get_login_form(self):
        """Fetch the sign-in page and return its first ``method="post"`` form.

        Returns:
            The form element, or ``None`` when the page request does not
            return status 200 or the page contains no such form.
        """
        response = self.client.get(self.login_url)

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status Code: {response.status_code}")
            return None

        html_content = BeautifulSoup(response.text, "html.parser")
        return html_content.find("form", method="post")

    def get_authenticity_token(self):
        """Return the hidden ``authenticity_token`` value from the login form.

        Returns:
            The token string, or ``None`` (after printing a diagnostic) when
            either the form or the hidden input cannot be found.
        """
        login_form = self.get_login_form()
        if login_form is None:
            print("No login form found; cannot read `authenticity_token`.")
            return None

        input_element = login_form.find(
            "input", attrs={"name": "authenticity_token", "type": "hidden"}
        )
        if input_element is None:
            # Previously this case crashed with AttributeError on `.get`.
            print("No input element with `name='authenticity_token'` found.")
            return None
        return input_element.get("value")

    def login(self):
        """Post the login payload and print whether the response was 200."""
        response = self.client.post(self.login_url, data=self.login_payload)

        if response.status_code == 200:
            print(f"Homebase Login Successful. Status Code: {response.status_code}")
        else:
            print(f"Homebase Login failed. Status Code: {response.status_code}")

    def get_schedule_route(self):
        """Build, print, and return the schedule API URL for the date range."""
        route = f"{self.base_schedule_url}end_date={self.end_date.to_date_string()}&start_date={self.start_date.to_date_string()}"
        print(route)
        return route

    def get_calendar_json(self):
        """Download the schedule and return the raw response text.

        Returns:
            The response body on status 200; otherwise prints the status code
            and returns ``None``.
        """
        response = self.client.get(self.get_schedule_route())

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status Code: {response.status_code}")
            return None
        return response.text

    def get_employee_id(self):
        """Return the id of the ``user`` record matching the employee's name.

        Names are compared case-insensitively. Returns ``None`` when no
        record matches.
        """
        wanted_first = self.employee_first_name.lower()
        wanted_last = self.employee_last_name.lower()

        for record in self.calendar_json["included"]:
            if record["type"] != "user":
                continue
            attributes = record["attributes"]
            if (
                str(attributes["firstName"]).lower() == wanted_first
                and str(attributes["lastName"]).lower() == wanted_last
            ):
                return record["id"]
        return None

    def get_employee_jobs(self):
        """Return the ids of all ``job`` records that belong to the employee."""
        return [
            record["id"]
            for record in self.calendar_json["included"]
            if record["type"] == "job"
            and record["relationships"]["user"]["data"]["id"] == self.employee_id
        ]

    def get_employee_shifts(self):
        """Return the ``shift`` records owned by one of the employee's jobs.

        A list is returned (not a generator) so the result can be iterated
        more than once.
        """
        job_ids = set(self.employee_jobs)
        return [
            record
            for record in self.calendar_json["included"]
            if record["type"] == "shift"
            and record["relationships"]["owner"]["data"]["id"] in job_ids
        ]

    def initialize_date_range(self, start_date, end_date):
        """Turn the configured start/end values into a pendulum date range.

        ``"today"`` maps to the current day; anything else is parsed with
        ``pendulum.parse``. The start is snapped to the start of its day and
        the end to the end of its day. When the module-level ``LOOKAHEAD``
        flag is set, the start moves to the start of its week and the end is
        pushed out by ``LOOKAHEAD_DAYS`` days and then to the end of that week.

        Returns:
            A ``(start, end)`` tuple.
        """
        if start_date == "today":
            start = pendulum.now().start_of("day")
        else:
            start = pendulum.parse(start_date).start_of("day")
        if end_date == "today":
            end = pendulum.now().end_of("day")
        else:
            end = pendulum.parse(end_date).end_of("day")

        if LOOKAHEAD:
            start = start.start_of("week")
            end = end.add(days=LOOKAHEAD_DAYS).end_of("week")

        return start, end

    def filter_shifts_by_date(self):
        """Return the employee shifts whose ``startAt`` is inside the range.

        Both bounds are inclusive. A list is returned so that
        ``get_employee_shifts_json`` can be called repeatedly.
        """
        return [
            shift
            for shift in self.employee_shifts
            if self.start_date
            <= pendulum.parse(shift["attributes"]["startAt"])
            <= self.end_date
        ]

    def get_employee_shifts_json(self):
        """Serialize the in-range shifts to a JSON array string.

        Each element carries ``shiftId``, ``firstName``, ``lastName``,
        ``jobRole``, ``shiftDate``, ``startTime`` and ``endTime``.
        """
        shifts = []

        for record in self.employee_shifts_in_range:
            attributes = record["attributes"]
            # Parse each timestamp once and derive both date and time from it.
            starts_at = pendulum.parse(attributes["startAt"])
            ends_at = pendulum.parse(attributes["endAt"])
            shifts.append(
                {
                    "shiftId": record["id"],
                    "firstName": self.employee_first_name,
                    "lastName": self.employee_last_name,
                    "jobRole": attributes["roleName"],
                    "shiftDate": starts_at.to_date_string(),
                    "startTime": starts_at.to_time_string(),
                    "endTime": ends_at.to_time_string(),
                }
            )

        return json.dumps(shifts)


class HomebaseCalendarSync:
    """Copy scraped Homebase shifts into the primary Google Calendar.

    ``__init__`` prepares the Google client and the local database, scrapes
    Homebase, and loads the primary calendar's events. Calling the instance
    refreshes the local ``events`` table from the calendar, creates calendar
    events for shifts that do not have one yet, and closes the database.
    """

    # Prefix of the `source.title` written on events this class creates; the
    # Homebase shift id follows after a single "-".
    _SHIFT_SOURCE_PREFIX = "homebaseShiftId"

    def __init__(self) -> None:
        config.META = Metadata.metadata_singleton_factory()
        config.META.check_for_client_secret_and_import()
        config.GOOGLE = GoogleClient()
        setup_database()
        self.scraper = HomebaseScheduleScraper(
            HOMEBASE_USERNAME,
            HOMEBASE_PASSWORD,
            EMPLOYEE_FIRSTNAME,
            EMPLOYEE_LASTNAME,
            START_DATE,
            END_DATE,
        )
        self.primary_calendar = config.GOOGLE.get_primary_calendar()
        self.primary_calendar_events = config.GOOGLE.get_calendar_events(
            self.primary_calendar["id"]
        )
        self.remote_homebase_shifts = json.loads(
            self.scraper.get_employee_shifts_json()
        )

    def __call__(self):
        """Run one full sync pass and close the database connection."""
        self.update_events_db_from_remote()
        self.add_homebase_shifts()
        config.DB.close()

    def get_event_hash(self, event: dict) -> str:
        """Return the SHA-512 hex digest of ``event`` as key-sorted JSON."""
        event_str = json.dumps(event, sort_keys=True)
        return hashlib.sha512(event_str.encode("utf-8")).hexdigest()

    @classmethod
    def _homebase_shift_id(cls, event: dict):
        """Return the Homebase shift id encoded in an event's source title.

        Returns ``None`` when the event has no ``source`` or its title is not
        of the form ``homebaseShiftId-<id>``. Only the first ``-`` separates
        prefix from id, so ids that themselves contain hyphens survive intact.
        """
        source = event.get("source")
        if not source:
            return None
        title = source.get("title") or ""
        prefix, separator, shift_id = title.partition("-")
        if separator and prefix == cls._SHIFT_SOURCE_PREFIX:
            return shift_id
        return None

    @staticmethod
    def _build_shift_event(shift: dict, timezone_name: str) -> dict:
        """Build the Google Calendar event body for one scraped shift."""
        # Single quotes inside the replacement fields keep these f-strings
        # valid on interpreters older than Python 3.12.
        start = pendulum.parse(
            f"{shift['shiftDate']} {shift['startTime']}", tz=timezone_name
        )
        end = pendulum.parse(
            f"{shift['shiftDate']} {shift['endTime']}", tz=timezone_name
        )
        return {
            "summary": f"Homebase - {shift['jobRole']}",
            "description": f"{shift['firstName']} {shift['lastName']}",
            "start": {
                "dateTime": start.to_iso8601_string(),
                "timeZone": timezone_name,
            },
            "end": {
                "dateTime": end.to_iso8601_string(),
                "timeZone": timezone_name,
            },
            "source": {
                "title": f"homebaseShiftId-{shift['shiftId']}",
                "url": "https://app.joinhomebase.com/",
            },
        }

    def update_events_db_from_remote(self):
        """Make the local ``events`` table match the loaded calendar events.

        New events are inserted, events whose hash changed are updated, and
        rows whose event id is no longer present remotely are deleted.
        """
        connect_database()
        remote_events = set()

        for event in self.primary_calendar_events:
            event_id = event["id"]
            event_hash = self.get_event_hash(event)
            remote_events.add(event_id)

            homebase_shift_id = self._homebase_shift_id(event)
            from_homebase = 0 if homebase_shift_id is None else 1  # 0/1 - False/True

            config.DB_CURSOR.execute(
                "SELECT hash FROM events WHERE event_id = ?", (event_id,)
            )
            row = config.DB_CURSOR.fetchone()

            if row is None:
                config.DB_CURSOR.execute(
                    "INSERT INTO events (event_id, hash, from_homebase, homebase_shift_id) VALUES (?, ?, ?, ?)",
                    (event_id, event_hash, from_homebase, homebase_shift_id),
                )
            elif row[0] != event_hash:
                config.DB_CURSOR.execute(
                    "UPDATE events SET hash = ? WHERE event_id = ?",
                    (event_hash, event_id),
                )
                print(f"Event updated: {event_id}")
            config.DB.commit()

        # Prune local events to match remote.
        config.DB_CURSOR.execute("SELECT event_id FROM events")
        local_events = {row[0] for row in config.DB_CURSOR.fetchall()}
        for event_id in local_events - remote_events:
            config.DB_CURSOR.execute(
                "DELETE FROM events WHERE event_id = ?", (event_id,)
            )
            print(f"Event deleted: {event_id}")

        config.DB.commit()

    def get_homebase_events(self) -> set:
        """Return the shift ids of calendar events that came from Homebase."""
        homebase_events = set()

        for event in self.primary_calendar_events:
            shift_id = self._homebase_shift_id(event)
            if shift_id is not None:
                homebase_events.add(shift_id)
        return homebase_events

    def add_homebase_shifts(self):
        """Record scraped shifts locally and create missing calendar events.

        For each scraped shift: a shift unknown to the ``shifts`` table is
        inserted and, if no calendar event references it, an event is
        created; a known shift with a changed hash only has its hash
        updated; a known, unchanged shift gets an event when the calendar has
        none. Afterwards local shifts absent from the scrape are deleted and
        the ``events`` table is refreshed.
        """
        connect_database()
        remote_shifts = {shift["shiftId"] for shift in self.remote_homebase_shifts}
        homebase_events = self.get_homebase_events()
        calendar_id = self.primary_calendar["id"]
        # The local timezone does not change between shifts; look it up once.
        timezone_name = pendulum.now().timezone_name

        for shift in self.remote_homebase_shifts:
            shift_id = shift["shiftId"]
            shift_hash = self.get_event_hash(shift)
            config.DB_CURSOR.execute(
                "SELECT hash FROM shifts WHERE homebase_shift_id = ?",
                (shift_id,),
            )
            shift_row = config.DB_CURSOR.fetchone()
            event = self._build_shift_event(shift, timezone_name)

            if shift_row is None:
                config.DB_CURSOR.execute(
                    "INSERT INTO shifts (homebase_shift_id, hash) VALUES (?, ?)",
                    (shift_id, shift_hash),
                )
                print(f"New shift added: {shift_hash}")
                config.DB.commit()

                config.DB_CURSOR.execute(
                    "SELECT hash FROM events WHERE homebase_shift_id = ?",
                    (shift_id,),
                )
                event_row = config.DB_CURSOR.fetchone()

                if event_row is None and shift_id not in homebase_events:
                    config.GOOGLE.create_new_event(calendar_id, event)

            elif shift_row[0] != shift_hash:
                config.DB_CURSOR.execute(
                    "UPDATE shifts SET hash = ? WHERE homebase_shift_id = ?",
                    (shift_hash, shift_id),
                )
                print(f"Shift updated: {shift_hash}")
                # TODO: for CRUD operations, this is where integration code for UPDATES to
                # TODO  homebase's shift times would be processed.
                config.DB.commit()
            elif shift_id not in homebase_events:
                config.GOOGLE.create_new_event(calendar_id, event)

        # Prune local shifts to match remote. This runs once, after the loop,
        # so it also runs when the scrape returned no shifts at all.
        config.DB_CURSOR.execute("SELECT homebase_shift_id FROM shifts")
        local_shifts = {row[0] for row in config.DB_CURSOR.fetchall()}
        for stale_shift_id in local_shifts - remote_shifts:
            config.DB_CURSOR.execute(
                "DELETE FROM shifts WHERE homebase_shift_id = ?", (stale_shift_id,)
            )
            print(f"Shift deleted: {stale_shift_id}")

        config.DB.commit()
        self.update_events_db_from_remote()


def main():
    """Construct a `HomebaseCalendarSync` and invoke it once."""
    HomebaseCalendarSync()()


# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

====================

Complete my setup.py file with an entry point on `def main`, and use twine and keyring for deployment.

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

homebase_calendar_sync-0.1.0.tar.gz (7.3 kB view details)

Uploaded Source

Built Distribution

homebase_calendar_sync-0.1.0-py3-none-any.whl (9.7 kB view details)

Uploaded Python 3

File details

Details for the file homebase_calendar_sync-0.1.0.tar.gz.

File metadata

File hashes

Hashes for homebase_calendar_sync-0.1.0.tar.gz
Algorithm Hash digest
SHA256 23744f0bca2842845df5bc89b8d854f62500ab5f4c889c1e67709bc8174678a4
MD5 522d95dcf98571f3f041f059b4b97eee
BLAKE2b-256 210fa54bb30367c0d77002c127329b4b6aa4fd4c65fe22bdde2044d7165d8916

See more details on using hashes here.

File details

Details for the file homebase_calendar_sync-0.1.0-py3-none-any.whl.

File metadata

File hashes

Hashes for homebase_calendar_sync-0.1.0-py3-none-any.whl
Algorithm Hash digest
SHA256 dff6c61c0864b7889a327f7c2c80f2fcdc62e9fde763ffc36c50f3b512f03b4c
MD5 985bc90e70d414e31a5980848b88fa8e
BLAKE2b-256 03aaf83047545ab47b3378f8ad2870fe98a61a56baf882485cdd2233afaaf686

See more details on using hashes here.

Supported by

AWS (Cloud computing and Security Sponsor), Datadog (Monitoring), Fastly (CDN), Google (Download Analytics), Microsoft (PSF Sponsor), Pingdom (Monitoring), Sentry (Error logging), StatusPage (Status page)