Skip to main content

Making it easier to navigate and clean station data

Project description

Documentation

https://filterstations.netlify.app/

Water Level Pipeline

  • A series of functions to be added to the filter-stations module in PyPI to evaluate which TAHMO stations corroborate with the water level
  • All begins with the coordinates of the gauging station(location of the monitoring sensor)
import os
from pathlib import Path
import haversine as hs
import pandas as pd
import numpy as np
import datetime
import statsmodels.api as sm
from matplotlib.dates import DateFormatter
import matplotlib.pyplot as plt

# Path to the API credentials file, expected one directory above the
# current working directory.
config_path = os.path.join(Path(os.getcwd()).parent.absolute(), 'config.json')
from filter_stations import retreive_data, Interactive_maps, Filter
import json
# Authentication: read the TAHMO API key/secret from config.json and build
# the data-access client used by every function below.
with open(config_path) as f:
    conf = json.load(f)

apiKey = conf['apiKey']
apiSecret = conf['apiSecret']
fs = retreive_data(apiKey, apiSecret)
# given the radius and the longitude and latitude of the gauging station, return the stations within
def stations_within_radius(radius, latitude, longitude, df=False):
    """Return the TAHMO stations within ``radius`` km of a point.

    Parameters
    ----------
    radius : float
        Search radius in kilometres (haversine distance).
    latitude, longitude : float
        Coordinates of the gauging station.
    df : bool, optional
        When True return a DataFrame (code, coordinates, distance);
        otherwise return only the array of station codes (default).
    """
    all_stations = fs.get_stations_info()
    target = (latitude, longitude)
    # Great-circle distance from the target point to every station.
    all_stations['distance'] = all_stations.apply(
        lambda row: hs.haversine(target, (row['location.latitude'], row['location.longitude'])),
        axis=1,
    )
    nearby = all_stations[['code', 'location.latitude', 'location.longitude', 'distance']]
    nearby = nearby.sort_values('distance')
    nearby = nearby[nearby['distance'] <= radius]
    if df:
        return nearby
    return nearby.code.values
# Stations within a 100 km radius of the Ewaso gauging site
# (lat -0.406689, lon 36.96301); returns an array of station codes.
ewaso = stations_within_radius(100, -0.406689, 36.96301)
ewaso
API request: services/assets/v2/stations





array(['TA00283', 'TA00378', 'TA00754', 'TA00074', 'TA00196', 'TA00073',
       'TA00056', 'TA00029', 'TA00416', 'TA00719', 'TA00258', 'TA00622',
       'TA00028', 'TA00414', 'TA00190', 'TA00078', 'TA00024', 'TA00080',
       'TA00166', 'TA00108', 'TA00026', 'TA00189', 'TA00250', 'TA00182',
       'TA00715', 'TA00377', 'TA00027', 'TA00057', 'TA00134', 'TA00448',
       'TA00774', 'TA00773', 'TA00772', 'TA00775', 'TA00771', 'TA00679',
       'TA00770'], dtype=object)

The assumption here is that one can have credentials but not the data

  • From the list of stations get the precipitation data with a certain data completeness check provided
  • Additionally the start and end date if the data is not provided
  • The default start date is the day the sensors were set up at DSAIL
  • Check the documentation on the types of variables available
def stations_data_check(stations_list, percentage=1, start_date=None, end_date=None, data=None, variables=None, csv_file=None):
    """Fetch station measurements and keep only sufficiently complete columns.

    Parameters
    ----------
    stations_list : list
        TAHMO station codes to download (ignored when ``data`` is given).
    percentage : float, optional
        Minimum fraction of non-missing rows a column needs to be kept
        (default 1, i.e. fully complete).
    start_date, end_date : str, optional
        Date range passed to the API when downloading.
    data : pandas.DataFrame, optional
        Pre-downloaded measurements; skips the API call when provided.
    variables : list, optional
        Variable codes to request (defaults to ['pr'], precipitation).
    csv_file : str, optional
        When given, the filtered data is also written to '<csv_file>.csv'.

    Returns
    -------
    pandas.DataFrame
        The data restricted to columns meeting the completeness threshold.
    """
    # Avoid a mutable default argument; ['pr'] is the effective default.
    if variables is None:
        variables = ['pr']
    if data is None:
        data = fs.multiple_measurements(stations_list, startDate=start_date, endDate=end_date, variables=variables, csv_file=csv_file)

    # Check the percentage of missing data and return the stations with less
    # than the percentage of missing data.
    data.index = data.index.astype('datetime64[ns]')
    data = data.dropna(axis=1, thresh=int(len(data) * percentage))
    # Fix: only write when a filename was supplied (previously wrote 'None.csv'),
    # and do not double the extension when the caller already includes '.csv'.
    if csv_file is not None:
        stem = csv_file[:-4] if csv_file.endswith('.csv') else csv_file
        data.to_csv(f'{stem}.csv')
    return data
stations_df = stations_data_check(list(ewaso), start_date='2022-12-01', end_date='2022-12-31', variables=['pr'], csv_file='ewaso2.csv')

Apart from completeness, another validation method for eliminating unusable sensors is checking for a positive correlation and lag

  • The default lag is 3 days between a particular station and the gauging station
  • The required format is a timeseries data
  • Provide the column names for evaluation format = [Date, data]
  • with the change in parameters one can choose above or below threshold
def stations_lag(weather_stations_df, gauging_stations_df, gauging_station_columns, date=None, lag=3, above=False, below=False):
    """Cross-correlate each weather station with the gauging-station series.

    For every station column, the cross-correlation function (ccf) against
    the water-level data is computed and the lag of the maximum coefficient
    is compared to the ``lag`` threshold.

    Parameters
    ----------
    weather_stations_df : pandas.DataFrame
        Station measurements indexed by datetime (one column per station).
    gauging_stations_df : pandas.DataFrame
        Gauging-station timeseries in the format [Date, data].
    gauging_station_columns : list
        Two column names, ``[date_column, data_column]``; dates 'dd/mm/YYYY'.
    date : str, optional
        Start date ('dd/mm/YYYY'); defaults to the first gauging record.
    lag : int, optional
        Lag threshold, in days, splitting the stations (default 3).
    above, below : bool, optional
        Which result dict(s) to return.

    Returns
    -------
    dict or (dict, dict)
        Per-station results keyed by column name; each entry holds the best
        lag, its coefficient, the full coefficient list and both series.
        Both dicts are returned when both flags (or neither) are set.
    """
    # Align the weather data with the gauging record's date span: one row
    # per day, so the span is (number of records - 1) days long.
    if date is None:
        date = gauging_stations_df.loc[0, gauging_station_columns[0]]
    start_date = datetime.datetime.strptime(date, "%d/%m/%Y")
    end_date = start_date + datetime.timedelta(len(gauging_stations_df)-1)
    df_fit = weather_stations_df[start_date:end_date]
    # Water-level values as a plain list for the ccf call.
    water_list = list(gauging_stations_df[f'{gauging_station_columns[1]}'])
    above_thresh_lag = dict()
    below_thresh_lag = dict()
    # For every station column, find the lag with the highest
    # cross-correlation coefficient against the water-level series.
    for cols in df_fit.columns:
        select_list = list(df_fit[cols])
        coefficient_list = list(sm.tsa.stattools.ccf(select_list, water_list, adjusted=False))
        best_lag = np.argmax(coefficient_list)
        result = {
            'lag': best_lag,
            'coefficient': coefficient_list[best_lag],
            'coefficient_list': coefficient_list,
            'select_list': select_list,
            'water_list': water_list
        }
        if best_lag > lag:
            above_thresh_lag[cols] = result
        else:
            below_thresh_lag[cols] = result
    if above and below:
        return above_thresh_lag, below_thresh_lag
    elif above:
        return above_thresh_lag
    elif below:
        return below_thresh_lag
    # Fix: previously fell off the end and returned None when neither flag
    # was set; return both dicts instead, matching the above-and-below case.
    return above_thresh_lag, below_thresh_lag
# Daily water-level readings from the Ewaso gauge; columns are
# 'time' (dd/mm/YYYY) and 'water_level(m)'.
water_six = pd.read_csv('./water-level-data-ewaso/1E2020.csv')
water_six
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
time water_level(m)
0 12/05/2020 2.618646
1 13/05/2020 2.551392
2 14/05/2020 2.507711
3 15/05/2020 2.491130
4 16/05/2020 2.434761
... ... ...
259 26/01/2021 0.947099
260 27/01/2021 0.929186
261 28/01/2021 0.911274
262 29/01/2021 0.910711
263 30/01/2021 0.939971

264 rows × 2 columns

# Stations whose best cross-correlation lag with the gauge is within 3 days
# (below=True selects the below-threshold dict).
lag_ = stations_lag(stations_df, water_six, ['time', 'water_level(m)'], lag=3,below=True)
lag_

Plotting

Provides visuals of the data

  • An option to save the figures
  • An option of choosing the dpi
  • provide the startDate based on the water collection starting date
# Water-level series stored alongside the first station that passed the check.
lag_[list(lag_.keys())[0]]['water_list']
import warnings
# NOTE(review): suppresses ALL warnings globally (likely to hide matplotlib
# tick-label warnings from plot_figs) — consider scoping with catch_warnings.
warnings. filterwarnings('ignore')
def plot_figs(weather_stations, water_list, threshold_list, save=False, dpi=500, date='11-02-2021'):
    """Plot rainfall (bars, left axis) against water level (line, right axis).

    One figure is produced per station column in ``threshold_list``.

    Parameters
    ----------
    weather_stations : pandas.DataFrame
        Rainfall data indexed by datetime.
    water_list : list
        Daily water-level readings; its length defines the plotting window.
    threshold_list : list
        Station column names to plot.
    save : bool, optional
        When True, each figure is saved as '<station>.png'.
    dpi : int, optional
        Resolution used when saving (default 500).
    date : str, optional
        Start date of the water-level record, 'dd-mm-YYYY'.
    """
    start_date = datetime.datetime.strptime(date, "%d-%m-%Y")
    # One reading per day: the window spans len(water_list) days.
    end_date = start_date + datetime.timedelta(len(water_list)-1)
    # weather_stations = weather_stations.set_index('Date')
    df_plot = weather_stations[start_date:end_date]
    df_plot = df_plot[threshold_list].reset_index()
    df_plot.rename(columns={'index':'Date'}, inplace=True)
    
    
    plt.rcParams['figure.figsize'] = (15, 9)
    print('Begin plotting!')
    
    # First column is 'Date'; every remaining column is one station.
    for cols in df_plot.columns[1:]:
        fig, ax1 = plt.subplots()
        color = 'tab:blue'
        ax1.set_xlabel(f'Time', fontsize=24, weight='bold')
        ax1.set_ylabel(f'Rainfall {cols} (mm)', color=color, fontsize=24, weight='bold')
        ax1.bar(pd.to_datetime(df_plot['Date'], format="%d/%m/%Y"), df_plot[f'{cols}'], color=color, width=4, alpha=1.0)
        ax1.tick_params(axis='y', labelcolor=color, labelsize=24)
        ax1.tick_params(axis='x')
        # NOTE(review): set_xticklabels here installs fixed labels that the
        # DateFormatter below overrides at draw time — only the font settings
        # survive; this is also the usual source of the suppressed warnings.
        ax1.set_xticklabels(df_plot['Date'], fontsize=21, weight='bold')
        ax1.grid(color='gray', linestyle='--', linewidth=0.8)
        ax1.set(facecolor="white")
        ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

        color = 'tab:red'
        ax2.set_ylabel('Water level/Stage (m)', color=color, fontsize=24, weight='bold')
        ax2.plot(pd.to_datetime(df_plot['Date'], format="%d/%m/%Y"), water_list, color=color, linewidth=4)
        ax2.tick_params(axis='y', labelcolor=color, labelsize=24)
        ax2.set(facecolor="white")
        plt.title('Stage and Rainfall against Time', fontsize=22, weight='bold')

        # Month-year ticks on the shared x-axis.
        date_form = DateFormatter("%m-%y")
        ax1.xaxis.set_major_formatter(date_form)
        fig.tight_layout()

        # NOTE(review): figures are never closed, so memory grows with each
        # station plotted — consider plt.close(fig) after saving.
        if save:
            fig.savefig(f'{cols}.png', dpi=dpi)
plot_figs(stations_df, lag_[list(lag_.keys())[0]]['water_list'], list(lag_.keys()), save=True, date='12-05-2020')
Begin plotting!

water_level_pipeline_16_1

water_level_pipeline_16_2

water_level_pipeline_16_3

water_level_pipeline_16_4

water_level_pipeline_16_5

water_level_pipeline_16_6

water_level_pipeline_16_7

water_level_pipeline_16_8

water_level_pipeline_16_9

Format to get the stations metadata

def filter_metadata(lag_keys):
    """Fetch TAHMO metadata for the stations named in ``lag_keys``.

    Keys look like 'TA00028_...'; the station code is the part before
    the first underscore.
    """
    station_codes = []
    for key in lag_keys:
        station_codes.append(key.split('_')[0])
    return fs.get_stations_info(multipleStations=station_codes)
filter_metadata(list(lag_.keys()))
API request: services/assets/v2/stations
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
code status installationdate elevationground sensorinstallations dataloggerinstallations creatorid created updaterid updated ... location.countrycode location.zipcode location.latitude location.longitude location.elevationmsl location.note location.creatorid location.created location.updaterid location.updated
26 TA00028 1 2015-08-31T00:00:00Z 9.0 None None 2 2018-12-11T08:35:17.888233Z 2 2018-12-11T08:35:17.888233Z ... KE 0.055219 37.136747 2003.6 {} 2 2018-10-26T13:32:16.15537Z 37 2022-06-30T11:11:50.27135Z
27 TA00029 1 2015-09-02T00:00:00Z 2.0 None None 2 2018-12-11T08:36:19.30342Z 2 2018-12-11T08:36:19.30342Z ... KE -0.500776 36.587511 2545.8 {} 2 2018-10-26T13:33:31.451613Z 37 2022-02-28T12:25:09.578242Z
53 TA00057 1 2015-10-08T00:00:00Z 2.0 None None 2 2018-12-11T09:21:29.092833Z 2 2018-12-11T09:21:29.092833Z ... KE -1.253030 36.856487 1645.3 {} 2 2018-10-29T09:13:33.768613Z 2 2022-07-26T07:34:06.603938Z
68 TA00074 1 2015-11-19T00:00:00Z 2.0 None None 2 2018-12-11T09:38:25.742397Z 2 2018-12-11T09:38:25.742397Z ... KE -0.566080 37.074412 1726.8 {} 2 2018-10-29T10:35:28.49617Z 2 2022-07-26T07:38:42.100985Z
74 TA00080 1 2016-01-28T00:00:00Z 2.0 None None 2 2018-12-11T09:43:10.523398Z 2 2018-12-11T09:43:10.523398Z ... KE -1.087589 36.818402 1777.3 {} 2 2018-10-29T10:53:47.845042Z 37 2022-02-28T13:07:04.709903Z
150 TA00166 1 2017-05-11T00:00:00Z 2.0 None None 2 2018-12-12T08:29:28.10697Z 2 2018-12-12T08:29:28.10697Z ... KE -0.319508 37.659139 1404.0 {} 2 2018-11-10T08:47:37.949135Z 2 2018-11-10T08:47:37.949135Z

6 rows × 28 columns



          

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

filter_stations-0.3.13.tar.gz (22.2 kB view hashes)

Uploaded Source

Built Distribution

filter_stations-0.3.13-py3-none-any.whl (19.2 kB view hashes)

Uploaded Python 3

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page