One-stop time series analysis tool, supporting time series data preprocessing, feature engineering, model training, model evaluation, and model prediction.

Project description

PipelineTS

A one-stop time series analysis tool, supporting time series data preprocessing, feature engineering, model training, model evaluation, and model prediction.

Installation

# if you don't want to use the prophet model
# run this
python -m pip install PipelineTS[core]

# if you want to use all models
# run this
python -m pip install PipelineTS[all]
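
A quick way to confirm the installation is to import the pipeline class used throughout this README and list the registered models (a minimal sanity check; the same call appears again in the Quick Start below):

# sketch: post-install sanity check
from PipelineTS.pipeline import ModelPipeline

print(ModelPipeline.list_all_available_models())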

Quick Start

List the available models

from PipelineTS.dataset import LoadWebSales

init_data = LoadWebSales()[['date', 'type_a']]

valid_data = init_data.iloc[-30:, :]
data = init_data.iloc[:-30, :]
device = 'cpu'

from PipelineTS.pipeline import ModelPipeline

# list all models
ModelPipeline.list_all_available_models()
[output]:
['prophet',
 'auto_arima',
 'catboost',
 'lightgbm',
 'xgboost',
 'wide_gbrt',
 'd_linear',
 'n_linear',
 'n_beats',
 'n_hits',
 'tcn',
 'tft',
 'gau',
 'stacking_rnn',
 'time2vec',
 'multi_output_model',
 'multi_step_model',
 'transformer',
 'random_forest',
 'tide']

Start training

from sklearn.metrics import mean_absolute_error

pipeline = ModelPipeline(
    time_col='date',
    target_col='type_a',
    lags=30,
    random_state=42,
    metric=mean_absolute_error,
    metric_less_is_better=True,
    device=device
)

# train all models
pipeline.fit(data, valid_df=valid_data)

# use the best model to predict the next 30 data points
res = pipeline.predict(30)
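
To gauge the quality of the best model's forecast, you can score it against the held-out valid_data. This is a minimal sketch; it assumes the returned frame res carries the target column 'type_a', as the plot_data_period calls later in this README suggest:

from sklearn.metrics import mean_absolute_error

# score the 30-step forecast against the 30-day hold-out window
# (the column name 'type_a' in `res` is an assumption, see note above)
mae = mean_absolute_error(valid_data['type_a'].values, res['type_a'].values)
print(f'best-model MAE on the hold-out window: {mae:.4f}')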

Data preparation

from PipelineTS.dataset import LoadMessagesSentDataSets
import pandas as pd
# convert the time column; in this dataset the date column is named 'date_col'
time_col = 'date_col'
target_col = 'ta'
lags = 60  # size of the lookback window; the data is split into multiple sequences of `lags` days for training
n = 40  # number of steps to predict; in this example, the number of days to predict

# you can also load data with pandas
# init_data = pd.read_csv('/path/to/your/data.csv')
init_data = LoadMessagesSentDataSets()[[time_col, target_col]]

init_data[time_col] = pd.to_datetime(init_data[time_col], format='%Y-%m-%d')

# split into training and validation sets
valid_data = init_data.iloc[-n:, :]
data = init_data.iloc[:-n, :]
print("data shape: ", data.shape, ", valid data shape: ", valid_data.shape)
data.tail(5)

# visualize the data
from PipelineTS.plot import plot_data_period
plot_data_period(
    data.iloc[-300:, :], 
    valid_data, 
    time_col=time_col, 
    target_col=target_col, 
    labels=['Train data', 'Valid_data']
)

[Figure: the last 300 rows of the training data plotted alongside the validation window]

Training and prediction with a single model

from PipelineTS.nn_model import TiDEModel
tide = TiDEModel(
    time_col=time_col, target_col=target_col, lags=lags, random_state=42, 
    quantile=0.9, enable_progress_bar=False, enable_model_summary=False
)
tide.fit(data)
tide.predict(n)
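
The same hold-out comparison works for a single model. This sketch assumes the frame returned by tide.predict(n) contains target_col, mirroring the pipeline prediction frames used elsewhere in this README:

from sklearn.metrics import mean_absolute_error

# compare the single-model forecast with the validation window
tide_pred = tide.predict(n)  # assumed to contain `target_col`
print(mean_absolute_error(valid_data[target_col].values, tide_pred[target_col].values))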

The PipelineTS module

# if you need to configure the models
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from PipelineTS.pipeline import ModelPipeline, PipelineConfigs

# The first element is the model name, which must appear in ModelPipeline.list_all_available_models(); the second is a dict
# The dict may contain up to three keys: 'init_configs', 'fit_configs', 'predict_configs'; any subset is allowed, and missing keys are filled with defaults
# 'init_configs' holds the model's initialization parameters, 'fit_configs' the training parameters, and 'predict_configs' the prediction parameters
pipeline_configs = PipelineConfigs([
    ('lightgbm', {'init_configs': {'verbose': -1, 'linear_tree': True}}),
    ('multi_output_model', {'init_configs': {'verbose': -1}}),
    ('multi_step_model', {'init_configs': {'verbose': -1}}),
    ('multi_output_model', {
        'init_configs': {'estimator': XGBRegressor, 'random_state': 42, 'kwargs': {'verbosity': 0}}
    }
     ),
    ('multi_output_model', {
        'init_configs': {'estimator': CatBoostRegressor, 'random_state': 42, 'verbose': False}
    }
     ),
])
[output]:
   model_name           model_name_with_index   model_configs
0  lightgbm             lightgbm_1              {'init_configs': {'verbose': -1, 'linear_tree': True}, 'fit_configs': {}, 'predict_configs': {}}
1  multi_output_model   multi_output_model_1    {'init_configs': {'verbose': -1}, 'fit_configs': {}, 'predict_configs': {}}
2  multi_output_model   multi_output_model_2    {'init_configs': {'estimator': <class 'xgboost.sklearn.XGBRegressor'>, 'random_state': 42, 'kwargs': {'verbosity': 0}}, 'fit_configs': {}, 'predict_configs': {}}
3  multi_output_model   multi_output_model_3    {'init_configs': {'estimator': <class 'catboost.core.CatBoostRegressor'>, 'random_state': 42, 'verbose': False}, 'fit_configs': {}, 'predict_configs': {}}
4  multi_step_model     multi_step_model_1      {'init_configs': {'verbose': -1}, 'fit_configs': {}, 'predict_configs': {}}
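
For completeness, a single entry can also spell out all three keys explicitly; the empty dicts below simply mean "use the defaults", which is exactly what the auto-filled columns in the table above show (the variable name explicit_configs is illustrative):

# sketch: one entry with every config key written out
explicit_configs = PipelineConfigs([
    ('lightgbm', {
        'init_configs': {'verbose': -1, 'linear_tree': True},
        'fit_configs': {},       # empty dict: keep the default training behaviour
        'predict_configs': {},   # empty dict: keep the default prediction behaviour
    }),
])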

Non-interval prediction

from sklearn.metrics import mean_absolute_error

from PipelineTS.pipeline import ModelPipeline

pipeline = ModelPipeline(
    time_col=time_col,
    target_col=target_col,
    lags=lags,
    random_state=42,
    metric=mean_absolute_error,
    metric_less_is_better=True,
    configs=pipeline_configs,
    include_init_config_model=False,
    use_standard_scale=False,  # False for MinMaxScaler, True for StandardScaler, None means no scaling is applied
    with_quantile_prediction=False,
    device=device,
    # models=['wide_gbrt']  # you can restrict training to the specified models
)

pipeline.fit(data, valid_data)

Getting model parameters from the pipeline

# Get all configurations of the specified model; defaults to the best model
pipeline.get_model(model_name='wide_gbrt').all_configs
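
Since the lookup defaults to the best model, the call also works without naming a model. This small sketch relies only on that documented default:

# sketch: inspect the configuration of the best-performing model
best_model = pipeline.get_model()  # no model_name: falls back to the best model
print(best_model.all_configs)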

Plotting the prediction results

# use the best model to predict the next n data points
prediction = pipeline.predict(n)  # use model_name to select a specific trained model in the pipeline

plot_data_period(init_data.iloc[-100:, :], prediction, 
                 time_col=time_col, target_col=target_col)

[Figure: the non-interval forecast plotted against the last 100 rows of the original data]

Interval prediction

from sklearn.metrics import mean_absolute_error

from PipelineTS.pipeline import ModelPipeline

pipeline = ModelPipeline(
    time_col=time_col,
    target_col=target_col,
    lags=lags,
    random_state=42,
    metric=mean_absolute_error,
    metric_less_is_better=True,
    configs=pipeline_configs,
    include_init_config_model=False,
    use_standard_scale=False,
    with_quantile_prediction=True,  # turn on quantile (interval) prediction
    device=device,
    # models=['wide_gbrt']  # you can restrict training to the specified models
)

pipeline.fit(data, valid_data)

Plotting the prediction results

# use the best model to predict the next n data points
prediction = pipeline.predict(n, model_name=None)  # use model_name to select a specific trained model in the pipeline

plot_data_period(init_data.iloc[-100:, :], prediction, 
                 time_col=time_col, target_col=target_col)

[Figure: the interval (quantile) forecast plotted against the last 100 rows of the original data]

Saving and loading models and pipelines

from PipelineTS.io import load_model, save_model

# save
save_model(path='/path/to/save/your/fitted_model_or_pipeline.zip', model=your_fitted_model_or_pipeline)
# load
your_loaded_model_or_pipeline = load_model('/path/to/save/your/fitted_model_or_pipeline.zip')
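
As a concrete example, the ModelPipeline fitted above can be round-tripped like this (the file name is illustrative):

# sketch: save the fitted pipeline, reload it, and predict from the restored copy
save_model(path='pipeline_ts_demo.zip', model=pipeline)

restored_pipeline = load_model('pipeline_ts_demo.zip')
prediction = restored_pipeline.predict(n)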

Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

PipelineTS-0.3.6.tar.gz (31.3 kB)

Uploaded Source

Built Distribution

PipelineTS-0.3.6-py3-none-any.whl (46.5 kB)

Uploaded Python 3

File details

Details for the file PipelineTS-0.3.6.tar.gz.

File metadata

  • Download URL: PipelineTS-0.3.6.tar.gz
  • Upload date:
  • Size: 31.3 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/4.0.2 CPython/3.11.5

File hashes

Hashes for PipelineTS-0.3.6.tar.gz
  • SHA256: 6e4f622e9b756f1d0e5002d08718a4d36331346f5b302c8a4ce1600e7ddd3625
  • MD5: f4affe4d0fdb88d8a648efc0e056d24a
  • BLAKE2b-256: 2b428aa0967510cbe1fc8eb40c8de824dbd2135b227bf8c13859ec1c9a3142bb

See more details on using hashes here.

File details

Details for the file PipelineTS-0.3.6-py3-none-any.whl.

File metadata

  • Download URL: PipelineTS-0.3.6-py3-none-any.whl
  • Upload date:
  • Size: 46.5 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/4.0.2 CPython/3.11.5

File hashes

Hashes for PipelineTS-0.3.6-py3-none-any.whl
  • SHA256: 06ae2c6d143937df6c7c2e21faf8963f50ede2a32d8c99c467620f2ebd92719a
  • MD5: 5a91b1c9709cb31ec17e5a237180de58
  • BLAKE2b-256: b74f3c508a54c8c14459d2c4e871bcf3776a88468466aafa5ad68e9d9cf88b27

See more details on using hashes here.
