work with clickhouse
Project description
Phouse: an easy way to work with ClickHouse
https://github.com/bedretdinov/phouse
To install:
pip3 install git+https://github.com/bedretdinov/phouse
Examples
from phouse.phouse import Phouse
# If the connection attempt fails, it is retried. This keeps your ETL scripts stable.
# The script keeps retrying, at 60-second intervals, until a connection is established.
pd = Phouse.getConnection({
'host':'0.0.0.0',
'user':'default',
'password':'',
'database':'owox'
})
# Returns a pandas DataFrame
df = pd.clickhouse_query(''' SELECT database, name FROM system.tables ''')
# !!!! Very important: the DataFrame must have a date column, otherwise you will get an error
df['date'] = pd.to_datetime('today')
# Insert into the table
df.clickhouse.append(table='new_table') # The table is created automatically
# truncate table and insert
df.clickhouse.write(table='new_table') # The table is created automatically
# Write in batches (buffer_size rows at a time)
from sklearn import datasets
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['date'] = pd.to_datetime('today')
first = True
buffer_size = 10
for i in range(0,df.shape[0],buffer_size):
item = df[i:i+buffer_size]
if(first):
item.clickhouse.write(table='iris_test')
first = not first
else:
item.clickhouse.append(table='iris_test')
# How to choose the table engine and partitioning
# VersionedCollapsingMergeTree
df = pd.DataFrame([
['Y',0.4,1, 56],
['N',0.6,-1,34]
], columns=['status','probability','category','party'])
df.clickhouse.drop(table='test333')
df.clickhouse.VersionedCollapsingMergeTree(
SIGN = 'category',
VERSION = 'party',
PARTITION = 'status',
ORDER_BY = 'status',
INDEX_GRANULARITY = 8192
)
df.clickhouse.append(table='test333')
# __________________________________________________________________
# CollapsingMergeTree
df = pd.DataFrame([
['Y',0.4,1],
['N',0.6,-1]
], columns=['status','probability','category'])
df.clickhouse.drop(table='test333')
df.clickhouse.CollapsingMergeTree(
SIGN = 'category',
PARTITION = 'status',
ORDER_BY = 'status',
INDEX_GRANULARITY = 8192
)
df.clickhouse.append(table='test333')
# __________________________________________________________________
# CollapsingMergeTree
df = pd.DataFrame([
['Y',0.4,1],
['N',0.6,-1]
], columns=['status','probability','category'])
df.clickhouse.drop(table='test333')
df.clickhouse.CollapsingMergeTree(
SIGN = 'category',
PARTITION = 'status',
ORDER_BY = 'status',
INDEX_GRANULARITY = 8192
)
df.clickhouse.append(table='test333')
# __________________________________________________________________
# AggregatingMergeTree
df = pd.DataFrame([
['Y',0.4,56],
['N',0.6,23]
], columns=['status','probability','category'])
df.clickhouse.drop(table='test333')
df.clickhouse.AggregatingMergeTree(
PARTITION = 'status',
ORDER_BY = 'status',
INDEX_GRANULARITY = 8192
)
df.clickhouse.append(table='test333')
# __________________________________________________________________
# Clean table
df.clickhouse.truncate(table='test333')
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distributions
No source distribution files available for this release. See tutorial on generating distribution archives.
Built Distribution
phouse-1.7-py3.6.egg
(8.3 kB
view hashes)