Skip to main content

Automated visualization feature extraction for data scientists, and a data format calculator for application developers

Project description

Dashboard builder utility that generates all possible stats from a DataFrame for data science and visualisation purposes.

from package.dashboardutil import DashboardElementsBuilder
from package.dataclassifier import DataClassifier
import pandas as pd
df=pd.read_csv("cars.csv")
dat=DataClassifier()
visual=DashboardElementsBuilder(df,dat)
ploats=visual.build_ploats("hist",df.columns.to_list()[1:])
ploat_data=[]
for x in list(ploats):
    for z in list(x):
        ploat_data+=list(z)

The data above can be visualised as shown below:

data={'slow': {'lables': [66.2, 66.4, 66.3, 71.4, 67.9], 'counts': [1, 1, 1, 3, 1]}}
from bokeh.plotting import figure, show

fruits = [str(x) for x in data['slow']['lables']]
counts = data['slow']['counts']

p = figure(x_range=fruits, height=350, title="Range",
           toolbar_location=None, tools="")

p.vbar(x=fruits, top=counts, width=0.9)

p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

export bulk graphs for all possible conditions

from package.DashBoardsTemplates import export_graphs_hist
from bokeh.plotting import show 
# use any graph for data clustering or analysis purposes; the function above uses bokeh for bulk visualisation
visual=export_graphs_hist(ploat_data)
# iterate over the visual variable, or visualise the graphs one by one
show(visual[0])

Calculate data formats for visualisation data, for format visualisation purposes

from package.keyborddata import *
from package.formatcalculator import FormatCalculator 
# get hash chunks
unique_hashes=FormatCalculator.get_unique_hashes_from_data(ploat_data)
# get combined hashes
unique_=[]
for x in unique_hashes:
    unique_+=x

Calculate data formats for DataFrame data, for format-based data optimisation and validation purposes

from package.keyborddata import *
import pandas as pd
from package.formatcalculator import FormatCalculator
# get df vocabs
vocabdf=FormatCalculator.split_all_labels_to_words_with_new_cols(pd.read_csv("test.csv"))
# get vocabdf formats
formets=FormatCalculator.hash_df_formats(vocabdf)
# get vocabdf formets column wise 
unique_formatas=FormatCalculator.get_unique_hashes_from_df_columnwise(formets)

Optimising regex strings

from package.keyborddata import *
import pandas as pd
from package.formatcalculator import FormatCalculator
# get df vocabs
vocabdf=FormatCalculator.split_all_labels_to_words_with_new_cols(pd.read_csv("test.csv"))
# get vocabdf formats
formets=FormatCalculator.hash_df_formats(vocabdf)
# optimise formats in df
df_list_formetted=[]
for x,y in formets.iterrows():
    for cd in formets.columns.to_list():
        y[cd]=regex_formattor(y[cd])
    df_list_formetted.append(y.to_dict())
# reassemble df into the same variable
formets=pd.DataFrame.from_records(df_list_formetted)
# get vocabdf formets column wise 
unique_formatas=FormatCalculator.get_unique_hashes_from_df_columnwise(formets)

generate data mitter

from package.formatcalculator import FormatCalculator
import pandas as pd
import itertools

# reads df from csv
df = pd.read_csv("testdata.csv")
mitter=FormatCalculator.generate_datamiter(df)
mitter._df.to_excel("test.xlsx")
# the datamiter optimises data according to keyboard letters

Optimising and managing large datasets with mitter and row patterns using constructive maths

from package.formatcalculator import FormatCalculator
import pandas as pd
import itertools
import ast

# reads df from csv
df             = pd.read_csv("tw.csv")

mitter         = FormatCalculator.generate_datamiter(df)
a              = mitter.formatwise_mitter()
# get optimised row pattern for data ordering management
a             =  mitter.get_row_ordring_seq_from_dataset(df.head(10),3)

optimizing str patterns

from package.formatcalculator import FormatCalculator,Mitter
import pandas as pd
import ast
# reads df from csv
df             = pd.read_csv("testfile.csv")
mitter         = FormatCalculator.generate_datamiter(df)
mitter         = mitter.formatwise_mitter()
formateld      = Mitter.hash_str_patterns(mitter)
                

Calculate variations on rows and columns

from package.formatcalculator import FormatCalculator,Mitter
import pandas as pd
from package.variationcalculator import VERIATIONS

# reads df from csv
df             = pd.read_csv("testfile.csv")

mitter         = FormatCalculator.generate_datamiter(df)
mitter2        = mitter.formatwise_mitter()
formateld      = Mitter.hash_str_patterns(mitter2)

veri           = VERIATIONS(formateld,pd.DataFrame(columns=mitter.get_row_ordring_seq_from_dataset(df,iterlen=3)))

cols           = veri.formats_and_no_of_patterns()
sw             = veri.row_sequance_veriations()

Regenerate data using variations in sequence

from package.formatcalculator import FormatCalculator,Mitter
import pandas as pd
from package.variationcalculator import VERIATIONS
from package.keyborddata import *
# reads df from csv
df             = pd.read_csv("testfile.csv")

mitter         = FormatCalculator.generate_datamiter(df)
mitter2        = mitter.formatwise_mitter()
formateld      = Mitter.hash_str_patterns(mitter2)
keyboards = (
            alphabets + alphabets_upper + simbols + [str(x) for x in numbers] + [" "]
        )
veri=VERIATIONS(formateld,pd.DataFrame(columns=mitter.get_row_ordring_seq_from_dataset(df,iterlen=3)),keyboard=keyboards)
veri.transform_keybord_seq_to_data()       

optimising mitter columns

from package.keyborddata import alphabets, alphabets_upper, numbers, simbols
 
mitter=FormatCalculator.generate_datamiter(df)
mitter2        = mitter.formatwise_mitter()
formateld      = Mitter.hash_str_patterns(mitter2)
keyboards        = (
            alphabets + alphabets_upper + simbols + [str(x) for x in numbers] + [" "]
        )
veri=VERIATIONS(formateld,pd.DataFrame(columns=mitter.get_row_ordring_seq_from_dataset(df,iterlen=3)),keyboard=keyboards,Mitter=mitter)
veri.clssifiy_column_mitterdata()

regenerate from optimised data

from package.keyborddata import alphabets, alphabets_upper, numbers, simbols
from package.formatcalculator import FormatCalculator
mitter=FormatCalculator.generate_datamiter(df)
mitter2        = mitter.formatwise_mitter()
formateld      = Mitter.hash_str_patterns(mitter2)
keyboards        = (
            alphabets + alphabets_upper + simbols + [str(x) for x in numbers] + [" "]
        )
veri=VERIATIONS(formateld,pd.DataFrame(columns=mitter.get_row_ordring_seq_from_dataset(df,iterlen=3)),keyboard=keyboards,Mitter=mitter)
veri.clssifiy_column_mitterdata()
veri.regenerate_data_from_optimised_mitter()
Documentation LINK

Sponsors Guidelines

Please send us an email at rajatsmishra@aol.com to get the sponsor docs for this project.

Project Contribution Guidelines

GitHub page: https://github.com/rajat45mishra/DashBoardUtils_Datascience
Send update suggestions to rajatsmishra@aol.com

todo tasks

- add more algorithms to the data classifier
- add more graph templates to DashBoardsTemplates
- use-case docs and API docs for users
- tutorials for extracting and scripting formats to solve real-world software application optimisation problems

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

DashBoardUtils-DataScience-1.56.tar.gz (15.2 kB view details)

Uploaded Source

File details

Details for the file DashBoardUtils-DataScience-1.56.tar.gz.

File metadata

File hashes

Hashes for DashBoardUtils-DataScience-1.56.tar.gz
Algorithm Hash digest
SHA256 8cf4e5b42ba235b0d90cacb792def089ff9157b63c87b2fb676d180d792fc822
MD5 c12db322758b0c3637b330bc5bc85ac3
BLAKE2b-256 ee155b579c96cd1ba48fd392b7e419c80b357dbb3acbc6a4c70ec4bbba2a8772

See more details on using hashes here.

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page