Data Pipeline Framework

Project description

pytzen usage

Source code:

from dataclasses import dataclass
import pydoc
import os
import sys
import pytzen
# Working directory: 'config.json' is looked up here and the exported
# JSON files are inspected here further down.
pytzen.DIR = '/home/p/pytzen/pypi/docs'

# One isolated pytzen namespace per pipeline stage, created in the order
# extract -> transform -> load.
extract, transform, load = map(
    pytzen.new_namespace, ('extract', 'transform', 'load'))

# Plain-text rendering of the package documentation.
doc_string = pydoc.render_doc('pytzen')
Python Library Documentation: package pytzen

    pytzen - PYTZEN is designed to sketch out data pipelines.


    class MetaType(builtins.type)
     |  MetaType(name, bases, attrs) -> type
     |  Metaclass for ProtoType class. It is responsible for adding the 
     |  meta_attr attribute to the class and initializing the ProtoType 
     |  class.
     |  Method resolution order:
     |      MetaType
     |      builtins.type
     |      builtins.object
     |  Methods defined here:
     |  __call__(self, *args, **kwargs) -> object
     |      Initializes an instance of a derived class within a 
     |      prototype-based design.
     |  ----------------------------------------------------------------------
     |  Class methods defined here:
     |  close() -> None from builtins.type
     |      Finalizes operations by persistently storing class data.
     |  log(message, stdout=True, write=True) -> None from builtins.type
     |      Records a log message with an optional display and storage 
     |      behavior.
     |  store(name, value) -> None from builtins.type
     |      Stores a named value within the class's shared data store.
     |  ----------------------------------------------------------------------
     |  Static methods defined here:
     |  __new__(cls, name, bases, attrs) -> type
     |      Enriches a class with logging, data storage, and closure 
     |      capabilities.
     |  ----------------------------------------------------------------------
     |  Data and other attributes defined here:
     |  NAMESPACE = None
     |  __annotations__ = {'NAMESPACE': <class 'str'>}
     |  ----------------------------------------------------------------------
     |  Methods inherited from builtins.type:
     |  __delattr__(self, name, /)
     |      Implement delattr(self, name).
     |  __dir__(self, /)
     |      Specialized __dir__ implementation for types.
     |  __getattribute__(self, name, /)
     |      Return getattr(self, name).
     |  __init__(self, /, *args, **kwargs)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  __instancecheck__(self, instance, /)
     |      Check if an object is an instance.
     |  __repr__(self, /)
     |      Return repr(self).
     |  __setattr__(self, name, value, /)
     |      Implement setattr(self, name, value).
     |  __sizeof__(self, /)
     |      Return memory consumption of the type object.
     |  __subclasscheck__(self, subclass, /)
     |      Check if a class is a subclass.
     |  __subclasses__(self, /)
     |      Return a list of immediate subclasses.
     |  mro(self, /)
     |      Return a type's method resolution order.
     |  ----------------------------------------------------------------------
     |  Class methods inherited from builtins.type:
     |  __prepare__(...) from builtins.type
     |      __prepare__() -> dict
     |      used to create the namespace for the class statement
     |  ----------------------------------------------------------------------
     |  Data descriptors inherited from builtins.type:
     |  __abstractmethods__
     |  __dict__
     |  __text_signature__
     |  ----------------------------------------------------------------------
     |  Data and other attributes inherited from builtins.type:
     |  __base__ = <class 'type'>
     |      type(object) -> the object's type
     |      type(name, bases, dict, **kwds) -> a new type
     |  __bases__ = (<class 'type'>,)
     |  __basicsize__ = 880
     |  __dictoffset__ = 264
     |  __flags__ = 2148292096
     |  __itemsize__ = 40
     |  __mro__ = (<class 'pytzen.MetaType'>, <class 'type'>, <class 'object'>...
     |  __weakrefoffset__ = 368
    class ProtoType(builtins.object)
     |  ProtoType(*args, **kwargs) -> object
     |  The `ProtoType` class serves as a foundational component in a 
     |  dynamic class creation and configuration management system, 
     |  leveraging a custom metaclass `MetaType` to control instantiation 
     |  behavior.
     |  Methods defined here:
     |  __init__(self) -> None
     |      Initializes a new instance of the `ProtoType` class, 
     |      orchestrated under the controlled instantiation behavior 
     |      enforced by the `MetaType` metaclass.
     |  __setattr__(self, key, value) -> None
     |      Overrides the default behavior for setting attributes to ensure 
     |      that every new attribute added to an instance of `ProtoType` or 
     |      its derived classes is registered in a shared data structure.
     |  close() -> None from builtins.type
     |      Finalizes operations by persistently storing class data.
     |  log(message, stdout=True, write=True) -> None from builtins.type
     |      Records a log message with an optional display and storage 
     |      behavior.
     |  store(name, value) -> None from builtins.type
     |      Stores a named value within the class's shared data store.
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  __weakref__
     |      list of weak references to the object (if defined)
    class SharedData(builtins.object)
     |  SharedData(classes: dict = <factory>, log: dict = <factory>, store: dict = <factory>) -> None
     |  A data class for storing and managing shared pipeline information in 
     |  an immutable structure.
     |  Methods defined here:
     |  __eq__(self, other)
     |  __init__(self, classes: dict = <factory>, log: dict = <factory>, store: dict = <factory>) -> None
     |  __repr__(self)
     |  __setattr__(self, key, value) -> None
     |      Overrides the default attribute setting behavior specifically to 
     |      enforce immutability for attributes once they have been set.
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  __weakref__
     |      list of weak references to the object (if defined)
     |  ----------------------------------------------------------------------
     |  Data and other attributes defined here:
     |  __annotations__ = {'classes': <class 'dict'>, 'log': <class 'dict'>, '...
     |  __dataclass_fields__ = {'classes': Field(name='classes',type=<class 'd...
     |  __dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,or...
     |  __hash__ = None

    new_namespace(namespace: str)
        Creates and returns a new namespace as a module, isolated from 
        the original pytzen package.

    DIR = '/home/p/pytzen/src/pytzen/docs'

# Show the module docstring carried by each namespace created with
# pytzen.new_namespace().
print(f'Extract namespace sample: {extract.__doc__}')
print(f'Transform namespace sample: {transform.__doc__}')
print(f'Load namespace sample: {load.__doc__}')
Extract namespace sample: PYTZEN is designed to sketch out data pipelines.

Transform namespace sample: PYTZEN is designed to sketch out data pipelines.

Load namespace sample: PYTZEN is designed to sketch out data pipelines.
    path_json = os.path.join(pytzen.DIR, 'config.json')
    with open(path_json, 'r') as f:
        print("Contents of 'config.json':\n")
except FileNotFoundError:
    print("Error: 'config.json' file not found in the pytzen.DIR directory.",
          "You MUST create a 'config.json' file in the pytzen.DIR directory.",
Contents of 'config.json':

    "str_input": "some_input",
    "int_input": 10,
    "list_input": [
    "dict_input": {
        "key1": "value1",
        "key2": "value2"
class DerivedClass(extract.ProtoType):
    # Sample class with a single annotated attribute that has a default.
    number: int = 137

    def do(self):
        # Print a short note followed by the value of 'number'.
        print("I am using a '@dataclass' attribute in the 'ProtoType' way.")
        # The placeholder used to be empty ('{}'), which is a SyntaxError in
        # an f-string; the sample output expects the value 137 here.
        # NOTE(review): the value is read through 'self.data' because this
        # document states that attributes live in the 'data' object --
        # confirm against the original notebook.
        print(f'I am calling it ( {self.data.number}')

derived = DerivedClass()
I am using a '@dataclass' attribute in the 'ProtoType' way.
I am calling it ( 137
Help on DerivedClass in module __main__ object:

class DerivedClass(pytzen.ProtoType)
 |  DerivedClass(*args, **kwargs) -> object
 |  DerivedClass(*args, **kwargs) -> object
 |  Method resolution order:
 |      DerivedClass
 |      pytzen.ProtoType
 |      builtins.object
 |  Methods defined here:
 |  __eq__(self, other)
 |  __init__(self, number: int = 137) -> None
 |  __repr__(self)
 |  close() -> None from builtins.type
 |      Finalizes operations by persistently storing class data.
 |  do(self)
 |  log(message, stdout=True, write=True) -> None from builtins.type
 |      Records a log message with an optional display and storage 
 |      behavior.
 |  store(name, value) -> None from builtins.type
 |      Stores a named value within the class's shared data store.
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  __annotations__ = {'number': <class 'int'>}
 |  __dataclass_fields__ = {'number': Field(name='number',type=<class 'int...
 |  __dataclass_params__ = _DataclassParams(init=True,repr=True,eq=True,or...
 |  __hash__ = None
 |  class_path = '__main__.DerivedClass'
 |  number = 137
 |  ----------------------------------------------------------------------
 |  Methods inherited from pytzen.ProtoType:
 |  __setattr__(self, key, value) -> None
 |      Overrides the default behavior for setting attributes to ensure 
 |      that every new attribute added to an instance of `ProtoType` or 
 |      its derived classes is registered in a shared data structure.
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from pytzen.ProtoType:
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  ----------------------------------------------------------------------
 |  Data and other attributes inherited from pytzen.ProtoType:
 |  config = <class 'pytzen.ConfigurationFile'>
 |  data = SharedData(classes={'__main__.DerivedClass': {'a...o', 'log', '...
class DynamicAttribute(extract.ProtoType):
    # There is no '@dataclass' defined attribute.

    def set_dynamically(self):
        # Let's set the attribute on the fly.
        print("I am setting an attribute using 'self.n = 137'.")
        self.n = 137

    def get_data(self):
        print('I am retrieving the data from the object.')
        # The placeholder used to be empty ('{}'), a SyntaxError; the sample
        # output expects the value 137 here.
        # NOTE(review): 'self.data.n' is reconstructed from the sample
        # output -- confirm against the original notebook.
        print(f'I am calling it ( {self.data.n}\n')
        # The 'except' clause had lost its 'try'. The sample output shows an
        # AttributeError for attribute 'n', so the guarded statement reads
        # 'self.n' directly.
        try:
            print(self.n)
        except AttributeError as e:
            print(f"I cannot call 'self.number' directly.\n{e}.")

dynamic = DynamicAttribute()
# The call was never closed. The sample output shows each sentence on its
# own line, hence the newline separator.
print("Conclusion: all attributes are stored in the 'data' object.",
      'There is no way to access it directly.',
      'But, why would you want to do that?',
      sep='\n')
I am setting an attribute using 'self.n = 137'.
I am retrieving the data from the object.
I am calling it ( 137

I cannot call 'self.number' directly.
'DynamicAttribute' object has no attribute 'n'.
Conclusion: all attributes are stored in the 'data' object.
There is no way to access it directly.
But, why would you want to do that?
class DataClassSample(extract.ProtoType):
    # This is the '@dataclass' way to define a class attribute.
    # The attribute has no default value, so it must be supplied when the
    # class is instantiated, e.g. DataClassSample(m=100).
    m: int

class RetrieveSharedData(extract.ProtoType):
    # Reads, and then tries to overwrite, the attribute 'm' that was set
    # by a DataClassSample instance.

    def get_data(self):
        # The print call was left unclosed and its value argument was
        # missing. The sample output shows the value 100 here.
        # NOTE(review): 'self.data.m' is reconstructed from that output --
        # confirm against the original notebook.
        print("Objects in 'data' are shared among classes and instances:",
              self.data.m)

    def change_attribute(self):
        # The 'except' clause had lost its 'try'; the extra indentation of
        # the two statements below shows they were its body.
        try:
            print('I am trying to reset an attribute directly.')
            self.m = 137
        except AttributeError as e:
            print(f"I cannot call 'self.m = 137'.\n{e}")
            print("The attribute also cannot be redefined using '@dataclass'.")

data_sample = DataClassSample(m=100)
get_data_sample = RetrieveSharedData()
Objects in 'data' are shared among classes and instances: 100
I am trying to reset an attribute directly.
I cannot call 'self.m = 137'.
Attribute 'm' already exists and cannot be changed.
The attribute also cannot be redefined using '@dataclass'.
@dataclass
class Nested:
    """Small data holder used as a nested attribute value.

    It is instantiated with one positional string, so it needs a generated
    ``__init__`` that accepts it. ``pre_defined`` keeps its default unless
    it is overridden.
    """
    # NOTE(review): the field name 'nested_attr' mirrors AnotherNested --
    # confirm against the original notebook.
    nested_attr: str
    pre_defined: str = 'I am a pre-defined attribute.'

class AnotherNested:
    """Hand-written class holding a caller-supplied and a fixed attribute."""

    def __init__(self, nested_attr: str):
        """Store *nested_attr* and set ``pre_defined`` to its fixed text."""
        fixed_text = 'I am a pre-defined attribute with the same name.'
        # Assigned left to right, so 'nested_attr' is still set first.
        self.nested_attr, self.pre_defined = nested_attr, fixed_text

class Container(extract.ProtoType):
    # Holds one instance of each nested sample class as its own attributes.

    def print_nested(self):
        # Attach a Nested and an AnotherNested instance to this object.
        # NOTE(review): despite the method name, nothing is printed here;
        # the print statements appear to be missing -- confirm against the
        # original notebook.
        self.nested = Nested('I am a nested attribute.')
        self.another_nested = AnotherNested('Here I am again.')

class UseNested(extract.ProtoType):

    def print_nested(self):
        # The nested attribute is shared among classes and instances.
        # Attributes from the nested class can be redefined.
        # The method body used to hold only comments (a SyntaxError), with
        # the right-hand sides of two assignments fused into the comment.
        # NOTE(review): the assignment targets and the two prints are
        # reconstructed from the sample output ('Changed by UseNested.' /
        # 'I changed too.') -- confirm against the original notebook.
        self.data.nested.nested_attr = 'Changed by UseNested.'
        self.data.another_nested.nested_attr = 'I changed too.'
        print(self.data.nested.nested_attr)
        print(self.data.another_nested.nested_attr)

container = Container()
use_nested = UseNested()
I am a nested attribute.
Here I am again.
I am a pre-defined attribute.
I am a pre-defined attribute with the same name.
Changed by UseNested.
I changed too.
class ConfigVariableSample(extract.ProtoType):
    # Prints four configuration values read through 'self.config'.

    def print_configuration_variable(self):
        # (label, attribute name) pairs, in print order.
        entries = (
            ("String", "str_input"),
            ("Integer", "int_input"),
            ("List", "list_input"),
            ("Dictionary", "dict_input"),
        )
        print("\nConfiguration Variables:")
        for label, attr_name in entries:
            # Each value is looked up right before it is printed.
            print(f"{label} Input: {getattr(self.config, attr_name)}")

config = ConfigVariableSample()
Configuration Variables:
String Input: some_input
Integer Input: 10
List Input: ['item1', 'item2']
Dictionary Input: {'key1': 'value1', 'key2': 'value2'}
class KeepLog(extract.ProtoType):

    some_message: str = 'I am a log message.'

    def log_it(self):
        # NOTE(review): the sample output shows 'I am a log message.' being
        # logged first; this call is reconstructed from it -- confirm
        # against the original notebook.
        self.log(self.data.some_message)
        # Both calls below were left unclosed. log() takes
        # (message, stdout=True, write=True); the sample output shows the
        # first message stored but not printed and the second printed but
        # not stored.
        self.log("If you don't want it to be printed, set 'stdout' to 'False'.",
                 stdout=False)
        self.log("If you don't want it to be exported, set 'write' to 'False'.",
                 write=False)

class KeepLog2(extract.ProtoType):

    some_message2: str = 'I am a log message again.'

    def log_it(self):
        # NOTE(review): the sample log shows 'I am a log message again.'
        # recorded just before the message below, and 'some_message2' is
        # otherwise unused; this call is reconstructed from that output --
        # confirm against the original notebook.
        self.log(self.data.some_message2)
        self.log('My pipeline did something.')

kl = KeepLog()
kl2 = KeepLog2()
2024-05-20 08:35:26.125196: I am a log message.
2024-05-20 08:35:26.125389: If you don't want it to be exported, set 'write' to 'False'.
2024-05-20 08:35:26.125634: I am a log message again.
2024-05-20 08:35:26.125681: My pipeline did something.
{'2024-05-20 08:35:26.125196': 'I am a log message.',
 '2024-05-20 08:35:26.125361': "If you don't want it to be printed, set 'stdout' to 'False'.",
 '2024-05-20 08:35:26.125634': 'I am a log message again.',
 '2024-05-20 08:35:26.125681': 'My pipeline did something.'}
class KeepResultsStored(extract.ProtoType):

    def store_results(self):
        # The head of the call was missing, leaving only its arguments on
        # the 'def' line (a SyntaxError). store(name, value) puts the value
        # in the shared store; the sample output lists 'some_results' there.
        self.store('some_results', {'result':3, 'diff':4})

class KeepResultsStored2(extract.ProtoType):

    def store_results(self):
        # The head of the call was missing, leaving only its arguments on
        # the 'def' line (a SyntaxError). store(name, value) puts the value
        # in the shared store; the sample output lists 'some_results2' there.
        self.store('some_results2', {'result2':3, 'diff2':4})

krs = KeepResultsStored()
krs2 = KeepResultsStored2()
{'some_results': {'result': 3, 'diff': 4},
 'some_results2': {'result2': 3, 'diff2': 4}}
{'__main__.DerivedClass': {'attributes': {'number': 'int'},
  'methods': ['do', 'log', 'store', 'close']},
 '__main__.DynamicAttribute': {'attributes': {'n': 'int'},
  'methods': ['set_dynamically', 'get_data', 'log', 'store', 'close']},
 '__main__.DataClassSample': {'attributes': {'m': 'int'},
  'methods': ['log', 'store', 'close']},
 '__main__.RetrieveSharedData': {'attributes': {},
  'methods': ['get_data', 'change_attribute', 'log', 'store', 'close']},
 '__main__.Container': {'attributes': {'nested': 'Nested',
   'another_nested': 'AnotherNested'},
  'methods': ['print_nested', 'log', 'store', 'close']},
 '__main__.UseNested': {'attributes': {},
  'methods': ['print_nested', 'log', 'store', 'close']},
 '__main__.ConfigVariableSample': {'attributes': {},
  'methods': ['print_configuration_variable', 'log', 'store', 'close']},
 '__main__.KeepLog': {'attributes': {'some_message': 'str'},
  'methods': ['log_it', 'log', 'store', 'close']},
 '__main__.KeepLog2': {'attributes': {'some_message2': 'str'},
  'methods': ['log_it', 'log', 'store', 'close']},
 '__main__.KeepResultsStored': {'attributes': {},
  'methods': ['store_results', 'log', 'store', 'close']},
 '__main__.KeepResultsStored2': {'attributes': {},
  'methods': ['store_results', 'log', 'store', 'close']}}
# Let's close.

# Let's inspect the exported results.
def inspect_json(path):
    """Pretty-print the JSON document stored at *path* to stdout.

    A header line naming the file is printed first, followed by the parsed
    content re-serialized with a four-space indent.

    Args:
        path: Location of the JSON file to read.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    import json

    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale-dependent default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        # The header is printed before parsing, so it still appears when
        # the content turns out to be invalid.
        print(f"\nContents of '{path}':\n")
        parsed = json.load(f)
    print(json.dumps(parsed, indent=4))

inspect_json(os.path.join(pytzen.DIR, 'extract_dataclasses.json'))
inspect_json(os.path.join(pytzen.DIR, 'extract_log.json'))
inspect_json(os.path.join(pytzen.DIR, 'extract_store.json'))
Contents of '/home/p/pytzen/src/pytzen/docs/extract_dataclasses.json':

    "__main__.DerivedClass": {
        "attributes": {
            "number": "int"
        "methods": [
    "__main__.DynamicAttribute": {
        "attributes": {
            "n": "int"
        "methods": [
    "__main__.DataClassSample": {
        "attributes": {
            "m": "int"
        "methods": [
    "__main__.RetrieveSharedData": {
        "attributes": {},
        "methods": [
    "__main__.Container": {
        "attributes": {
            "nested": "Nested",
            "another_nested": "AnotherNested"
        "methods": [
    "__main__.UseNested": {
        "attributes": {},
        "methods": [
    "__main__.ConfigVariableSample": {
        "attributes": {},
        "methods": [
    "__main__.KeepLog": {
        "attributes": {
            "some_message": "str"
        "methods": [
    "__main__.KeepLog2": {
        "attributes": {
            "some_message2": "str"
        "methods": [
    "__main__.KeepResultsStored": {
        "attributes": {},
        "methods": [
    "__main__.KeepResultsStored2": {
        "attributes": {},
        "methods": [

Contents of '/home/p/pytzen/src/pytzen/docs/extract_log.json':

    "2024-05-20 08:35:26.125196": "I am a log message.",
    "2024-05-20 08:35:26.125361": "If you don't want it to be printed, set 'stdout' to 'False'.",
    "2024-05-20 08:35:26.125634": "I am a log message again.",
    "2024-05-20 08:35:26.125681": "My pipeline did something."

Contents of '/home/p/pytzen/src/pytzen/docs/extract_store.json':

    "some_results": {
        "result": 3,
        "diff": 4
    "some_results2": {
        "result2": 3,
        "diff2": 4


