Skip to main content

tfrecords: fast and simple reader and writer

Project description

tfrecords

A simplified, self-contained port of the TFRecord and table readers/writers

update information

    2023-07-01:  Added Arrow and Parquet support
    2022-10-30:  Added LMDB and LevelDB readers/writers; added record batch writing
    2022-10-17:  Added a shared-memory read mode for records, for faster reading
    2022-02-01:  Simplified and ported the tfrecord dataset

1. Record read and write demo; the with_share_memory flag accelerates reading

# -*- coding: utf-8 -*-
# @Time    : 2022/9/8 15:49

import tfrecords

# Shared writer/reader options for the demos below; NONE disables compression.
options = tfrecords.TFRecordOptions(compression_type=tfrecords.TFRecordCompressionType.NONE)


def test_write(filename, N=3, context='aaa'):
    """Write *N* string records to *filename*, flushing in batches of 100.

    Each record is ``context + '____' + index``.
    """
    with tfrecords.TFRecordWriter(filename, options=options) as file_writer:
        batch_data = []
        for i in range(N):
            batch_data.append(context + '____' + str(i))
            # Flush a full batch of 100 records at a time.
            if (i + 1) % 100 == 0:
                file_writer.write_batch(batch_data)
                batch_data.clear()
        # Flush any remaining partial batch; the trailing clear() in the
        # original was dead code (batch_data goes out of scope here).
        if batch_data:
            file_writer.write_batch(batch_data)


def test_record_iterator(example_paths):
    """Sequentially iterate every record in each file and print it.

    Also demonstrates ``read_offsets``/``read_count``, which scan from the
    given byte offset (0 = start of file).
    """
    print('test_record_iterator')
    for example_path in example_paths:
        iterator = tfrecords.tf_record_iterator(example_path, options=options, skip_bytes=0, with_share_memory=True)
        offset_list = iterator.read_offsets(0)
        count = iterator.read_count(0)
        print(count)
        num = 0
        # Renamed loop variable: the original shadowed the builtin `iter`.
        for record in iterator:
            num += 1
            print(record)


def test_random_reader(example_paths):
    """Read records one by one via the random-access reader.

    ``read(pos)`` returns ``(record, next_pos)``; reading past the last
    record raises, which is used here as the end-of-file signal.
    """
    print('test_random_reader')
    for example_path in example_paths:
        file_reader = tfrecords.tf_record_random_reader(example_path, options=options, with_share_memory=True)
        last_pos = 0
        while True:
            try:
                x, pos = file_reader.read(last_pos)
                print(x, pos)
                last_pos = pos
            except Exception:
                # End of file (or read error) — stop; dropped the unused
                # `as e` binding from the original.
                break


def test_random_reader2(example_paths):
    """Random-access demo: resolve all record offsets up front, then read each."""
    print('test_random_reader2')
    for example_path in example_paths:
        file_reader = tfrecords.tf_record_random_reader(example_path, options=options, with_share_memory=True)
        # read_offsets(0) scans from the start and yields (offset, length) pairs.
        for offset, _length in file_reader.read_offsets(0):
            record, _ = file_reader.read(offset)
            print(record)


# Driver: write three records to a demo file, then exercise each reader API
# on every file matching the glob pattern.
test_write('d:/example.tfrecords0', 3, 'file0')

example_paths = tfrecords.glob('d:/example.tfrecords*')
print(example_paths)
test_record_iterator(example_paths)
print()
test_random_reader(example_paths)
print()
test_random_reader2(example_paths)
print()

2. leveldb read and write demo

# -*- coding: utf-8 -*-
# @Time    : 2022/9/8 15:49

from tfrecords import LEVELDB

# Directory where the demo LevelDB database is created.
db_path = 'd:/example_leveldb'


def test_write(db_path):
    """Populate a LevelDB at *db_path* with demo input/label key-value pairs."""
    options = LEVELDB.LeveldbOptions(create_if_missing=True, error_if_exists=False)
    file_writer = LEVELDB.Leveldb(db_path, options)
    try:
        keys, values = [], []
        for i in range(30):
            keys.append(b"input_" + str(i).encode())
            keys.append(b"label_" + str(i).encode())
            values.append(b"xiaoming" + str(i).encode())
            values.append(b"zzs" + str(i).encode())
            # Flush in batches (every 1000 iterations = 2000 pairs);
            # with only 30 iterations this branch never fires in the demo.
            if (i + 1) % 1000 == 0:
                file_writer.put_batch(keys, values)
                keys.clear()
                values.clear()
        # Flush the remaining partial batch.
        if keys:
            file_writer.put_batch(keys, values)
    finally:
        # Release the DB handle even if a write raises.
        file_writer.close()


def test_read(db_path):
    """Open the LevelDB at *db_path* read-only-ish, dump it, and look up keys."""
    options = LEVELDB.LeveldbOptions(create_if_missing=False, error_if_exists=False)
    reader = LEVELDB.Leveldb(db_path, options)
    try:
        def show():
            # Forward iteration over all entries.
            it = reader.get_iterater(reverse=False)
            i = 0
            for item in it:
                print(i, item)
                i += 1

        def test_find(key):
            # Point lookup; returns the stored value (or None if absent).
            value = reader.get(key)
            print('find', type(value), value)

        show()

        test_find(b'input_0')
        test_find(b'input_5')
        test_find(b'input_10')
    finally:
        # Release the DB handle even if a lookup raises.
        reader.close()


# Driver: populate the database first so the reader has data to show.
test_write(db_path)
test_read(db_path)

3. lmdb read and write demo

# -*- coding: utf-8 -*-
# @Time    : 2022/9/8 15:49

from tfrecords import LMDB

# Directory where the demo LMDB environment is created.
db_path = 'd:/example_lmdb'


def test_write(db_path):
    """Populate an LMDB at *db_path* with demo input/label key-value pairs."""
    options = LMDB.LmdbOptions(env_open_flag=0,
                               env_open_mode=0o664,  # octal file mode
                               txn_flag=0,
                               dbi_flag=0,
                               put_flag=0)
    # map_size caps the environment size at 10 MB.
    file_writer = LMDB.Lmdb(db_path, options, map_size=1024 * 1024 * 10)
    try:
        keys, values = [], []
        for i in range(30):
            keys.append(b"input_" + str(i).encode())
            keys.append(b"label_" + str(i).encode())
            values.append(b"xiaoming_" + str(i).encode())
            values.append(b"zzs_" + str(i).encode())
            # Flush in batches (every 1000 iterations); never fires for 30.
            if (i + 1) % 1000 == 0:
                file_writer.put_batch(keys, values)
                keys.clear()
                values.clear()
        # Flush the remaining partial batch.
        if keys:
            file_writer.put_batch(keys, values)
    finally:
        # Release the environment even if a write raises.
        file_writer.close()


def test_read(db_path):
    """Open the LMDB at *db_path* read-only, dump it, and look up keys."""
    options = LMDB.LmdbOptions(env_open_flag=LMDB.LmdbFlag.MDB_RDONLY,
                               env_open_mode=0o664,  # octal file mode
                               txn_flag=0,  # LMDB.LmdbFlag.MDB_RDONLY
                               dbi_flag=0,
                               put_flag=0)
    # map_size=0 keeps the existing environment size.
    reader = LMDB.Lmdb(db_path, options, map_size=0)
    try:
        def show():
            # Forward iteration over all entries.
            it = reader.get_iterater(reverse=False)
            i = 0
            for item in it:
                print(i, item)
                i += 1

        def test_find(key):
            # Point lookup; returns the stored value (or None if absent).
            value = reader.get(key)
            print('find', type(value), value)

        show()
        # Fixed: the writer stores bytes keys like b"input_0"; the original
        # looked up 'input0'/'input5'/b'input10' (str type and missing
        # underscore), which can never match. Matches the LevelDB demo.
        test_find(b'input_0')
        test_find(b'input_5')
        test_find(b'input_10')
    finally:
        # Release the environment even if a lookup raises.
        reader.close()


# Driver: populate the database first so the reader has data to show.
test_write(db_path)
test_read(db_path)

4. arrow demo

Stream

from tfrecords.python.io.arrow import IPC_Writer,IPC_StreamReader,arrow

path_file = "d:/tmp/data.arrow"

def test_write():
    """Build a two-column (id, text) table and write it as an Arrow IPC stream."""
    schema = arrow.schema([
        arrow.field('id', arrow.int32()),
        arrow.field('text', arrow.utf8())
    ])

    # Build the int32 'id' column; Finish().Value() unwraps the built array.
    id_builder = arrow.Int32Builder()
    id_builder.AppendValues([0, 1, 4])
    id_array = id_builder.Finish().Value()

    # Build the utf8 'text' column.
    text_builder = arrow.StringBuilder()
    text_builder.AppendValues(["aaaa", "你是谁", "张三"])
    text_array = text_builder.Finish().Value()

    table = arrow.Table.Make(schema=schema, arrays=[id_array, text_array])
    # with_stream=True selects the IPC stream format (vs. the file format).
    writer = IPC_Writer(path_file, schema, with_stream=True)
    writer.write_table(table)
    writer.close()

def test_read():
    """Read the IPC stream back and print every value of the 'text' column."""
    reader = IPC_StreamReader(path_file)
    table = reader.read_all()
    reader.close()
    print(table)

    # Only the first chunk is inspected, mirroring the write of one table.
    text_chunk = table.GetColumnByName('text').chunk(0)
    for idx in range(text_chunk.length()):
        value = text_chunk.Value(idx)
        print(type(value), value)


# Driver: write the stream first so the reader has data to show.
test_write()
test_read()

file

from tfrecords.python.io.arrow import IPC_Writer,IPC_StreamReader,IPC_MemoryMappedFileReader,arrow

path_file = "d:/tmp/data.arrow"

def test_write():
    """Build a two-column (id, text) table and write it in the Arrow IPC file format."""
    schema = arrow.schema([
        arrow.field('id', arrow.int32()),
        arrow.field('text', arrow.utf8())
    ])

    # Build the int32 'id' column; Finish().Value() unwraps the built array.
    id_builder = arrow.Int32Builder()
    id_builder.AppendValues([0, 1, 4])
    id_array = id_builder.Finish().Value()

    # Build the utf8 'text' column.
    text_builder = arrow.StringBuilder()
    text_builder.AppendValues(["aaaa", "你是谁", "张三"])
    text_array = text_builder.Finish().Value()

    table = arrow.Table.Make(schema=schema, arrays=[id_array, text_array])
    # with_stream=False selects the random-access IPC file format.
    writer = IPC_Writer(path_file, schema, with_stream=False)
    writer.write_table(table)
    writer.close()


def test_read():
    """Print every record batch from the memory-mapped Arrow IPC file."""
    reader = IPC_MemoryMappedFileReader(path_file)
    for batch_index in range(reader.num_record_batches()):
        print(reader.read_batch(batch_index))
    reader.close()


# Driver: write the file first so the reader has data to show.
test_write()
test_read()

5. parquet demo

from tfrecords.python.io.arrow import ParquetWriter,IPC_StreamReader,ParquetReader,arrow
path_file = "d:/tmp/data.parquet"

def test_write():
    """Build a two-column (id, text) table and write it as a Parquet file."""
    schema = arrow.schema([
        arrow.field('id', arrow.int32()),
        arrow.field('text', arrow.utf8())
    ])

    # Build the int32 'id' column; Finish().Value() unwraps the built array.
    id_builder = arrow.Int32Builder()
    id_builder.AppendValues([0, 1, 4, 5])
    id_array = id_builder.Finish().Value()

    # Build the utf8 'text' column.
    text_builder = arrow.StringBuilder()
    text_builder.AppendValues(["aaaa", "你是谁", "张三", "李赛"])
    text_array = text_builder.Finish().Value()

    table = arrow.Table.Make(schema=schema, arrays=[id_array, text_array])

    writer = ParquetWriter(path_file, schema)
    writer.write_table(table)
    writer.close()

def test_read():
    """Read the Parquet file back, flatten it, and print the 'text' column."""
    reader = ParquetReader(path_file, options=dict(buffer_size=2))
    table = reader.read_table()
    reader.close()
    # Flatten().Value() unwraps the flattened table from the result wrapper.
    table = table.Flatten().Value()
    print(table)

    # Only the first chunk is inspected, mirroring the single-table write.
    text_chunk = table.GetColumnByName('text').chunk(0)
    for idx in range(text_chunk.length()):
        value = text_chunk.Value(idx)
        print(type(value), value)


# Driver: write the file first so the reader has data to show.
test_write()
test_read()

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distributions

No source distribution files available for this release. See tutorial on generating distribution archives.

Built Distributions

If you're not sure about the file name format, learn more about wheel file names.

tfrecords-0.3.1-cp314-cp314-win_amd64.whl (14.4 MB view details)

Uploaded CPython 3.14Windows x86-64

tfrecords-0.3.1-cp314-cp314-manylinux2014_x86_64.whl (18.2 MB view details)

Uploaded CPython 3.14

tfrecords-0.3.1-cp314-cp314-manylinux2014_aarch64.whl (14.3 MB view details)

Uploaded CPython 3.14

tfrecords-0.3.1-cp313-cp313-win_amd64.whl (13.8 MB view details)

Uploaded CPython 3.13Windows x86-64

tfrecords-0.3.1-cp313-cp313-manylinux2014_x86_64.whl (18.2 MB view details)

Uploaded CPython 3.13

tfrecords-0.3.1-cp313-cp313-manylinux2014_aarch64.whl (14.3 MB view details)

Uploaded CPython 3.13

tfrecords-0.3.1-cp312-cp312-win_amd64.whl (13.8 MB view details)

Uploaded CPython 3.12Windows x86-64

tfrecords-0.3.1-cp312-cp312-manylinux2014_x86_64.whl (18.2 MB view details)

Uploaded CPython 3.12

tfrecords-0.3.1-cp312-cp312-manylinux2014_aarch64.whl (14.3 MB view details)

Uploaded CPython 3.12

tfrecords-0.3.1-cp311-cp311-win_amd64.whl (13.8 MB view details)

Uploaded CPython 3.11Windows x86-64

tfrecords-0.3.1-cp311-cp311-manylinux2014_x86_64.whl (18.2 MB view details)

Uploaded CPython 3.11

tfrecords-0.3.1-cp311-cp311-manylinux2014_aarch64.whl (14.4 MB view details)

Uploaded CPython 3.11

tfrecords-0.3.1-cp310-cp310-win_amd64.whl (13.8 MB view details)

Uploaded CPython 3.10Windows x86-64

tfrecords-0.3.1-cp310-cp310-manylinux2014_x86_64.whl (18.2 MB view details)

Uploaded CPython 3.10

tfrecords-0.3.1-cp310-cp310-manylinux2014_aarch64.whl (14.4 MB view details)

Uploaded CPython 3.10

File details

Details for the file tfrecords-0.3.1-cp314-cp314-win_amd64.whl.

File metadata

  • Download URL: tfrecords-0.3.1-cp314-cp314-win_amd64.whl
  • Upload date:
  • Size: 14.4 MB
  • Tags: CPython 3.14, Windows x86-64
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.10.9

File hashes

Hashes for tfrecords-0.3.1-cp314-cp314-win_amd64.whl
Algorithm Hash digest
SHA256 0d6a7e677243630c9575ae525125261d1e1d94da070deda45701b842479f1815
MD5 f482ec15c4ce6818501ddace40017a80
BLAKE2b-256 fa869d54c95baeae51099e057e8c8fb437363aa8769c325ffdb76477ccdcc807

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp314-cp314-manylinux2014_x86_64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp314-cp314-manylinux2014_x86_64.whl
Algorithm Hash digest
SHA256 da1d1f078cf80e7562615370bc935412ed847d7282e4f6164703691509379194
MD5 147d2da76c70a8cbf723aa98179040d8
BLAKE2b-256 bfed9102411720c3b8dfd81b4035e34e47e896242c54f7586541cead61f5f4d1

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp314-cp314-manylinux2014_aarch64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp314-cp314-manylinux2014_aarch64.whl
Algorithm Hash digest
SHA256 93bc19648e4a23923f592c5338862d73b414aa036651a3497685c01f74e264d1
MD5 c7240e3a9a8d8a8e01080ba7c80ed352
BLAKE2b-256 cac00c88861b8083e337e97f7f1c7604d0f6bc76f71c5494807612a8193954fb

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp313-cp313-win_amd64.whl.

File metadata

  • Download URL: tfrecords-0.3.1-cp313-cp313-win_amd64.whl
  • Upload date:
  • Size: 13.8 MB
  • Tags: CPython 3.13, Windows x86-64
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.10.9

File hashes

Hashes for tfrecords-0.3.1-cp313-cp313-win_amd64.whl
Algorithm Hash digest
SHA256 a770699ec43c62c114073c66c4f6e55ff41e10aa37261d8e7ba2766f732fbe2c
MD5 5b34e49f82a295c531afb8f4f0e1b828
BLAKE2b-256 4011c097875595f06109d3980653d99be6a0def26114474acfbbe3da0767c3a2

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp313-cp313-manylinux2014_x86_64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp313-cp313-manylinux2014_x86_64.whl
Algorithm Hash digest
SHA256 c9f9ab44570c83a1d0b815654cc126adcf14361c494f7f0d79152392951ceff1
MD5 bb2b09531bf63fbaa1ed98888626b329
BLAKE2b-256 0a813117e84c669e275ba2fab6fba42c0f6a0ad558efe02b0d896d800c7d06ed

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp313-cp313-manylinux2014_aarch64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp313-cp313-manylinux2014_aarch64.whl
Algorithm Hash digest
SHA256 a0257fda554644ee640558d18c6a872c53f26af301405fb43376b489467f578f
MD5 dbeeade9b1a2c18e5637d0885bf3ef85
BLAKE2b-256 49782b540d00a71fd9df25159cc9d69168b11c61e551531a7cad07011b418145

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp312-cp312-win_amd64.whl.

File metadata

  • Download URL: tfrecords-0.3.1-cp312-cp312-win_amd64.whl
  • Upload date:
  • Size: 13.8 MB
  • Tags: CPython 3.12, Windows x86-64
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.10.9

File hashes

Hashes for tfrecords-0.3.1-cp312-cp312-win_amd64.whl
Algorithm Hash digest
SHA256 c68509e5938e42a43c00ee81206e9a4c27ab299440fb3c8bdde109399cb51edf
MD5 736c538d70bd215cb23c8ef0ce20c4cc
BLAKE2b-256 89a06f6c92f741fbfae4eec7e82d45d0811ef6fe67e419a490863a18efe116a1

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp312-cp312-manylinux2014_x86_64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp312-cp312-manylinux2014_x86_64.whl
Algorithm Hash digest
SHA256 4fe7255acd4a3fb8e0bb70341b4867fcbccbb03b08f5cf1ab276f271f55a5600
MD5 0672b8d072f900da25675d1cfc02c6aa
BLAKE2b-256 2111a5c28eb7464afe27915d514c27701b998fefef152af49267fadb0781d9d2

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp312-cp312-manylinux2014_aarch64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp312-cp312-manylinux2014_aarch64.whl
Algorithm Hash digest
SHA256 0676736e5985c29556ee44ea101c9b4dc20669812e9522dcc9c56d2a5cce5eca
MD5 8c5bc954aea87363e805063792ac4662
BLAKE2b-256 c69cfc3da624637d28147e86141216508e7eee1bbd96c362b6f739407d1b7e1c

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp311-cp311-win_amd64.whl.

File metadata

  • Download URL: tfrecords-0.3.1-cp311-cp311-win_amd64.whl
  • Upload date:
  • Size: 13.8 MB
  • Tags: CPython 3.11, Windows x86-64
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.10.9

File hashes

Hashes for tfrecords-0.3.1-cp311-cp311-win_amd64.whl
Algorithm Hash digest
SHA256 fca790a514ff3164a954647180e6697db81fa51dae185d4573e41dc34bcacf88
MD5 0b1e1e682b0ddf3916025e6f0e732762
BLAKE2b-256 36131a4747c8836dccde306bb96f77aaab6a12b20fcf0ef27115c06936907292

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp311-cp311-manylinux2014_x86_64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp311-cp311-manylinux2014_x86_64.whl
Algorithm Hash digest
SHA256 29c73ea10589f578d7e5b7613849a79d8c65d2102b48567f3b0a13e757add570
MD5 6ae38ffeb47a42c85355db0ae8aa2fb1
BLAKE2b-256 9c0eb1185e147e32f7eaaa2cc9487e329ef9524c263e3b4e8a5f8103389dced9

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp311-cp311-manylinux2014_aarch64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp311-cp311-manylinux2014_aarch64.whl
Algorithm Hash digest
SHA256 2894393d53c8d555ea408408be3e14b05ecd69c6b8205696c52405f96a383ca5
MD5 dc5ee5a12cd33fff97149eadf5e7de6d
BLAKE2b-256 6b109f143334c42fe76303b05d6235c535ba0c3958500e29fdef44c155a849c8

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp310-cp310-win_amd64.whl.

File metadata

  • Download URL: tfrecords-0.3.1-cp310-cp310-win_amd64.whl
  • Upload date:
  • Size: 13.8 MB
  • Tags: CPython 3.10, Windows x86-64
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.10.9

File hashes

Hashes for tfrecords-0.3.1-cp310-cp310-win_amd64.whl
Algorithm Hash digest
SHA256 3e21f2824bd6c7ed5813bc761d31d3c56b7b1256cdb685b82aae9e7f99f14733
MD5 303bf9893a689b3f8fd0f8fa3562ec51
BLAKE2b-256 da7700fa60e4b65b11adffbd31b666cf9ccd76496ec3b75a80f29aba9e9226fd

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp310-cp310-manylinux2014_x86_64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp310-cp310-manylinux2014_x86_64.whl
Algorithm Hash digest
SHA256 7abcb42dd23bc4f70ce90dcd6744222f73628e8cc7c8a8b44da59fc58e920c3f
MD5 94bd3a445524acc99890a4b6782f6a84
BLAKE2b-256 44669417486d2ea41003244ce983f50e727e5659d0f53367c8524cc2d46bce25

See more details on using hashes here.

File details

Details for the file tfrecords-0.3.1-cp310-cp310-manylinux2014_aarch64.whl.

File metadata

File hashes

Hashes for tfrecords-0.3.1-cp310-cp310-manylinux2014_aarch64.whl
Algorithm Hash digest
SHA256 31e8f48fee04526552bb8667ba02707f7ec92b34e740133405f01b9fa97052cb
MD5 7ba3c42076741d0bde5af037e05fd19d
BLAKE2b-256 fb73232191066e58aead039ff8285e2694fc0da4204c7938736fd107d7ecfbc7

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page