Cli util to sort large files
Project description
A command-line utility for sorting large files.
Installation
$ python3 -m pip install fsort
Usage
Create file
fsort.py create-file --filename largefile.txt
Sort file
fsort.py external-sort --filename largefile.txt --size 20
The sorted result is written to output.txt.
You will also find chunk_* files in the working directory; they show how the large file was split into smaller sorted pieces.
Source code
import string
from contextlib import ExitStack
from heapq import merge
from itertools import count, islice
from random import choice, randint
import click
# Names of the chunk files written by external_sort, in creation order;
# the merge step opens every file listed here.
chunk_names = []
# Root click command group. The sub-commands in this module attach to it
# through @cli.command(). Documented with comments instead of a docstring
# because click would display a docstring as the group's help text.
@click.group()
def cli():
    return None
@cli.command()
@click.option('--filename', required=True, help='File to sort')
@click.option('--size', default=50000, help='Size of each chunk')
def external_sort(filename, size):
    """
    Sort a large file by chunks, storing these chunks
    into separate files with the given size – 50K lines by default.
    Result would be `output.txt` file with a sorted text.
    """
    # Start from a clean slate so that a second call in the same process
    # does not merge chunk files recorded by an earlier run.
    chunk_names.clear()

    # Phase 1: read at most `size` lines at a time, sort them in memory and
    # spill each sorted run to its own chunk_<n>.txt file.
    with open(filename) as f:
        for c in count(1):
            # The last line of the input may lack a trailing newline. Once
            # sorted it could land in the middle of a chunk and fuse with the
            # following line, so every line is terminated before sorting.
            sorted_chunk = sorted(
                line if line.endswith('\n') else f'{line}\n'
                for line in islice(f, size)
            )
            if not sorted_chunk:
                # islice yielded nothing: the input is exhausted.
                break
            chunk_name = f'chunk_{c}.txt'
            chunk_names.append(chunk_name)
            with open(chunk_name, 'w') as chunk_file:
                chunk_file.writelines(sorted_chunk)

    # Phase 2: k-way merge of the sorted chunks. ExitStack closes every
    # chunk file when the block exits, even if the merge fails part-way.
    # NOTE: all chunk files are open at once, so a very small `size` on a
    # very large input can hit the OS limit on open file descriptors.
    with ExitStack() as stack, open('output.txt', 'w') as of:
        files = [stack.enter_context(open(chunk)) for chunk in chunk_names]
        of.writelines(merge(*files))
def generate_text(length=None):
    """Return a random single-line string of printable characters.

    Args:
        length: Maximum number of characters in the result. Falls back
            to 45 when omitted (None) or 0.

    Returns:
        A string whose length is drawn uniformly from 8 up to the maximum
        (or is exactly the maximum when that is below 8). It never
        contains a newline or carriage-return character.
    """
    max_length = length or 45
    # randint(a, b) requires a <= b, so clamp the lower bound for maxima
    # smaller than the usual minimum of 8.
    word_length = randint(min(8, max_length), max_length)
    # string.printable includes line-break characters; writing them to a
    # file would split one generated row into several lines.
    alphabet = [ch for ch in string.printable if ch not in '\r\n']
    return ''.join(choice(alphabet) for _ in range(word_length))
# Writes `lines` rows to `filename` (overwriting any existing file); each row
# is the text returned by generate_text(line_length) followed by a newline.
# Documented with comments instead of a docstring because click would display
# a docstring as the command's help text.
@cli.command()
@click.option('--filename', default='large_file.txt', help="File's name")
@click.option('--lines', default=100, help='Rows in a file')
@click.option('--line-length', default=45, help="Max line length")
def create_file(filename, lines, line_length):
    with open(filename, 'w') as out:
        # A single writelines call over a lazy generator: rows are produced
        # one at a time, so the whole file is never held in memory.
        out.writelines(
            f'{generate_text(line_length)}\n' for _ in range(lines)
        )
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
fsort-0.1.2.tar.gz
(1.8 kB
view hashes)
Built Distribution
fsort-0.1.2-py3-none-any.whl
(2.8 kB
view hashes)