Skip to main content

Groups lists/NumPy arrays by continuous sequence

Project description

Group lists/NumPy arrays by continuous sequence

$ pip install group-by-continuous-sequence
import numpy as np
import random
from group_by_continuous_sequence import search_sequence_in_list, search_sequence_in_list_with_repeated_numbers, find_sequence_in_np_array

iterable = random.choices(range(2, 6), k=1000)


# Out[4]: [4, 3, 4, 2, 3, 2, 3, 3, 4, 5, 2, 5, 2, 2, 2, 3, 4, 4, 4, 2, 3, 3, 5, 4, 4, 2, 5, 4 ...]

# Find groups of consecutive items (difference=0, i.e. no difference between neighbouring numbers). In each tuple, the first number is the index and the second is the value.
#
a1 = search_sequence_in_list(
    iterable, difference=0, return_index=True, return_values=True
)
# [[(0, 4)],
#  [(1, 3)],
#  [(2, 4)],
#  [(3, 2)],
#  [(4, 3)],
#  [(5, 2)],
#  [(6, 3), (7, 3)],
#  [(8, 4)],
#  [(9, 5)],
#  [(10, 2)],
#  [(11, 5)],
#  [(12, 2), (13, 2), (14, 2)],
#  [(15, 3)],
#  [(16, 4), (17, 4), (18, 4)],
#  [(19, 2)],
#  [(20, 3), (21, 3)],
#  [(22, 5)],
#  [(23, 4), (24, 4)],
#  [(25, 2)],
#  [(26, 5)],
#  [(27, 4), (28, 4)],
#  [(29, 3)],
#  [(30, 2)],
#  [(31, 4)],
#  [(32, 5), (33, 5)],
#  [(34, 2)],
#  [(35, 4), (36, 4)]
#  ...


# (difference of 1 between consecutive numbers) - only index

a2 = search_sequence_in_list(
    iterable, difference=1, return_index=True, return_values=False
)
# [[0],
#  [1, 2],
#  [3, 4],
#  [5, 6],
#  [7, 8, 9],
#  [10],
#  [11],
#  [12],
#  [13],
#  [14, 15, 16],
#  [17],
#  [18],
#  [19, 20],
#  [21],
#  [22],
#  [23],
#  [24],
#  [25],
#  [26],
#  [27],
#  [28],
#  [29],
#  [30],
#  [31, 32],
#  ...


# (difference of 2 between consecutive numbers) - only values

a3 = search_sequence_in_list(
    iterable, difference=2, return_values=True, return_index=False
)
# ...
# [4],
# [4],
# [2],
# [3],
# [3, 5],
# [4],
# [4],
# [2],
# [5],
# [4],
# [4],
# [3],
# [2, 4],
# [5],
# [5],
# [2, 4],
# [4],
# [2],
# [5],
# [3],
# [2, 4],
# [3, 5],
# [2],
# [5],
# [5],
# [2, 4],
# ...


# (difference of 3 between consecutive numbers) - only values

a4 = search_sequence_in_list(
    iterable, difference=3, return_values=True, return_index=False
)
# [[4],
#  [3],
#  [4],
#  [2],
#  [3],
#  [2],
#  [3],
#  [3],
#  [4],
#  [5],
#  [2, 5],
#  [2],
#  [2],
#  [2],
#  [3],
#  [4],
#  [4],
#  [4],
#  [2],
#  [3],
#  [3],
#  [5],
#  [4],
#  [4],
#  [2, 5],
#  [4],
#  [4],
#  [3],
#  [2],
#  [4],
#  [5],
#  [5],
#  [2],
#  [4],
#  [4],
#  [2, 5] ...]


# Groups may include repeated numbers, e.g. ((19, 2), (20, 3), (21, 3)).
# If ignore_only_repeated is True, matches like [(1,4), (1,4)] are ignored, because they
# contain only one distinct value (4).

a21 = search_sequence_in_list_with_repeated_numbers(
    iterable,
    difference=1,
    return_index=True,
    return_values=True,
    ignore_only_repeated=True,
)

# [(0, 4),
#  ((1, 3), (2, 4)),
#  ((3, 2), (4, 3)),
#  ((5, 2), (6, 3), (7, 3), (8, 4), (9, 5)),
#  (10, 2),
#  (11, 5),
#  ((12, 2), (13, 2), (14, 2), (15, 3), (16, 4), (17, 4), (18, 4)),
#  ((19, 2), (20, 3), (21, 3)),
#  (22, 5),
#  (23, 4),
#  (24, 4),
#  (25, 2),
#  (26, 5),
#  (27, 4),
#  (28, 4),
#  (29, 3),
#  (30, 2),
#  ((31, 4), (32, 5), (33, 5)),
#  (34, 2), ...
#

# (difference of 2 between consecutive numbers), accepting groups that consist of a single repeated number ([2, 2, 2], [4, 4, 4] ...)
a31 = search_sequence_in_list_with_repeated_numbers(
    iterable,
    difference=2,
    return_index=False,
    return_values=True,
    ignore_only_repeated=False,
)

# [3, 3],
# [4],
# [5],
# [2],
# [5],
# [2, 2, 2],
# [3],
# [4, 4, 4],
# [2],
# [3, 3, 5],
# [4, 4],
# [2],
# [5],
# [4, 4],
# [3],
# [2, 4],
# [5, 5],
# [2, 4, 4],
# [2],
# [5],
# [3],
# [2, 4],
# [3, 5],
# [2],
# [5, 5],
# [2, 4],
# [2],
# [3],
# [4],
# [3, 3],
# [2, 2, 2],


# (difference of 3 between consecutive numbers), not accepting groups that consist of a single repeated number ([2, 2, 2], [4, 4, 4] ...)

a41 = search_sequence_in_list_with_repeated_numbers(
    iterable,
    difference=3,
    return_index=True,
    return_values=True,
    ignore_only_repeated=True,
)

# (767, 5),
# ((768, 2), (769, 5), (770, 5)),
# (771, 2),
# (772, 3),
# (773, 2),
# (774, 2),
# (775, 3),
# (776, 3),
# (777, 2),
# (778, 4),
# (779, 5),
# (780, 3),
# (781, 4),
# ((782, 2), (783, 2), (784, 5)),
# (785, 4),
# (786, 5),
# (787, 5),
# (788, 4),
# (789, 4),
# ((790, 2), (791, 5)),
# (792, 3),
# (793, 2),
# (794, 4),
# (795, 4),
# ((796, 2), (797, 5)),
# (798, 2),
# (799, 4),
# (800, 4),
# (801, 2),
# (802, 4),
# ((803, 2), (804, 2), (805, 5)),
# (806, 3),
# (807, 5),
# (808, 4),
# ((809, 2), (810, 2), (811, 5), (812, 5), (813, 5)),
# (814, 4),
# (815, 4),
# (816, 3),

# If 'inoneline' is True, all values are in one row, e.g. [1,2,3,4,5,6,7,8], instead of being split across rows, e.g. [[1,2,3,4],[5,6,7,8]]
m = find_sequence_in_np_array(np.asarray(iterable), [3, 4, 5])


# [{'inoneline': True,
#   'location': array([[7],
#          [8],
#          [9]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[85],
#          [86],
#          [87]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[176],
#          [177],
#          [178]], dtype=int64), ...

# Works with nested arrays too
m = find_sequence_in_np_array(np.asarray(iterable).reshape((10, 10, 10)), [3, 4, 5])
# m
# Out[28]:
# [{'inoneline': True,
#   'location': array([[0, 0, 7],
#          [0, 0, 8],
#          [0, 0, 9]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[0, 8, 5],
#          [0, 8, 6],
#          [0, 8, 7]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[1, 7, 6],
#          [1, 7, 7],
#          [1, 7, 8]], dtype=int64),

# and also with strings
m = find_sequence_in_np_array(
    np.asarray(iterable).reshape((10, 10, 5, 2)).astype(str), ["3", "4", "5"]
)
# [{'inoneline': False,
#   'location': array([[0, 0, 3, 1],
#          [0, 0, 4, 0],
#          [0, 0, 4, 1]], dtype=int64),
#   'values': ['3', '4', '5']},
#  {'inoneline': False,
#   'location': array([[0, 8, 2, 1],
#          [0, 8, 3, 0],
#          [0, 8, 3, 1]], dtype=int64),
#   'values': ['3', '4', '5']},
#  {'inoneline': False,
#   'location': array([[1, 7, 3, 0],
#          [1, 7, 3, 1],
#          [1, 7, 4, 0]], dtype=int64),
#   'values': ['3', '4', '5']},

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

group_by_continuous_sequence-0.10.tar.gz (8.6 kB view details)

Uploaded Source

Built Distribution

File details

Details for the file group_by_continuous_sequence-0.10.tar.gz.

File metadata

File hashes

Hashes for group_by_continuous_sequence-0.10.tar.gz
Algorithm Hash digest
SHA256 d718f050caadb766eb1a8c45596902b18e042c9daa8f2ffabfe2d1c9d80a6793
MD5 1f4c3d6324735f11b6f23885fd879ab9
BLAKE2b-256 b492a52cfd9337847ddce669d82b26eeba101071186a7a32538ec2ee96450a3d

See more details on using hashes here.

File details

Details for the file group_by_continuous_sequence-0.10-py3-none-any.whl.

File metadata

File hashes

Hashes for group_by_continuous_sequence-0.10-py3-none-any.whl
Algorithm Hash digest
SHA256 f1ab7084bbb5dc893d21b8480ce8fa1486b6ddf300f24bd8b59504790c00d5e5
MD5 6e28585a5139d93f2412e90566476c07
BLAKE2b-256 9ceeaa7c463d0fbac922502cf84b4ed2420f74c41789304e1515e4aeb4e2f200

See more details on using hashes here.

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page