Regex search in files
Project description
Regex search in all kinds of files (read in chunks), plus binary substitution
pip install regexfilesearch
import regex
from regexfilesearch import regex_filesearch, get_all_files_in_folders_with_subdir_limit
alf = get_all_files_in_folders_with_subdir_limit(
folders=r"C:\Users\Gamer\anaconda3\envs\stopjogo", maxsubdirs=0
)
df = regex_filesearch(
files=alf,
regexpressions=[r"\bchar\b", r"import.*?pandas "],
with_context=True,
chunksize=8192,
flags=regex.IGNORECASE,
)
print(df.loc[df.aa_bytesstart > 1][:20].to_string(max_colwidth=10))
aa_chunkno aa_file aa_bytesstart aa_bytesend aa_chunkstart aa_chunkend aa_regex aa_regexpattern aa_replace aa_get_context aa_partial_result aa_full_match aa_partial_match aa_groups aa_allcaptures aa_result aa_result_utf8 aa_bytesstart_whole aa_bytesend_whole
12 1 C:\Use... 52 56 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 52 56
13 1 C:\Use... 182 186 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 182 186
14 1 C:\Use... 217 221 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 217 221
15 1 C:\Use... 254 258 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 254 258
16 1 C:\Use... 440 444 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 440 444
17 1 C:\Use... 582 586 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 582 586
18 1 C:\Use... 622 626 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 622 626
19 1 C:\Use... 661 665 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 661 665
20 1 C:\Use... 52 56 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 52 56
21 1 C:\Use... 182 186 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 182 186
22 1 C:\Use... 217 221 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 217 221
23 1 C:\Use... 254 258 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 254 258
24 1 C:\Use... 440 444 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 440 444
25 1 C:\Use... 582 586 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 582 586
26 1 C:\Use... 622 626 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 622 626
27 1 C:\Use... 661 665 0 16384 <regex... regex.... f(newf... f() False (b'#in... <NA> ((0, b... 0 b'char' char 661 665
29 1 C:\Use... 20080 20084 0 16384 <regex... regex.... f(newf... f() False (b'<\x... <NA> ((0, b... 0 b'char' char 20080 20084
30 1 C:\Use... 20236 20240 0 16384 <regex... regex.... f(newf... f() False (b'<\x... <NA> ((0, b... 0 b'char' char 20236 20240
31 1 C:\Use... 20260 20264 0 16384 <regex... regex.... f(newf... f() False (b'<\x... <NA> ((0, b... 0 b'char' char 20260 20264
32 1 C:\Use... 21457 21461 0 16384 <regex... regex.... f(newf... f() False (b'<\x... <NA> ((0, b... 0 b'char' char 21457 21461
print(df.loc[df.aa_bytesstart > 1][:20].iloc[0].to_string())
aa_chunkno 1
aa_file C:\Users\Gamer\anaconda3\envs\stopjogo\mademod...
aa_bytesstart 52
aa_bytesend 56
aa_chunkstart 0
aa_chunkend 16384
aa_regex <regex.Match object; span=(52, 56), match=b'ch...
aa_regexpattern regex.Regex(b'\\bchar\\b', flags=regex.A | reg...
aa_replace f(newfile:str, newbytes:bytes)
aa_get_context f()
aa_partial_result False
aa_full_match (b'#include <stdio.h>\r\n\r\n\r\n\r\n\r\n// np...
aa_partial_match <NA>
aa_groups ((0, b'char'), ((52, 56),))
aa_allcaptures 0
aa_result b'char'
aa_result_utf8 char
aa_bytesstart_whole 52
aa_bytesend_whole 56
# df.loc[df.aa_bytesstart > 1][:20].aa_replace.iloc[0]('c:\\testestest.py', 'CHARRRRRRR')
# substitutes one match, never changes the original file
// np=np.byte, c=signed CHARRRRRRR, ctypes=ctypes.c_byte, code=b
// numpy.int8: 8-bit signed integer (-128 to 127).
// Signed integer type, compatible with C char
void cfun_byte(const signed char *indatav, size_t size, signed char *outdatav )
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
regexfilesearch-0.11.tar.gz
(7.3 kB, view details)
Built Distribution
File details
Details for the file regexfilesearch-0.11.tar.gz.
File metadata
- Download URL: regexfilesearch-0.11.tar.gz
- Upload date:
- Size: 7.3 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.9.13
File hashes
Algorithm | Hash digest |
---|---|
SHA256 | 114321029dcd13174a44fe742527f479b49f66264862b3354139932af0aef469 |
MD5 | 3bcedb498b3d0a825303db658bd36a6d |
BLAKE2b-256 | 45641a162c3f4d034b4674079ead9cde0f121685e4a8e65589441bba3ebe48d0 |
File details
Details for the file regexfilesearch-0.11-py3-none-any.whl.
File metadata
- Download URL: regexfilesearch-0.11-py3-none-any.whl
- Upload date:
- Size: 8.8 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.9.13
File hashes
Algorithm | Hash digest |
---|---|
SHA256 | e39c92075aa91a81bca9d0b46cdc7eb6e6875fdbef2c5f0572bf28c07dec1431 |
MD5 | f1c62f92187b83a982552345a26abab4 |
BLAKE2b-256 | 1829418fa4aa87f64491e10401cea4e721a3cb45b86e76a97b5aa306e81d7ee8 |