Document Scanner SDK for document edge detection, border cropping, perspective correction and brightness adjustment
Project description
Python Document Scanner SDK
This project is a Python binding to the Dynamsoft C/C++ Document Scanner SDK. It aims to help developers quickly build desktop document scanner applications in Python on Windows and Linux.
About Dynamsoft Document Scanner
Get a 30-day FREE trial license to activate the SDK.
Supported Python Edition
- Python 3.x
Dependencies
pip install opencv-python
Command-line Usage
# Scan documents from images
$ scandocument -f <file-name> -l <license-key>
# Scan documents from camera video stream
$ scandocument -c 1 -l <license-key>
Quick Start
-
Scan documents from an image file:
import argparse
import docscanner
import sys
import numpy as np
import cv2
import time

# License key embedded in this sample; used when no -l/--license is given.
DEFAULT_LICENSE_KEY = "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="


def showNormalizedImage(name, normalized_image):
    """Display a normalized image in an OpenCV window and return its matrix.

    Args:
        name: Title of the window passed to ``cv2.imshow``.
        normalized_image: Object returned by ``scanner.normalizeBuffer``.

    Returns:
        The value produced by ``docscanner.convertNormalizedImage2Mat``.
    """
    mat = docscanner.convertNormalizedImage2Mat(normalized_image)
    cv2.imshow(name, mat)
    return mat


def process_file(filename, scanner):
    """Detect documents in an image file, preview them, and save the result.

    Every detected quadrilateral is normalized and outlined in green. After
    a key press, the last normalized image is written to
    ``<timestamp>.png`` in the current directory.

    Args:
        filename: Path of the image to load with ``cv2.imread``.
        scanner: Scanner instance created by ``docscanner.createInstance``.

    Raises:
        ValueError: If the image cannot be loaded.
    """
    image = cv2.imread(filename)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError('Failed to load image: ' + str(filename))

    # Draw on a copy so later normalizations are taken from clean pixels,
    # not from an image that already has earlier contours painted on it.
    annotated = image.copy()
    normalized_image = None
    for result in scanner.detectMat(image) or []:
        x1, y1 = result.x1, result.y1
        x2, y2 = result.x2, result.y2
        x3, y3 = result.x3, result.y3
        x4, y4 = result.x4, result.y4

        normalized_image = scanner.normalizeBuffer(
            image, x1, y1, x2, y2, x3, y3, x4, y4)
        showNormalizedImage("Normalized Image", normalized_image)

        # np.int0 was removed in NumPy 2.0; build an int32 contour instead.
        corners = np.array(
            [(x1, y1), (x2, y2), (x3, y3), (x4, y4)], dtype=np.int32)
        cv2.drawContours(annotated, [corners], 0, (0, 255, 0), 2)

    cv2.imshow('Document Image', annotated)
    cv2.waitKey(0)

    if normalized_image is None:
        # Nothing was detected, so there is nothing to save.
        print('No document detected')
        return

    normalized_image.save(str(time.time()) + '.png')
    print('Image saved')


def scandocument():
    """
    Command-line script for scanning documents from a given image
    """
    parser = argparse.ArgumentParser(description='Scan documents from an image file')
    parser.add_argument('-f', '--file', help='Path to the image file')
    parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
    args = parser.parse_args()

    filename = args.file
    if filename is None:
        parser.print_help()
        return

    try:
        # set license (fall back to the embedded key when none is supplied)
        docscanner.initLicense(args.license or DEFAULT_LICENSE_KEY)

        # initialize document scanner
        scanner = docscanner.createInstance()
        scanner.setParameters(docscanner.Templates.color)

        process_file(filename, scanner)
    except Exception as err:
        # Top-level boundary of the CLI: report the error and exit non-zero.
        print(err)
        sys.exit(1)


if __name__ == '__main__':
    scandocument()
-
Scan documents from camera video stream:
import argparse
import docscanner
import sys
import numpy as np
import cv2
import time

# License key embedded in this sample; used when no -l/--license is given.
DEFAULT_LICENSE_KEY = "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="

# Latest detection results delivered by the scanner's async listener.
g_results = None
# (window name, normalized image) pairs waiting to be saved with "s".
g_normalized_images = []


def callback(results):
    """Store the most recent detection results for the display loop."""
    global g_results
    g_results = results


def showNormalizedImage(name, normalized_image):
    """Display a normalized image in an OpenCV window and return its matrix.

    Args:
        name: Title of the window passed to ``cv2.imshow``.
        normalized_image: Object returned by ``scanner.normalizeBuffer``.

    Returns:
        The value produced by ``docscanner.convertNormalizedImage2Mat``.
    """
    mat = docscanner.convertNormalizedImage2Mat(normalized_image)
    cv2.imshow(name, mat)
    return mat


def _corners(result):
    """Return the eight corner coordinates of a detection result as a tuple."""
    return (result.x1, result.y1, result.x2, result.y2,
            result.x3, result.y3, result.x4, result.y4)


def _parse_camera_flag(value):
    """Parse the -c/--camera value into a bool.

    argparse's ``type=bool`` turns every non-empty string (including "0"
    and "false") into True. Only the explicit "off" spellings below are
    treated as False; any other value keeps enabling the camera, as before.
    """
    return str(value).strip().lower() not in ('', '0', 'false', 'no', 'n', 'off')


def process_video(scanner):
    """Run the interactive camera loop.

    Keys: "n" normalizes every currently detected document and shows it,
    "s" saves the normalized images to ``<timestamp>.png`` files, and
    "ESC" exits.

    Args:
        scanner: Scanner instance created by ``docscanner.createInstance``.

    Raises:
        RuntimeError: If camera 0 cannot be opened.
    """
    # Without this declaration the assignments below make the name local,
    # and pressing "s" before "n" raises UnboundLocalError.
    global g_normalized_images

    scanner.addAsyncListener(callback)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError('Failed to open camera 0')

    try:
        while True:
            ret, image = cap.read()

            ch = cv2.waitKey(1)
            if ch == 27:
                break

            if not ret or image is None:
                # No frame available: every later step needs real pixels.
                print('Failed to read a frame from the camera')
                break

            # Snapshot once so the listener cannot swap the results between
            # the None check and the loops below.
            results = g_results

            if ch == ord('n'):  # normalize image
                if results is not None:
                    g_normalized_images = []
                    for index, result in enumerate(results):
                        normalized_image = scanner.normalizeBuffer(
                            image, *_corners(result))
                        g_normalized_images.append((str(index), normalized_image))
                        showNormalizedImage(str(index), normalized_image)
            elif ch == ord('s'):  # save image
                for window_name, normalized_image in g_normalized_images:
                    cv2.destroyWindow(window_name)
                    normalized_image.save(str(time.time()) + '.png')
                    print('Image saved')

                g_normalized_images = []

            scanner.detectMatAsync(image)

            if results is not None:
                for result in results:
                    x1, y1, x2, y2, x3, y3, x4, y4 = _corners(result)
                    # np.int0 was removed in NumPy 2.0; use an int32 contour.
                    contour = np.array(
                        [(x1, y1), (x2, y2), (x3, y3), (x4, y4)], dtype=np.int32)
                    cv2.drawContours(image, [contour], 0, (0, 255, 0), 2)

            cv2.putText(image, 'Press "n" to normalize image', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.putText(image, 'Press "s" to save image', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.putText(image, 'Press "ESC" to exit', (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow('Document Scanner', image)
    finally:
        # Always free the camera and close windows, even on errors.
        cap.release()
        cv2.destroyAllWindows()


def scandocument():
    """
    Command-line script for scanning documents from camera video stream.
    """
    parser = argparse.ArgumentParser(description='Scan documents from camera')
    parser.add_argument('-c', '--camera', default=False, type=_parse_camera_flag,
                        help='Set to 1 to scan documents from the camera video stream')
    parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
    args = parser.parse_args()

    if not args.camera:
        parser.print_help()
        return

    try:
        # set license (fall back to the embedded key when none is supplied)
        docscanner.initLicense(args.license or DEFAULT_LICENSE_KEY)

        # initialize document scanner
        scanner = docscanner.createInstance()
        scanner.setParameters(docscanner.Templates.color)

        process_video(scanner)
    except Exception as err:
        # Top-level boundary of the CLI: report the error and exit non-zero.
        print(err)
        sys.exit(1)


if __name__ == '__main__':
    scandocument()
Methods
-
docscanner.initLicense('YOUR-LICENSE-KEY')
# set the license key
docscanner.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==")
-
docscanner.createInstance()
# create a Document Scanner instance
scanner = docscanner.createInstance()
-
detectFile(filename)
# do edge detection from an image file
results = scanner.detectFile(<filename>)
-
detectMat(Mat image)
# do edge detection from Mat
image = cv2.imread(<filename>)
results = scanner.detectMat(image)
for result in results:
    x1 = result.x1
    y1 = result.y1
    x2 = result.x2
    y2 = result.y2
    x3 = result.x3
    y3 = result.y3
    x4 = result.x4
    y4 = result.y4
-
setParameters(Template)
# Select color, binary or grayscale template
scanner.setParameters(docscanner.Templates.color)
-
addAsyncListener(callback function)
# start a native thread to run document scanning tasks
-
detectMatAsync(<opencv mat data>)
# put a document scanning task into the native queue
def callback(results):
    for result in results:
        print(result.x1)
        print(result.y1)
        print(result.x2)
        print(result.y2)
        print(result.x3)
        print(result.y3)
        print(result.x4)
        print(result.y4)

import cv2
image = cv2.imread(<filename>)
scanner.addAsyncListener(callback)
scanner.detectMatAsync(image)
sleep(5)
-
normalizeBuffer(mat, x1, y1, x2, y2, x3, y3, x4, y4)
# do perspective correction from Mat
normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
-
normalizeFile(filename, x1, y1, x2, y2, x3, y3, x4, y4)
# do perspective correction from a file
normalized_image = scanner.normalizeFile(<filename>, x1, y1, x2, y2, x3, y3, x4, y4)
-
normalized_image.save(filename)
# save the normalized image to a file
normalized_image.save(<filename>)
-
normalized_image.recycle()
# release the memory of the normalized image
C/C++ API
To customize Python API based on C/C++, please refer to the online documentation.
How to Build the Python Document Scanner Extension
-
Create a source distribution:
python setup.py sdist
-
setuptools:
python setup_setuptools.py build
python setup_setuptools.py develop
-
Build wheel:
pip wheel . --verbose
# Or
python setup.py bdist_wheel
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distributions
Hashes for document-scanner-sdk-1.0.1.tar.gz
Algorithm | Hash digest | |
---|---|---|
SHA256 | ae2db9f972619dc65069f01f55d3da381f2d423e66f47858f8bc85932bc42158 |
|
MD5 | 5d93e8a9718c8e7734ea88e5af93f7e6 |
|
BLAKE2b-256 | eb960e637a1bdfbbd25eb0fd62dfdbffcd866fe7f91911c1bb6acf497b0e1f6c |
Hashes for document_scanner_sdk-1.0.1-cp310-cp310-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 8e7e9790ba3c6cb8f19a3b89496565cc38557c72556210b8c2ed6264fe4cde1b |
|
MD5 | 1432341c1255a50e969a2a412ddaf2b8 |
|
BLAKE2b-256 | 6ed5fe51e9e3b0e7ce4120b27337bb78c94e77f58e5ff073837761d1c0e450f9 |
Hashes for document_scanner_sdk-1.0.1-cp310-cp310-manylinux_2_24_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | e6be0be65f7ca601edd53e4d65fab8fb8e0942ac73361e02b13a9d91f376823f |
|
MD5 | 8af8605e99ad7779ac635f5d5d8f8ab1 |
|
BLAKE2b-256 | 63cd43532bede8df1ea3f7171c2ac615eb063fe193d13781e6803016c41e0496 |
Hashes for document_scanner_sdk-1.0.1-cp39-cp39-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | be5b3b93059590f6cc46c7ad004e59558aae9a36b84b3d9593007783d2f0ba06 |
|
MD5 | 84cb6a636628177ad46d35c0cc5bbf07 |
|
BLAKE2b-256 | ffff5228e0c627bbc6d9d939d4d6bdf0c7a884ec3c34178c3078820e74b9854c |
Hashes for document_scanner_sdk-1.0.1-cp39-cp39-manylinux_2_24_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 2a1078331e9227d5ad798533ca6374b785c0dccf417f2d4a1f253eb3b8601ddc |
|
MD5 | 96da0f52759d02f64366548298eb03a2 |
|
BLAKE2b-256 | 4c60a73a0d00862bd53f42baf4f9a7408ebf2ba6faf4a3cd0fa6f4f3b299dd76 |
Hashes for document_scanner_sdk-1.0.1-cp38-cp38-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 85f8c988caca3949f47032342709f196f642c0ac10dbc44317f7240c1779e0de |
|
MD5 | 313fd7c4f578e644d3e86fbde7125f63 |
|
BLAKE2b-256 | efb16ab47941da94434a306734f803829880e13d7db896474d1567b6d11e3194 |
Hashes for document_scanner_sdk-1.0.1-cp38-cp38-manylinux_2_24_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | d9da84af9186aee1b3fa1520d846267f4955f898e2f4716ccc1fcb28db8e6be3 |
|
MD5 | 0059e6006a152d2c5698a08b675f739f |
|
BLAKE2b-256 | 007278d2f21ce76e233e9a116eaae8a1fb9b12f318b341a848ca1b532d26eed7 |
Hashes for document_scanner_sdk-1.0.1-cp37-cp37m-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | aa129feb564cdf88e42d931108a12378886cfdc5f1dff18a069050eb452c4e4d |
|
MD5 | abff49b6c88f77e9fb9abaa2cb580ce5 |
|
BLAKE2b-256 | 3f86937b71c2651282427cea1acabe90095ad15a37daad62be2a85f0dd297f7d |
Hashes for document_scanner_sdk-1.0.1-cp37-cp37m-manylinux_2_24_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 2ec5d62e06e7c32b6a17e327ea92d7f6b4c46f24f350529995258c78cf5dcdf4 |
|
MD5 | 7a4a50fff4674d8876280c3379a1437c |
|
BLAKE2b-256 | 4a237e22ec0bb5aacd9c1925db191f7cc71ff7ff99e2b5d7f3a9b501118ea4b4 |
Hashes for document_scanner_sdk-1.0.1-cp36-cp36m-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | f0a0066e5f6072e2280919bc933700b21df0ddebfbb70632ebd3320edb2eb1b4 |
|
MD5 | 146fbf4c90c9acfc25896a650f3deff0 |
|
BLAKE2b-256 | 332a0d4601b2b8ebfc11b624b4377a925676416b5061184c0275eca70fa59c77 |
Hashes for document_scanner_sdk-1.0.1-cp36-cp36m-manylinux_2_24_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | b6a5eca220e1930cfb5cb883262d835ee2f31e2ff48f8a1b432bf82d4f3121aa |
|
MD5 | dce9da42a9adbce298d6241e0c6a315e |
|
BLAKE2b-256 | 63d1293491f6476de4b5866394716fa814018f3141102918f36536c79f609849 |