Skip to main content

Document Scanner SDK for document edge detection, border cropping, perspective correction and brightness adjustment

Project description

Python Document Scanner SDK

This project is a Python binding to the Dynamsoft C/C++ Document Scanner SDK. It helps developers quickly build desktop document scanner applications in Python on Windows and Linux.

About Dynamsoft Document Scanner

Get a 30-day FREE trial license to activate the SDK.

Supported Python Versions

  • Python 3.x

Dependencies

pip install opencv-python

Command-line Usage

# Scan documents from images
$ scandocument -f <file-name> -l <license-key>

# Scan documents from camera video stream
$ scandocument -c 1 -l <license-key>

Quick Start

  • Scan documents from an image file:

    import argparse
    import docscanner
    import sys
    import numpy as np
    import cv2
    import time
    
    def showNormalizedImage(name, normalized_image):
        """Show a normalized image in an OpenCV window and return it as a Mat.

        ``normalized_image`` is converted with
        ``docscanner.convertNormalizedImage2Mat``, displayed in the window
        titled ``name``, and the converted Mat is handed back to the caller.
        """
        converted = docscanner.convertNormalizedImage2Mat(normalized_image)
        cv2.imshow(name, converted)
        return converted
    
    def process_file(filename, scanner):
        """Detect documents in an image file, preview them and save the result.

        The image is loaded with OpenCV and passed to ``scanner.detectMat``.
        Every detected quadrilateral is perspective-corrected with
        ``scanner.normalizeBuffer``, shown in the "Normalized Image" window
        and outlined in green on the source image. After a key press the
        last normalized image is saved as ``<timestamp>.png`` in the current
        directory.

        Args:
            filename: Path of the image file to scan.
            scanner: Scanner instance created by ``docscanner.createInstance()``.

        Raises:
            ValueError: If the image cannot be loaded from ``filename``.
        """
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning None.
            raise ValueError('Failed to load image: ' + str(filename))

        # Stays None when nothing is detected, so the save step can be skipped
        # instead of failing on an unbound name.
        normalized_image = None
        for result in scanner.detectMat(image):
            x1, y1 = result.x1, result.y1
            x2, y2 = result.x2, result.y2
            x3, y3 = result.x3, result.y3
            x4, y4 = result.x4, result.y4

            # Normalize before drawing so the outline is not part of this crop.
            normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
            showNormalizedImage("Normalized Image", normalized_image)

            # np.int0 was removed in NumPy 2.0; build an int32 contour explicitly.
            contour = np.array([(x1, y1), (x2, y2), (x3, y3), (x4, y4)], dtype=np.int32)
            cv2.drawContours(image, [contour], 0, (0, 255, 0), 2)

        cv2.imshow('Document Image', image)
        cv2.waitKey(0)

        if normalized_image is None:
            print('No document detected')
            return

        normalized_image.save(str(time.time()) + '.png')
        print('Image saved')
    
    def scandocument():
        """
        Command-line script for scanning documents from a given image.

        Options:
            -f/--file     Path to the image file. Help is printed when omitted.
            -l/--license  License key. The built-in trial key is used when the
                          option is omitted or empty.

        Any exception raised while initializing the SDK or processing the
        image is printed and the process exits with status 1.
        """
        parser = argparse.ArgumentParser(description='Scan documents from an image file')
        parser.add_argument('-f', '--file', help='Path to the image file')
        parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
        args = parser.parse_args()

        if args.file is None:
            parser.print_help()
            return

        try:
            # Fall back to the trial key when no license was supplied.
            license_key = args.license or "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="
            docscanner.initLicense(license_key)

            # Initialize the document scanner with the color template.
            scanner = docscanner.createInstance()
            scanner.setParameters(docscanner.Templates.color)

            process_file(args.file, scanner)
        except Exception as err:
            print(err)
            sys.exit(1)

    # Only run the CLI when executed as a script, not when imported.
    if __name__ == '__main__':
        scandocument()
    

    python document scanner from file

  • Scan documents from camera video stream:

    import argparse
    import docscanner
    import sys
    import numpy as np
    import cv2
    import time
    
    # Most recent detection results delivered to the asynchronous listener.
    g_results = None
    # (window name, normalized image) pairs collected for a later save.
    g_normalized_images = []

    def callback(results):
        """Record the latest detection results in the module-level ``g_results``."""
        global g_results
        g_results = results
    
    def showNormalizedImage(name, normalized_image):
        """Show a normalized image in an OpenCV window and return it as a Mat.

        ``normalized_image`` is converted with
        ``docscanner.convertNormalizedImage2Mat``, displayed in the window
        titled ``name``, and the converted Mat is handed back to the caller.
        """
        converted = docscanner.convertNormalizedImage2Mat(normalized_image)
        cv2.imshow(name, converted)
        return converted
        
    def _result_corners(result):
        """Return the four corners of a detection result as (x, y) tuples."""
        return [(result.x1, result.y1), (result.x2, result.y2),
                (result.x3, result.y3), (result.x4, result.y4)]

    def process_video(scanner):
        """Scan documents from the default camera until ESC is pressed.

        Each captured frame is queued for asynchronous detection with
        ``scanner.detectMatAsync``; the registered ``callback`` publishes the
        results in ``g_results`` and they are outlined in green on the
        preview.

        Keys:
            n    Normalize every currently detected document from the current
                 frame and show each one in its own window.
            s    Save the normalized images as ``<timestamp>.png``, close
                 their windows and clear the pending list.
            ESC  Leave the loop.

        Args:
            scanner: Scanner instance created by ``docscanner.createInstance()``.
        """
        # Without this declaration the assignments below would make the name
        # local, and pressing "s" before "n" would raise UnboundLocalError.
        global g_normalized_images

        scanner.addAsyncListener(callback)
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, image = cap.read()
                if not ret or image is None:
                    # No frame available (camera missing or disconnected).
                    print('Failed to read a frame from the camera')
                    break

                # Snapshot once per frame: the callback may rebind g_results
                # while this iteration is running.
                results = g_results

                ch = cv2.waitKey(1)
                if ch == 27:
                    break
                elif ch == ord('n'): # normalize image
                    if results is not None:
                        g_normalized_images = []
                        for index, result in enumerate(results):
                            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = _result_corners(result)
                            normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
                            g_normalized_images.append((str(index), normalized_image))
                            showNormalizedImage(str(index), normalized_image)
                elif ch == ord('s'): # save image
                    for window_name, normalized_image in g_normalized_images:
                        cv2.destroyWindow(window_name)
                        normalized_image.save(str(time.time()) + '.png')
                        print('Image saved')

                    g_normalized_images = []

                scanner.detectMatAsync(image)

                if results is not None:
                    for result in results:
                        # np.int0 was removed in NumPy 2.0; build an int32 contour explicitly.
                        contour = np.array(_result_corners(result), dtype=np.int32)
                        cv2.drawContours(image, [contour], 0, (0, 255, 0), 2)

                cv2.putText(image, 'Press "n" to normalize image', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                cv2.putText(image, 'Press "s" to save image', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                cv2.putText(image, 'Press "ESC" to exit', (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                cv2.imshow('Document Scanner', image)
        finally:
            # Release the camera and windows, and stop the native listener
            # thread, even when the loop ends because of an error.
            cap.release()
            cv2.destroyAllWindows()
            scanner.clearAsyncListener()
    
    def _parse_camera_flag(value):
        """Convert a command-line value such as '1', '0', 'true' or 'no' to a bool.

        ``type=bool`` cannot be used with argparse because ``bool('0')`` and
        ``bool('False')`` are both True (any non-empty string is truthy).
        """
        text = str(value).strip().lower()
        try:
            # Numeric input: zero disables, any other integer enables.
            return int(text) != 0
        except ValueError:
            pass
        if text in ('true', 'yes', 'y', 'on'):
            return True
        if text in ('false', 'no', 'n', 'off', ''):
            return False
        raise argparse.ArgumentTypeError('Expected a boolean value, got %r' % (value,))

    def scandocument():
        """
        Command-line script for scanning documents from camera video stream.

        Options:
            -c/--camera   Enable scanning from the camera (for example ``-c 1``).
                          Help is printed when the option is omitted or false.
            -l/--license  License key. The built-in trial key is used when the
                          option is omitted or empty.

        Any exception raised while initializing the SDK or processing the
        video stream is printed and the process exits with status 1.
        """
        parser = argparse.ArgumentParser(description='Scan documents from camera')
        parser.add_argument('-c', '--camera', default=False, type=_parse_camera_flag, help='Set to 1 to scan documents from the camera video stream')
        parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
        args = parser.parse_args()

        if not args.camera:
            parser.print_help()
            return

        try:
            # Fall back to the trial key when no license was supplied.
            license_key = args.license or "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="
            docscanner.initLicense(license_key)

            # Initialize the document scanner with the color template.
            scanner = docscanner.createInstance()
            scanner.setParameters(docscanner.Templates.color)

            process_video(scanner)
        except Exception as err:
            print(err)
            sys.exit(1)

    # Only run the CLI when executed as a script, not when imported.
    if __name__ == '__main__':
        scandocument()
    

    python document scanner from camera

Methods

  • docscanner.initLicense('YOUR-LICENSE-KEY') # set the license key

    docscanner.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==")
    
  • docscanner.createInstance() # create a Document Scanner instance

    scanner = docscanner.createInstance()
    
  • detectFile(filename) # do edge detection from an image file

    results = scanner.detectFile(<filename>)
    
  • detectMat(Mat image) # do edge detection from Mat

    image = cv2.imread(<filename>)
    results = scanner.detectMat(image)
    for result in results:
        x1 = result.x1
        y1 = result.y1
        x2 = result.x2
        y2 = result.y2
        x3 = result.x3
        y3 = result.y3
        x4 = result.x4
        y4 = result.y4
    
  • setParameters(Template) # Select color, binary or grayscale template

    scanner.setParameters(docscanner.Templates.color)
    
  • addAsyncListener(callback function) # start a native thread to run document scanning tasks

  • detectMatAsync(<opencv mat data>) # put a document scanning task into the native queue

    def callback(results):
        """Print the corner coordinates of every detection result, one per line."""
        for detection in results:
            # Same order as the corners are reported: (x1, y1) ... (x4, y4).
            for attr in ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4'):
                print(getattr(detection, attr))
                                                        
    import cv2
    image = cv2.imread(<filename>)
    scanner.addAsyncListener(callback)
    scanner.detectMatAsync(image)
    sleep(5)
    
  • normalizeBuffer(mat, x1, y1, x2, y2, x3, y3, x4, y4) # do perspective correction from Mat

    normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
    
  • normalizeFile(filename, x1, y1, x2, y2, x3, y3, x4, y4) # do perspective correction from a file

    normalized_image = scanner.normalizeFile(<filename>, x1, y1, x2, y2, x3, y3, x4, y4)
    
  • normalized_image.save(filename) # save the normalized image to a file

    normalized_image.save(<filename>)
    
  • normalized_image.recycle() # release the memory of the normalized image

  • clearAsyncListener() # stop the native thread and clear the registered Python function

C/C++ API

To customize the Python API on top of the underlying C/C++ SDK, please refer to the online documentation.

How to Build the Python Document Scanner Extension

  • Create a source distribution:

    python setup.py sdist
    
  • Build and install in development mode with setuptools:

    python setup_setuptools.py build
    python setup_setuptools.py develop 
    
  • Build wheel:

    pip wheel . --verbose
    # Or
    python setup.py bdist_wheel
    

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

document-scanner-sdk-1.0.2.tar.gz (10.9 MB view details)

Uploaded Source

Built Distributions

document_scanner_sdk-1.0.2-cp310-cp310-win_amd64.whl (4.3 MB view details)

Uploaded CPython 3.10 Windows x86-64

document_scanner_sdk-1.0.2-cp310-cp310-manylinux_2_24_x86_64.whl (6.9 MB view details)

Uploaded CPython 3.10 manylinux: glibc 2.24+ x86-64

document_scanner_sdk-1.0.2-cp39-cp39-win_amd64.whl (4.3 MB view details)

Uploaded CPython 3.9 Windows x86-64

document_scanner_sdk-1.0.2-cp39-cp39-manylinux_2_24_x86_64.whl (6.9 MB view details)

Uploaded CPython 3.9 manylinux: glibc 2.24+ x86-64

document_scanner_sdk-1.0.2-cp38-cp38-win_amd64.whl (4.3 MB view details)

Uploaded CPython 3.8 Windows x86-64

document_scanner_sdk-1.0.2-cp38-cp38-manylinux_2_24_x86_64.whl (6.9 MB view details)

Uploaded CPython 3.8 manylinux: glibc 2.24+ x86-64

document_scanner_sdk-1.0.2-cp37-cp37m-win_amd64.whl (4.3 MB view details)

Uploaded CPython 3.7m Windows x86-64

document_scanner_sdk-1.0.2-cp37-cp37m-manylinux_2_24_x86_64.whl (6.9 MB view details)

Uploaded CPython 3.7m manylinux: glibc 2.24+ x86-64

document_scanner_sdk-1.0.2-cp36-cp36m-win_amd64.whl (4.3 MB view details)

Uploaded CPython 3.6m Windows x86-64

document_scanner_sdk-1.0.2-cp36-cp36m-manylinux_2_24_x86_64.whl (6.9 MB view details)

Uploaded CPython 3.6m manylinux: glibc 2.24+ x86-64

File details

Details for the file document-scanner-sdk-1.0.2.tar.gz.

File metadata

  • Download URL: document-scanner-sdk-1.0.2.tar.gz
  • Upload date:
  • Size: 10.9 MB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/4.0.1 CPython/3.9.15

File hashes

Hashes for document-scanner-sdk-1.0.2.tar.gz
Algorithm Hash digest
SHA256 a3c236abd490c546263cc1d0f67ed4f0b946b9040aba5c29c34133676e7d2c52
MD5 2f0d5fd0cafea00528b2a6eb67041352
BLAKE2b-256 9d2881913ea3357ccb8a80932618ab871a1b9f14299f6285a17f8f404f835dbc

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp310-cp310-win_amd64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp310-cp310-win_amd64.whl
Algorithm Hash digest
SHA256 07507edc6bfee8e9b178af34ee252f16a6729d805f4029a619d3c69bd3ab3899
MD5 d996ce0e6e0c93c96a8f9d5b8398ce03
BLAKE2b-256 be783c01c7aac4c4df11fdc918e7f8c4afbcd5badb5562a749d094be99847958

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp310-cp310-manylinux_2_24_x86_64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp310-cp310-manylinux_2_24_x86_64.whl
Algorithm Hash digest
SHA256 abe060e26b6aaf098bf2b08e90c94a8415b063d94a71adbef0892b89c92583f9
MD5 dc7eb3c4c15e55ad217ebbe3208eb5af
BLAKE2b-256 04cf4d277921a3e9ef3ac922f9094b932a27348c9695184fe3bf3139e91da8f9

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp39-cp39-win_amd64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp39-cp39-win_amd64.whl
Algorithm Hash digest
SHA256 602b309d000eba7b135dabec7f2ac88d57edf687a0d4891ee36286911818dab3
MD5 c9921d07499a7b0c779e718e2c7393f2
BLAKE2b-256 fbe275e07e43bd1a4f18c97c90de431ebc639a5cf7eb01b485204a52523899df

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp39-cp39-manylinux_2_24_x86_64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp39-cp39-manylinux_2_24_x86_64.whl
Algorithm Hash digest
SHA256 0f9fa34dc12c4e02f810b96cb2cf62d4fd6cac1712f31bae1af9479276c11612
MD5 7c75699b8eb354778228aae384a73dc7
BLAKE2b-256 77b2baf3fdbc588de288eda2a404973eb8a8b971ca360594978378eb408dd124

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp38-cp38-win_amd64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp38-cp38-win_amd64.whl
Algorithm Hash digest
SHA256 fa98ca131306e4f7b25c5ea4d1d394ffeed8e159448b7599615db7740a4d5220
MD5 c40bd978aa6477103be8d1a5f9acce6d
BLAKE2b-256 6149e9be42e62f8e7201b90857cab411db53036d0aa05e54abf1e67464efba90

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp38-cp38-manylinux_2_24_x86_64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp38-cp38-manylinux_2_24_x86_64.whl
Algorithm Hash digest
SHA256 b53051a0d60b4b2ad265186c61283a29e6ef8b59dfaf83bac3d2c4698285fad4
MD5 fbb0dd5cc3af5eaab5e3d70116950882
BLAKE2b-256 836df0680b7fd68217308a9d4c98e7e6643420546081aee1da738013e38ba362

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp37-cp37m-win_amd64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp37-cp37m-win_amd64.whl
Algorithm Hash digest
SHA256 d3928ec3b5473632e193c4ce9aecb556a8b6544b5b62d6c9476ad1dedc08c1c6
MD5 f08c1692c90fae722f3dd6b722a227a4
BLAKE2b-256 8abc59bd3a126866b39b07186358fe32a7637c2a33578f81a923120c67360080

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp37-cp37m-manylinux_2_24_x86_64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp37-cp37m-manylinux_2_24_x86_64.whl
Algorithm Hash digest
SHA256 afa40773c8862285af4d4750c368756a11fd2e7fe0f323880604da388fe1e41d
MD5 47c1ac3362b2b2eeec061e12788926f0
BLAKE2b-256 2fff51112cbda4a533517b0a900a815cf09b27ee6643b6b444df3b6b30aad5fd

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp36-cp36m-win_amd64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp36-cp36m-win_amd64.whl
Algorithm Hash digest
SHA256 f2756f7d2528a7dfbb0d97d31323dc71328af250176825817395557cb9d686d7
MD5 0ca937151c8ff72914897a23f2fe7d57
BLAKE2b-256 9d443cc98ad26eb4ebfeeee21f43336ead7e1644e5a3497a435e05a07eeeaf62

See more details on using hashes here.

File details

Details for the file document_scanner_sdk-1.0.2-cp36-cp36m-manylinux_2_24_x86_64.whl.

File metadata

File hashes

Hashes for document_scanner_sdk-1.0.2-cp36-cp36m-manylinux_2_24_x86_64.whl
Algorithm Hash digest
SHA256 05bec8a27f1090c61353a256cb8ad5cbeb826c07b0b78045a2552a8033580cd7
MD5 6cdc73ed2efaff9e1c4336f5f0d071f0
BLAKE2b-256 20b9d6d24be8aa2f491736cd6da07282a547638464fa255914d3d31a924f4b93

See more details on using hashes here.

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page