OCR on screenshots with Tesseract (Windows only)
Project description
Scan screenshots with Tesseract
Example 1 (screenshots from BlueStacks using ADB)
import cv2
import pandas as pd
import numpy as np
from time import sleep
from tesseract_window_scanner import pd_add_tesseract, sub_color_in_image, \
substitute_colors_with_equal_rgb_values, draw_tesseract_results, get_tesseractdf,keyboard,ScreenShots
def activate_stop():
    """Hotkey callback: set the module-level ``stop`` flag so the scan loop ends."""
    global stop
    stop = True


# Hand the path of the Tesseract executable to the library.
tesseractpath = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pd_add_tesseract(tesseractpath)

# NOTE(review): Tesseract traineddata names are normally three-letter codes
# (e.g. "deu", "por") - confirm that "de" and "pt" resolve on your install.
languages = "de+pt+deu"

# hwnd=None: the screenshots come from the ADB device, not from a window handle.
sc2 = ScreenShots(
    hwnd=None,
    adb_path=r"C:\ProgramData\adb\adb.exe",
    adb_serial="localhost:5735",
)

quit_key = "q"
# Presumably opens a live preview of the ADB screen that closes on quit_key.
sc2.imshow_adb(sleep_time=0.05, quit_key=quit_key)
sleep(1)
sc2.enable_show_edited_images()

# The same key that is passed to the preview also stops the loop below.
stop = False
keyboard.add_hotkey(quit_key, activate_stop)
showresults = True

while not stop:
    screenshot_window = sc2.imget_adb()

    # optional filter
    # (presumably recolours every pixel matching the conditions to newcolor)
    screenshot_window = sub_color_in_image(
        img=screenshot_window,
        conditions=(("r", ">", 200), "|", ("g", ">", 200), "|", ("b", ">", 200)),
        newcolor=(255, 255, 255),
    )

    df = get_tesseractdf(
        screenshot_window,
        lang=languages,
        drop_empty_strings=True,
        conf_thresh=60,
    )
    tesserresults = draw_tesseract_results(
        dft=df,
        img=screenshot_window,
        conf_thresh=60,
    )
    if showresults:
        sc2.show_edited_image(tesserresults)  # show the edited pic
    print(df[["text", "conf"]])
Example 2 (screenshots from BlueStacks using hwnd)
import cv2
import pandas as pd
import numpy as np
from time import sleep
from tesseract_window_scanner import pd_add_tesseract, sub_color_in_image, \
substitute_colors_with_equal_rgb_values, draw_tesseract_results, get_tesseractdf,keyboard,ScreenShots
def activate_stop():
    """Hotkey callback: set the module-level ``stop`` flag so the scan loop ends."""
    global stop
    stop = True


tesseractpath = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# NOTE(review): Tesseract traineddata names are normally three-letter codes
# (e.g. "deu", "por") - confirm that "de" and "pt" resolve on your install.
languages = "de+pt+deu"
# Hand the path of the Tesseract executable to the library.
pd_add_tesseract(tesseractpath)

# No hwnd is passed here; the window is looked up by a title regex instead.
sc2 = ScreenShots()
sc2.find_window_with_regex("[bB]lue[sS]tacks.*")

quit_key = "e"
# Presumably opens a live preview of the window that closes on quit_key.
sc2.imshow_hwnd(sleep_time=0.05, quit_key=quit_key)
sleep(1)
sc2.enable_show_edited_images()

# The same key that is passed to the preview also stops the loop below.
stop = False
keyboard.add_hotkey(quit_key, activate_stop)
showresults = True

while not stop:
    screenshot_window = sc2.imget_hwnd()

    # Optional colour filter, disabled in this example:
    # screenshot_window = sub_color_in_image(
    #     img=screenshot_window,
    #     conditions=(("r", ">", 200), "|", ("g", ">", 200), "|", ("b", ">", 200)),
    #     newcolor=(255, 255, 255),
    # )

    df = get_tesseractdf(
        screenshot_window,
        lang=languages,
        drop_empty_strings=True,
        conf_thresh=60,
    )
    tesserresults = draw_tesseract_results(
        dft=df,
        img=screenshot_window,
        conf_thresh=60,
    )
    if showresults:
        sc2.show_edited_image(tesserresults)  # show the edited pic
    print(df[["text", "conf"]])
Example 3 (screenshots from BlueStacks using hwnd without showing the results)
import cv2
import pandas as pd
import numpy as np
from time import sleep
from tesseract_window_scanner import pd_add_tesseract, sub_color_in_image, \
substitute_colors_with_equal_rgb_values, draw_tesseract_results, get_tesseractdf,keyboard,ScreenShots
def activate_stop():
    """Hotkey callback: set the module-level ``stop`` flag so the scan loop ends."""
    global stop
    stop = True


tesseractpath = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# NOTE(review): Tesseract traineddata names are normally three-letter codes
# (e.g. "deu", "por") - confirm that "de" and "pt" resolve on your install.
languages = "de+pt+deu"
# Hand the path of the Tesseract executable to the library.
pd_add_tesseract(tesseractpath)

# No hwnd is passed here; the window is looked up by a title regex instead.
sc2 = ScreenShots()
sc2.find_window_with_regex("[bB]lue[sS]tacks.*")

# No preview window in this example: the hotkey is the only way to stop.
quit_key = "x"
stop = False
keyboard.add_hotkey(quit_key, activate_stop)

while not stop:
    screenshot_window = sc2.imget_hwnd()

    # Presumably recolours every pixel matching the conditions to newcolor.
    screenshot_window = sub_color_in_image(
        img=screenshot_window,
        conditions=(("r", ">", 200), "|", ("g", ">", 200), "|", ("b", ">", 200)),
        newcolor=(255, 255, 255),
    )

    df = get_tesseractdf(
        screenshot_window,
        lang=languages,
        drop_empty_strings=True,
        conf_thresh=60,
    )
    print(df[["text", "conf"]])
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Close
Hashes for tesseract_window_scanner-0.11.tar.gz
| Algorithm | Hash digest |
|---|---|
| SHA256 | 089828d089d3fc9d53ea1b7f25e56ce4b55af98c3872a47b913ad7ef301b3e85 |
| MD5 | e34df3be830c3d3fee8404a5cdf530e8 |
| BLAKE2b-256 | 9477866cf6c372d404f3fbcff82b45d4a93af58ea7e898cf50267790cdaebd84 |
Close
Hashes for tesseract_window_scanner-0.11-py3-none-any.whl
| Algorithm | Hash digest |
|---|---|
| SHA256 | 685d75f99a575995e35d6b8914e7bf67f7045f082e40b5ee89a4dae4f2e89b86 |
| MD5 | dfec7e8da6a0af80efa1b044fb09e2a4 |
| BLAKE2b-256 | 22ccf391005e6e64eb4d8d16cfa0cc0074e295c0a820a2a03e7709b16d3f1a05 |