test-driver: Use futures for OCR parallelization
This commit is contained in:
parent
e6ea13f4ea
commit
819d304a39
@ -1,7 +1,7 @@
|
|||||||
import multiprocessing
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from concurrent.futures import Future, ThreadPoolExecutor
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from test_driver.errors import MachineError
|
from test_driver.errors import MachineError
|
||||||
@ -33,17 +33,19 @@ def perform_ocr_variants_on_screenshot(
|
|||||||
# Docs suggest to run it with OMP_THREAD_LIMIT=1 for hundreds of parallel
|
# Docs suggest to run it with OMP_THREAD_LIMIT=1 for hundreds of parallel
|
||||||
# runs. Our average test run is somewhere in between.
|
# runs. Our average test run is somewhere in between.
|
||||||
# https://github.com/tesseract-ocr/tesseract/issues/3109
|
# https://github.com/tesseract-ocr/tesseract/issues/3109
|
||||||
processes = max(1, int(os.process_cpu_count() / 4))
|
workers = max(1, int(os.process_cpu_count() / 4))
|
||||||
with multiprocessing.Pool(processes=processes) as pool:
|
with ThreadPoolExecutor(max_workers=workers) as e:
|
||||||
image_paths: list[Path] = [screenshot_path]
|
# The idea here is to let the first tesseract call run on the raw image
|
||||||
|
# while the other two are preprocessed + tesseracted in parallel
|
||||||
|
future_results: list[Future] = [e.submit(_run_tesseract, screenshot_path)]
|
||||||
if variants:
|
if variants:
|
||||||
image_paths.extend(
|
|
||||||
pool.starmap(
|
def tesseract_processed(inverted: bool) -> str:
|
||||||
_preprocess_screenshot,
|
return _run_tesseract(_preprocess_screenshot(screenshot_path, inverted))
|
||||||
[(screenshot_path, False), (screenshot_path, True)],
|
|
||||||
)
|
future_results.append(e.submit(tesseract_processed, False))
|
||||||
)
|
future_results.append(e.submit(tesseract_processed, True))
|
||||||
return pool.map(_run_tesseract, image_paths)
|
return [future.result() for future in future_results]
|
||||||
|
|
||||||
|
|
||||||
def _run_tesseract(image: Path) -> str:
|
def _run_tesseract(image: Path) -> str:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user