Skip to content

Commit

Permalink
WIP for progress report
Browse files Browse the repository at this point in the history
  • Loading branch information
apyrgio committed Oct 15, 2024
1 parent 6b65881 commit fff7be7
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 52 deletions.
137 changes: 137 additions & 0 deletions dangerzone/ctx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import datetime

This comment has been minimized.

Copy link
@almet

almet Oct 16, 2024

Member

nit: I believe it would be better to name this "context", or even "conversion_context".

import enum
import logging
import time
from typing import Callable

from colorama import Fore, Style

from .document import Document

log = logging.getLogger(__name__)


class ConversionCtx:

EST_PERCENT_START_CONVERSION_PROC = 1
EST_PERCENT_GATHER_PAGES = 2
EST_PERCENT_CONVERT_PAGES = 96
EST_PERCENT_COMPLETE_CONVERSION = 1

MSG_CONVERSION_PROCESS_TYPE = "process"

# Conversion state
STATE_NOT_STARTED = enum.auto()
STATE_STARTING_CONVERSION_PROC = enum.auto()
STATE_GATHERING_PAGES = enum.auto()
STATE_CONVERTING_PAGES = enum.auto()
STATE_COMPLETED = enum.auto()
STATE_FAILED = enum.auto()

def __init__(
self,
document: Document,
ocr_lang: str | None = None,
progress_callback: Callable | None = None,
) -> None:
self.doc = document
self.ocr_lang = ocr_lang
self.callback = progress_callback

conversion_total = 100 # FiXME:
assert conversion_total == 100

self.percentage: float = 0.0
self.cur_page = 0
self.pages = 0
self.page_timer_start = None
self.state = self.STATE_NOT_STARTED

def is_not_started(self) -> bool:
return self.state is self.STATE_NOT_STARTED

def is_started(self) -> bool:
return self.state in (
self.STATE_STARTING_CONVERSION_PROC,
self.STATE_GATHERING_PAGES,
self.STATE_CONVERTING_PAGES,
)

def is_completed(self) -> bool:
return self.state is Document.STATE_COMPLETED

def is_failed(self) -> bool:
return self.state is Document.STATE_FAILED

def increase(self, step: float) -> None:
assert step > 0
self.percentage += step

def print_message(self, text: str, error: bool = False) -> None:
s = Style.BRIGHT + Fore.YELLOW + f"[doc {self.doc.id}] "
s += Fore.CYAN + f"{int(self.percentage)}% " + Style.RESET_ALL
if error:
s += Fore.RED + text + Style.RESET_ALL
log.error(s)
else:
s += text
log.info(s)

if self.callback:
self.callback(error, text, self.percentage)

def start_conversion_proc(self):
self.state = self.STATE_STARTING_CONVERSION_PROC
self.print_message(
f"Starting a {self.MSG_CONVERSION_PROCESS_TYPE} for the document conversion"
)

def start_page_gathering(self):
self.state = self.STATE_GATHERING_PAGES
self.increase(self.EST_PERCENT_START_CONVERSION_PROC)
self.print_message("Getting number of pages")

def set_total_pages(self, pages: int) -> None:
self.state = self.STATE_CONVERTING_PAGES
self.increase(self.EST_PERCENT_GATHER_PAGES)
assert pages > 0
self.pages = pages

def page_iter(self, pages):
self.set_total_pages(pages)
for page in range(1, pages + 1):
self.start_converting_page(page)
yield page
self.finished_converting_page()

def start_converting_page(self, page: int) -> None:
searchable = "searchable " if self.ocr_lang else ""
remaining = ""

if not self.page_timer_start:
self.page_timer_start = time.monotonic()
else:
processed_pages = page - 1
elapsed = time.monotonic() - self.page_timer_start
elapsed_per_page = elapsed / processed_pages
remaining = (self.pages - processed_pages) * elapsed_per_page
remaining = datetime.timedelta(seconds=round(remaining))
remaining = f" (remaining: {remaining}s)"

self.print_message(
f"Converting page {page}/{self.pages} from pixels to {searchable}PDF{remaining}"
)

def finished_converting_page(self) -> None:
self.increase(self.EST_PERCENT_CONVERT_PAGES / self.pages)

def fail(self, msg: str) -> None:
self.state = self.STATE_FAILED
self.print_message(msg, error=True)
self.doc.mark_as_failed()

def success(self) -> None:
self.state = self.STATE_COMPLETED
self.percentage = 100
self.doc.mark_as_safe()
self.print_message("Conversion completed successfully")
8 changes: 3 additions & 5 deletions dangerzone/gui/main_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ..isolation_provider.dummy import Dummy
from ..isolation_provider.qubes import Qubes, is_qubes_native_conversion
from ..util import get_resource_path, get_subprocess_startupinfo, get_version
from ..ctx import ConversionCtx
from .logic import Alert, CollapsibleBox, DangerzoneGui, UpdateDialog
from .updater import UpdateReport

Expand Down Expand Up @@ -1124,11 +1125,8 @@ def __init__(
self.dangerzone = dangerzone

def convert_document(self) -> None:
self.dangerzone.isolation_provider.convert(
self.document,
self.ocr_lang,
self.progress_callback,
)
ctx = ConversionCtx(self.document, self.ocr_lang, self.progress_callback)
self.dangerzone.isolation_provider.convert(ctx)
self.finished.emit(self.error)

def progress_callback(self, error: bool, text: str, percentage: int) -> None:
Expand Down
56 changes: 14 additions & 42 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ..conversion.common import DEFAULT_DPI, INT_BYTES
from ..document import Document
from ..util import get_tessdata_dir, replace_control_chars
from ..ctx import ConversionCtx

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -97,29 +98,24 @@ def __init__(self) -> None:
def install(self) -> bool:
pass

def convert(
self,
document: Document,
ocr_lang: Optional[str],
progress_callback: Optional[Callable] = None,
) -> None:
self.progress_callback = progress_callback
def convert(self, ctx: ConversionCtx) -> None:
document = ctx.doc
document.mark_as_converting()
try:
ctx.start_conversion_proc()
with self.doc_to_pixels_proc(document) as conversion_proc:
self.convert_with_proc(document, ocr_lang, conversion_proc)
document.mark_as_safe()
ctx.start_page_gathering()
self.convert_with_proc(ctx, conversion_proc)
if document.archive_after_conversion:
document.archive()
ctx.success()
except errors.ConversionException as e:
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
ctx.fail(str(e))
except Exception as e:
log.exception(
f"An exception occurred while converting document '{document.id}'"
)
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
ctx.fail(str(e))

def ocr_page(self, pixmap: fitz.Pixmap, ocr_lang: str) -> bytes:
"""Get a single page as pixels, OCR it, and return a PDF as bytes."""
Expand Down Expand Up @@ -157,12 +153,13 @@ def pixels_to_pdf_page(

def convert_with_proc(
self,
document: Document,
ocr_lang: Optional[str],
ctx: ConversionCtx,
p: subprocess.Popen,
) -> None:
ocr_lang = ctx.ocr_lang
document = ctx.doc
percentage = 0.0
with open(document.input_filename, "rb") as f:
with open(ctx.doc.input_filename, "rb") as f:
try:
assert p.stdin is not None
p.stdin.write(f.read())
Expand All @@ -178,13 +175,7 @@ def convert_with_proc(

safe_doc = fitz.Document()

for page in range(1, n_pages + 1):
searchable = "searchable " if ocr_lang else ""
text = (
f"Converting page {page}/{n_pages} from pixels to {searchable}PDF"
)
self.print_progress(document, False, text, percentage)

for page in ctx.page_iter(n_pages):
width = read_int(p.stdout)
height = read_int(p.stdout)
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
Expand Down Expand Up @@ -216,25 +207,6 @@ def convert_with_proc(
safe_doc.save(document.sanitized_output_filename)
os.replace(document.sanitized_output_filename, document.output_filename)

# TODO handle leftover code input
text = "Successfully converted document"
self.print_progress(document, False, text, 100)

def print_progress(
    self, document: Document, error: bool, text: str, percentage: float
) -> None:
    """Log a colored progress line for ``document`` and notify the callback.

    The line is prefixed with the document id and the integer percentage.
    Errors are colored red and logged at error level; everything else is
    logged at info level. The progress callback, if set, receives the
    uncolored ``text``.
    """
    prefix = "".join(
        [
            Style.BRIGHT,
            Fore.YELLOW,
            f"[doc {document.id}] ",
            Fore.CYAN,
            f"{int(percentage)}% ",
            Style.RESET_ALL,
        ]
    )

    if not error:
        log.info(prefix + text)
    else:
        log.error(prefix + Fore.RED + text + Style.RESET_ALL)

    notify = self.progress_callback
    if notify:
        notify(error, text, percentage)

def get_proc_exception(
self, p: subprocess.Popen, timeout: int = TIMEOUT_EXCEPTION
) -> Exception:
Expand Down
8 changes: 3 additions & 5 deletions dangerzone/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from . import errors, util
from .document import Document
from .ctx import ConversionCtx
from .isolation_provider.base import IsolationProvider
from .settings import Settings
from .util import get_resource_path
Expand Down Expand Up @@ -65,12 +66,9 @@ def convert_documents(
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None:
def convert_doc(document: Document) -> None:
ctx = ConversionCtx(document, ocr_lang, stdout_callback)
try:
self.isolation_provider.convert(
document,
ocr_lang,
stdout_callback,
)
self.isolation_provider.convert(ctx)
except Exception as e:
log.exception(
f"Unexpected error occurred while converting '{document}'"
Expand Down
26 changes: 26 additions & 0 deletions prog_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/python3

import logging
import sys
import time

from dangerzone import document
from dangerzone.ctx import ConversionCtx


def main() -> int:
    """Simulate a 10-page conversion to exercise ConversionCtx reporting.

    Each page takes 0.2 seconds, and every fifth page is treated as a
    failure. The first failure ends the run, so a failed document is never
    reported as successfully converted afterwards.

    Returns:
        0 if the simulated conversion succeeded, 1 if it failed. The value
        is used as the process exit status.
    """
    logging.basicConfig(level=logging.INFO)
    doc = document.Document()
    ctx = ConversionCtx(doc)
    ctx.start_conversion_proc()
    ctx.start_page_gathering()
    for page in ctx.page_iter(10):
        time.sleep(0.2)
        if not page % 5:
            ctx.fail(f"Failed during page {page}")
            # A failure is terminal: stop converting and skip success().
            return 1

    ctx.success()
    return 0


if __name__ == "__main__":
sys.exit(main())

0 comments on commit fff7be7

Please sign in to comment.