-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimage_generation.py
98 lines (75 loc) · 3.08 KB
/
image_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pdf2image
import os, pathlib
import cv2
from tqdm import tqdm
from models import Page
from utils import update_status
def split_images(project, split_pct=.5, task=None, steps=None):
    """Split each original page image of *project* into a left and right half.

    If ``project.is_split`` is truthy, ``project.remove_split_pages()`` is
    called first.  Every page returned by
    ``project.get_pages(original_only=True)`` is then read from disk, cut
    vertically at ``split_pct`` of its width, and the two halves are written
    next to the source image as ``<stem>-a<suffix>`` (left) and
    ``<stem>-b<suffix>`` (right).  A ``Page`` of type ``"split"`` is added to
    the project for each half; sequence numbers start at 1 and the left half
    always precedes the right half.

    Args:
        project: Object providing ``is_split``, ``remove_split_pages()``,
            ``get_pages()``, ``add_page()`` and ``set_split()``.
        split_pct: Fraction of the image width at which to cut.
        task: Optional handle forwarded to ``update_status``; progress is
            only reported when it is truthy.
        steps: Forwarded unchanged to ``update_status``.

    Returns:
        True once all pages have been processed.

    Raises:
        OSError: If an image cannot be read by OpenCV.
    """
    if project.is_split:
        project.remove_split_pages()

    pages = project.get_pages(original_only=True)
    total = len(pages)
    seq = 1
    for i, page in enumerate(pages):
        path = pathlib.Path(page.get_img())
        print(path)

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(str(path))
        if img is None:
            raise OSError(f"Could not read image: {path}")

        # Cut the image at the requested fraction of its width.
        width_cutoff = int(img.shape[1] * split_pct)
        a_img = img[:, :width_cutoff]
        b_img = img[:, width_cutoff:]

        new_a = f"{path.stem}-a{path.suffix}"
        new_b = f"{path.stem}-b{path.suffix}"
        # with_name swaps only the final path component; a plain string
        # replace would also rewrite matching text in parent directories.
        cv2.imwrite(str(path.with_name(new_a)), a_img)
        cv2.imwrite(str(path.with_name(new_b)), b_img)

        a_half = Page(sequence=seq, image=new_a, type="split",
                      width=a_img.shape[1], height=a_img.shape[0])
        b_half = Page(sequence=seq + 1, image=new_b, type="split",
                      width=b_img.shape[1], height=b_img.shape[0])
        project.add_page(a_half)
        project.add_page(b_half)
        seq += 2

        if task:
            update_status(task, 'Splitting images...', i, total, steps)

    project.set_split(True)
    return True
def export_binary_images(project, task=None, steps=None):
    """Write a black-and-white TIFF copy of every page image in *project*.

    Each image returned by ``project.get_pages()`` is loaded as grayscale,
    thresholded with ``cv2.THRESH_BINARY`` at a fixed threshold of 128
    (maximum value 255), and saved beside the source file with its extension
    replaced by ``.tiff``.

    Args:
        project: Object providing ``get_pages()`` and ``set_binarized()``.
        task: Optional handle forwarded to ``update_status``; progress is
            only reported when it is truthy.
        steps: Forwarded unchanged to ``update_status``.

    Returns:
        True once all pages have been processed.

    Raises:
        OSError: If an image cannot be read by OpenCV.
    """
    print("\n*** Binarizing images... ***")
    pages = project.get_pages()
    total = len(pages)
    for i, page in enumerate(pages):
        img_path = page.get_img()

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise OSError(f"Could not read image: {img_path}")

        _, im_bw = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)

        # Swap only the real extension.  A blanket str.replace('.jpg', ...)
        # would also touch directory names and, for non-.jpg inputs, leave
        # the path unchanged so the source image would be overwritten.
        tiff_path = os.path.splitext(img_path)[0] + '.tiff'
        cv2.imwrite(tiff_path, im_bw)

        if task:
            update_status(task, 'Binarizing images...', i, total, steps)

    project.set_binarized(True)
    return True
def export_pdf_images(project, task=None, steps=None, dpi=200, batch_size=10):
    """Render every page of the project's PDF to a numbered JPEG file.

    The PDF at ``project.get_pdf()`` is converted in batches of
    ``batch_size`` pages.  Each rendered page is saved in
    ``project.get_image_dir()`` as ``<n>.jpg`` (``n`` counting from 1) and
    registered on the project as a ``Page`` of type ``"original"`` carrying
    the image's width and height.

    Args:
        project: Object providing ``get_pdf()``, ``get_image_dir()`` and
            ``add_page()``.
        task: Optional handle forwarded to ``update_status``; progress is
            only reported when it is truthy.
        steps: Forwarded unchanged to ``update_status``.
        dpi: Rendering resolution passed to ``pdf2image.convert_from_path``.
        batch_size: Number of PDF pages converted per call to
            ``pdf2image.convert_from_path``.

    Returns:
        The image output directory (the value of ``project.get_image_dir()``).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    print("\n*** Converting PDF to images... ****")
    input_file = project.get_pdf()
    output_dir = project.get_image_dir()

    # Create the images folder; exist_ok avoids a check-then-create race.
    os.makedirs(output_dir, exist_ok=True)

    info = pdf2image.pdfinfo_from_path(input_file, userpw=None, poppler_path=None)
    max_pages = info["Pages"]

    i = 1  # 1-based running page number across all batches
    for first_page in range(1, max_pages + 1, batch_size):
        last_page = min(first_page + batch_size - 1, max_pages)
        pil_images = pdf2image.convert_from_path(
            input_file,
            use_cropbox=True,
            dpi=dpi,
            first_page=first_page,
            last_page=last_page,
        )
        # Report the true end of the batch (the final batch may be short).
        print(f"*** Saving images {first_page}-{last_page}... ***")
        for image in tqdm(pil_images):
            # Save file to disk
            file_name = f"{i}.jpg"
            image.save(os.path.join(output_dir, file_name))

            # Save page to database
            width, height = image.size
            new_page = Page(sequence=i, image=file_name, type="original",
                            width=width, height=height)
            project.add_page(new_page)

            if task:
                update_status(task, 'Converting PDF to images...', i, max_pages, steps)
            i += 1

    return output_dir