2024-01-05 10:53:12 +00:00
|
|
|
import os
|
|
|
|
from pypdf import PdfReader, PdfWriter
|
|
|
|
|
2024-01-05 16:43:01 +00:00
|
|
|
import datetime as dt
|
|
|
|
import logging
|
|
|
|
import sys
|
|
|
|
|
|
|
|
# Logging setup: records at DEBUG and above go to a dated file under /var/log
# and to stdout.
# The leading stamp is computed once, when this statement runs, so every
# record carries the same prefix next to its own %(asctime)s.
_run_stamp = str(dt.datetime.now()).replace(" ", "_")
_log_file = "/var/log/" + str(dt.datetime.today().strftime('%Y-%m-%d')) + "_-_pdf_util.log"

logging.basicConfig(
    # Use logging.INFO or logging.ERROR here for quieter output.
    level=logging.DEBUG,
    format=_run_stamp + " | %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(_log_file),
        logging.StreamHandler(sys.stdout),
    ],
)
|
|
|
|
|
|
|
|
|
2024-01-05 10:53:12 +00:00
|
|
|
class pdf_util:
    """Split and merge helpers bound to a single PDF file on disk.

    Attributes:
        file_path: Path of the source PDF, exactly as given to the constructor.
        file_name: Base name of ``file_path`` (directory part removed).
        file_name_wo_extension: ``file_name`` without its final extension.
    """

    def __init__(self, file_path: str) -> None:
        """Remember *file_path* and derive the name parts used for outputs."""
        self.file_path = file_path
        self.file_name = os.path.basename(file_path)
        self.file_name_wo_extension = os.path.splitext(self.file_name)[0]

    def split_pdf_with_location(self, output_filepath: str, no_names: bool = False,
                                int_padding: bool = False) -> list:
        """Write every page of the source PDF to its own one-page PDF.

        Args:
            output_filepath: Path whose *directory part* is the output
                directory. Pass it with a trailing separator
                (``"out/pages/"``) to target ``out/pages``; a value without
                any directory part writes into the current directory.
            no_names: When true, files are named ``<page>.pdf``; otherwise
                ``<source name without extension>_<page>.pdf``.
            int_padding: When true, the page number is zero-padded to four
                digits.

        Returns:
            The paths of the written files, in page order. Pages are
            numbered starting from 1.
        """
        out_dir = os.path.dirname(output_filepath)
        # os.makedirs("") raises, so only create a directory when one is named.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        name_prefix = "" if no_names else self.file_name_wo_extension + "_"
        out_filenames = []

        with open(self.file_path, "rb") as pdf_file:
            pdf_reader = PdfReader(pdf_file)
            for page_num, page in enumerate(pdf_reader.pages, start=1):
                writer = PdfWriter()
                writer.add_page(page)

                str_page_num = str(page_num)
                if int_padding:
                    str_page_num = str_page_num.zfill(4)

                # os.path.join keeps an empty out_dir relative instead of
                # turning it into a path at the filesystem root.
                out_filename = os.path.join(out_dir, name_prefix + str_page_num + ".pdf")
                with open(out_filename, "wb") as outfile:
                    writer.write(outfile)
                out_filenames.append(out_filename)

        return out_filenames

    # Deprecate when pdf_project_manager takes effect
    def split_pdf(self, int_padding: bool = False) -> list:
        """Split the source PDF into a ``split_pdf`` folder beside it.

        Args:
            int_padding: Forwarded to :meth:`split_pdf_with_location`.

        Returns:
            The paths of the written one-page files, in page order.
        """
        # Joining (rather than concatenating "/split_pdf") keeps the folder
        # next to the source even when file_path has no directory part.
        split_dir = os.path.join(os.path.dirname(self.file_path), "split_pdf")
        os.makedirs(split_dir, exist_ok=True)
        # The trailing separator makes dirname() in the callee resolve to
        # split_dir itself.
        return self.split_pdf_with_location(os.path.join(split_dir, ""), False, int_padding)

    def merge_pdf_with_and_location(self, merge_file_path: str, output_filepath: str) -> str:
        """Append the pages of *merge_file_path* to the source PDF's pages.

        Args:
            merge_file_path: PDF whose pages follow the source PDF's pages.
            output_filepath: File to write the combined PDF to; its parent
                directory is created when missing.

        Returns:
            ``output_filepath``, unchanged.
        """
        out_dir = os.path.dirname(output_filepath)
        # os.makedirs("") raises, so skip it for a bare file name.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # Open both inputs before writing anything, and keep the readers
        # referenced until the output has been written.
        readers = [PdfReader(self.file_path), PdfReader(merge_file_path)]
        writer = PdfWriter()
        for reader in readers:
            for page in reader.pages:
                writer.add_page(page)

        with open(output_filepath, "wb") as outfile:
            writer.write(outfile)

        return output_filepath

    # Deprecate when pdf_project_manager takes effect
    def merge_pdf_with(self, merge_file_path: str, merged_name: str = "merged") -> str:
        """Merge into ``merge_pdf/<merged_name>.pdf`` beside the source PDF.

        Args:
            merge_file_path: PDF whose pages follow the source PDF's pages.
            merged_name: Output file name without the ``.pdf`` extension.

        Returns:
            The path of the merged file.
        """
        merge_dir = os.path.join(os.path.dirname(self.file_path), "merge_pdf")
        os.makedirs(merge_dir, exist_ok=True)
        return self.merge_pdf_with_and_location(
            merge_file_path, os.path.join(merge_dir, merged_name + ".pdf")
        )
|