diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dbe9c82 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index fd57988..9a228d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,12 @@ - FROM python:3.11-slim +# FROM python:3.11.3 # set the working directory WORKDIR /app RUN mkdir /app/uploads RUN mkdir /app/split RUN mkdir /app/merge +RUN mkdir /app/projects RUN apt-get update # install dependencies diff --git a/app.py b/app.py index c521abf..256cc17 100644 --- a/app.py +++ b/app.py @@ -1,13 +1,28 @@ import shutil import os from flask import Flask, render_template, request, redirect, jsonify, send_from_directory -from PyPDF2 import PdfReader, PdfWriter -from pathlib import Path +from pdf_util.pdf_project_manager import pdf_project_manager + +import datetime as dt +import logging +import sys + +# Setup Logging +logging.basicConfig( + # level=logging.ERROR, + # level=logging.INFO, + level=logging.DEBUG, + format=str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.FileHandler("/var/log/" + str(dt.datetime.today().strftime('%Y-%m-%d')) + "_-_cron.log"), + logging.StreamHandler(sys.stdout) + ] +) + app = Flask(__name__) -UPLOAD_FOLDER = 'uploads' -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +app.config['UPLOAD_FOLDER'] = 'uploads' @app.route('/') @@ -27,41 +42,30 @@ def send_merge(path): @app.route('/split_to_zip', methods=['POST']) def split_to_zip(): - if 'pdf' not in request.files: + if 'pdf_1' not in request.files: return redirect(request.url) - pdf_file = request.files['pdf'] + pdf_file = request.files['pdf_1'] if pdf_file.filename == '': return redirect(request.url) if pdf_file: - in_filename = pdf_file.filename + in_filename = pdf_file.filename.rsplit('.', 1)[0] filename = os.path.join(app.config['UPLOAD_FOLDER'], pdf_file.filename) pdf_file.save(filename) - out_filenames = [] - 
Path("/tmp/split_pdf").mkdir(parents=True, exist_ok=True) - with open(filename, 'rb') as pdf_file: - pdf_reader = PdfReader(pdf_file) - num_pages = len(pdf_reader.pages) - for page_num in range(num_pages): + # Use pdf_project_manager to split File + pdf_project = pdf_project_manager() + pdf_project.add_pdf(filename) - writer = PdfWriter() - writer.add_page(pdf_reader.pages[page_num]) + logging.debug(in_filename) + logging.debug(os.path.splitext(pdf_file.filename)[0]) - out_filename = '/tmp/split_pdf/' + in_filename + '_' + str(page_num) + '.pdf' - with open(out_filename, 'wb') as outfile: - writer.write(outfile) - out_filenames.append(out_filename) - - shutil.make_archive(in_filename.rsplit('.', 1)[0] + '_splitted', 'zip', "/tmp/split_pdf") - zip_filename = in_filename.rsplit('.', 1)[0] + "_splitted.zip" + shutil.make_archive(in_filename + '_splitted', 'zip', "/app/projects/" + pdf_project.uuid + '/splitted') + zip_filename = in_filename + "_splitted.zip" os.rename("/app/" + zip_filename, "/app/split/" + zip_filename) - for temp_file in out_filenames: - Path.unlink(temp_file) - response = jsonify({"url": "/split/" + zip_filename, "name": zip_filename}) # response.headers.add("Access-Control-Allow-Origin", "*") return response @@ -79,35 +83,27 @@ def merge_to_pdf(): return redirect(request.url) if pdf_file_1: - filename_1 = os.path.join(app.config['UPLOAD_FOLDER'], pdf_file_1.filename) + filename_1 = os.path.join(os.path.dirname(os.path.realpath(__file__)), app.config['UPLOAD_FOLDER'], pdf_file_1.filename) pdf_file_1.save(filename_1) if pdf_file_2: - filename_2 = os.path.join(app.config['UPLOAD_FOLDER'], pdf_file_2.filename) + filename_2 = os.path.join(os.path.dirname(os.path.realpath(__file__)), app.config['UPLOAD_FOLDER'], pdf_file_2.filename) pdf_file_2.save(filename_2) if pdf_file_1 and pdf_file_2: + logging.debug(filename_1) + logging.debug(filename_2) - with open(filename_1, 'rb') as pdf_file_1, open(filename_2, 'rb') as pdf_file_2: - pdf_reader_1 = 
PdfReader(pdf_file_1) - pdf_reader_2 = PdfReader(pdf_file_2) + # Use pdf_project_manager to split File + pdf_project = pdf_project_manager() + pdf_project.add_pdf(filename_1) + pdf_project.add_pdf(filename_2) + out_path = "/app/projects/" + pdf_project.uuid + "/complete.pdf" + logging.debug(out_path) + os.rename(out_path, "/app/merge/merger.pdf") - Path("/tmp/merge_pdf").mkdir(parents=True, exist_ok=True) - writer = PdfWriter() - - for page_num in range(len(pdf_reader_1.pages)): - writer.add_page(pdf_reader_1.pages[page_num]) - - for page_num in range(len(pdf_reader_2.pages)): - writer.add_page(pdf_reader_2.pages[page_num]) - - out_filename = '/app/merge/merger.pdf' - with open(out_filename, 'wb') as outfile: - writer.write(outfile) - - response = jsonify({"url": '/merge/merger.pdf', "name": 'merge.pdf'}) - # response.headers.add("Access-Control-Allow-Origin", "*") - return response + response = jsonify({"url": '/merge/merger.pdf', "name": os.path.splitext(os.path.basename(out_path))[0]}) + return response if __name__ == '__main__': diff --git a/docker-compose.yaml b/docker-compose.yaml index 115747c..8b0d2c4 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -24,6 +24,6 @@ services: ./init.sh " ports: - - 8001:8000 + - 8002:8000 diff --git a/init.sh b/init.sh index e0c81f7..f1fe207 100755 --- a/init.sh +++ b/init.sh @@ -1,6 +1,9 @@ #!/bin/bash -# (cd /app/ && gunicorn --access-logfile '-' --error-logfile '-' -w 4 -b 0.0.0.0:8000 wsgi:app) # Dev (Logging to console) -(cd /app/ && gunicorn -w 4 -b 0.0.0.0:8000 wsgi:app) +# (cd /app/ && pytest tests/test_pdf_util.py) +(cd /app/ && pytest -o log_cli=true) + +# (cd /app/ && gunicorn -w 4 -b 0.0.0.0:8000 wsgi:app) +(cd /app/ && gunicorn --access-logfile '-' --error-logfile '-' -w 4 -b 0.0.0.0:8000 wsgi:app --log-level debug) # Dev (Logging to console) /bin/bash \ No newline at end of file diff --git a/pdf_util/__init__.py b/pdf_util/__init__.py new file mode 100644 index 0000000..203562b --- /dev/null +++ 
b/pdf_util/__init__.py @@ -0,0 +1 @@ +# __init__.py \ No newline at end of file diff --git a/pdf_util/pdf_project_manager.py b/pdf_util/pdf_project_manager.py new file mode 100644 index 0000000..c1cb0ee --- /dev/null +++ b/pdf_util/pdf_project_manager.py @@ -0,0 +1,97 @@ +import uuid +import os +import shutil +import traceback +import glob + +import datetime as dt +import logging +import sys + +from pdf_util.pdf_util import pdf_util + +base_path = "/app/projects/" + +# Setup Logging +logging.basicConfig( + # level=logging.ERROR, + # level=logging.INFO, + level=logging.DEBUG, + format=str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.FileHandler("/var/log/" + str(dt.datetime.today().strftime('%Y-%m-%d')) + "_-_pdf_project_manager.log"), + logging.StreamHandler(sys.stdout) + ] +) + + +class pdf_project_manager: + def __init__(self, uuid4=None): + if uuid4 is not None: + self.uuid = uuid4 + else: + self.uuid = str(uuid.uuid4()) + + try: + self.pdf_init = os.path.isfile('/app/projects/' + self.uuid + '/complete.pdf') + except Exception as e: + logging.warning("Error looking up file: " + str(e)) + logging.warning("Stacktrace: " + str(traceback.format_exc())) + self.pdf_init = False + + os.makedirs(base_path + self.uuid, exist_ok=True) + self.pdf_handler = None + + def merge_all_single_pages(self): + listing = glob.glob(base_path + self.uuid + '/splitted/*.pdf') + listing.sort() + shutil.copyfile(listing.pop(0), base_path + self.uuid + "/complete.pdf") + + for pdf_file in listing: + print(pdf_file) + pdf_util(base_path + self.uuid + "/complete.pdf").merge_pdf_with_and_location(pdf_file, base_path + self.uuid + "/tmp_complete.pdf") + shutil.copyfile(base_path + self.uuid + "/tmp_complete.pdf", base_path + self.uuid + "/complete.pdf") + os.remove(base_path + self.uuid + "/tmp_complete.pdf") + + def add_pdf(self, pdf_path): + if not self.pdf_init: + shutil.copyfile(pdf_path, base_path + self.uuid + 
"/complete.pdf") + self.pdf_handler = pdf_util(base_path + self.uuid + "/complete.pdf") + self.pdf_init = True + else: + shutil.copyfile(pdf_path, base_path + self.uuid + "/tmp.pdf") + pdf_util(base_path + self.uuid + "/complete.pdf").merge_pdf_with_and_location(base_path + self.uuid + "/tmp.pdf", base_path + self.uuid + "/tmp_complete.pdf") + shutil.copyfile(base_path + self.uuid + "/tmp_complete.pdf", base_path + self.uuid + "/complete.pdf") + os.remove(base_path + self.uuid + "/tmp_complete.pdf") + os.remove(base_path + self.uuid + "/tmp.pdf") + self.pdf_handler = pdf_util(base_path + self.uuid + "/complete.pdf") + + self.pdf_handler.split_pdf_with_location(base_path + self.uuid + '/splitted/', True, True) + + def move_page(self, from_location, to_location): + try: + if from_location <= 0 or to_location <= 0: + raise ValueError("Pagenumber smaller/equal Zero") + + if from_location < to_location: + shutil.move(base_path + self.uuid + '/splitted/' + str(from_location).zfill(4) + '.pdf', base_path + self.uuid + '/splitted/tmp.pdf') + for num in range(from_location, to_location): + print(num) + shutil.move(base_path + self.uuid + '/splitted/' + str(num + 1).zfill(4) + '.pdf', base_path + self.uuid + '/splitted/' + str(num).zfill(4) + '.pdf') + shutil.move(base_path + self.uuid + '/splitted/tmp.pdf', base_path + self.uuid + '/splitted/' + str(to_location).zfill(4) + '.pdf') + + elif from_location > to_location: + shutil.move(base_path + self.uuid + '/splitted/' + str(from_location).zfill(4) + '.pdf', base_path + self.uuid + '/splitted/tmp.pdf') + for num in reversed(range(to_location, from_location)): + print(num) + print("move: " + str(num).zfill(4) + " | to: " + str(num + 1).zfill(4)) + shutil.move(base_path + self.uuid + '/splitted/' + str(num).zfill(4) + '.pdf', base_path + self.uuid + '/splitted/' + str(num + 1).zfill(4) + '.pdf') + shutil.move(base_path + self.uuid + '/splitted/tmp.pdf', base_path + self.uuid + '/splitted/' + str(to_location).zfill(4) + '.pdf') 
+ else: + raise ValueError("from_location and to_location are the same") + + self.merge_all_single_pages() + + except Exception as e: + logging.error("Error while moving page: " + str(e)) + logging.error("Stacktrace: " + str(traceback.format_exc())) diff --git a/pdf_util/pdf_util.py b/pdf_util/pdf_util.py new file mode 100644 index 0000000..9364990 --- /dev/null +++ b/pdf_util/pdf_util.py @@ -0,0 +1,77 @@ +import os +from pypdf import PdfReader, PdfWriter + +import datetime as dt +import logging +import sys + +# Setup Logging +logging.basicConfig( + # level=logging.ERROR, + # level=logging.INFO, + level=logging.DEBUG, + format=str(dt.datetime.now()).replace(" ", "_") + " | %(asctime)s [%(levelname)s] %(message)s", + handlers=[ + logging.FileHandler("/var/log/" + str(dt.datetime.today().strftime('%Y-%m-%d')) + "_-_pdf_util.log"), + logging.StreamHandler(sys.stdout) + ] +) + + +class pdf_util: + def __init__(self, file_path): + self.file_path = file_path + self.file_name = os.path.basename(file_path) + self.file_name_wo_extension = os.path.splitext(os.path.basename(file_path))[0] + + def split_pdf_with_location(self, output_filepath, no_names=False, int_padding=False): + out_filenames = [] + os.makedirs(os.path.dirname(output_filepath), exist_ok=True) + with open(self.file_path, 'rb') as pdf_file: + pdf_reader = PdfReader(pdf_file) + num_pages = len(pdf_reader.pages) + for page_num in range(num_pages): + writer = PdfWriter() + writer.add_page(pdf_reader.pages[page_num]) + + str_page_num = str(page_num + 1) + if int_padding: + str_page_num = str_page_num.zfill(4) + + if no_names: + out_filename = os.path.dirname(output_filepath) + '/' + str_page_num + '.pdf' + else: + out_filename = os.path.dirname(output_filepath) + '/' + self.file_name_wo_extension + '_' + str_page_num + '.pdf' + + with open(out_filename, 'wb') as outfile: + writer.write(outfile) + out_filenames.append(out_filename) + + return out_filenames + + # Deprecate when pdf_project_manager takes effect + def 
split_pdf(self, int_padding=False): + os.makedirs(os.path.dirname(self.file_path) + "/split_pdf", exist_ok=True) + return self.split_pdf_with_location(os.path.dirname(self.file_path) + "/split_pdf/", False, int_padding) + + def merge_pdf_with_and_location(self, merge_file_path, output_filepath): + os.makedirs(os.path.dirname(output_filepath), exist_ok=True) + pdf_reader_1 = PdfReader(self.file_path) + pdf_reader_2 = PdfReader(merge_file_path) + writer = PdfWriter() + + for page_num in range(len(pdf_reader_1.pages)): + writer.add_page(pdf_reader_1.pages[page_num]) + + for page_num in range(len(pdf_reader_2.pages)): + writer.add_page(pdf_reader_2.pages[page_num]) + + with open(output_filepath, 'wb') as outfile: + writer.write(outfile) + + return output_filepath + + # Deprecate when pdf_project_manager takes effect + def merge_pdf_with(self, merge_file_path, merged_name="merged"): + os.makedirs(os.path.dirname(self.file_path) + "/merge_pdf", exist_ok=True) + return self.merge_pdf_with_and_location(merge_file_path, os.path.dirname(self.file_path) + "/merge_pdf" + '/merger.pdf') diff --git a/requirements.txt b/requirements.txt index 5710153..bdcffa7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ flask -PyPDF2 -gunicorn \ No newline at end of file +gunicorn +pypdf + +# For testing +pytest \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index b603233..4dd0ff5 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,19 +1,80 @@ - + - PDF Web Toolkit - - + + + PDF Tools -

PDF Splitter

-
- -

PDF Merger

-
-
- -
+
+

PDF Splitter

+
+ + +

PDF Merger

+
+
+ +
+ + + + - + \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..203562b --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# __init__.py \ No newline at end of file diff --git a/tests/sample_pdfs/sample_10_page.pdf b/tests/sample_pdfs/sample_10_page.pdf new file mode 100644 index 0000000..99d31ce Binary files /dev/null and b/tests/sample_pdfs/sample_10_page.pdf differ diff --git a/tests/sample_pdfs/sample_1_page.pdf b/tests/sample_pdfs/sample_1_page.pdf new file mode 100644 index 0000000..e25081e Binary files /dev/null and b/tests/sample_pdfs/sample_1_page.pdf differ diff --git a/tests/sample_pdfs/sample_2_page.pdf b/tests/sample_pdfs/sample_2_page.pdf new file mode 100644 index 0000000..dbf091d --- /dev/null +++ b/tests/sample_pdfs/sample_2_page.pdf @@ -0,0 +1,198 @@ +%PDF-1.3 +%âãÏÓ + +1 0 obj +<< +/Type /Catalog +/Outlines 2 0 R +/Pages 3 0 R +>> +endobj + +2 0 obj +<< +/Type /Outlines +/Count 0 +>> +endobj + +3 0 obj +<< +/Type /Pages +/Count 2 +/Kids [ 4 0 R 6 0 R ] +>> +endobj + +4 0 obj +<< +/Type /Page +/Parent 3 0 R +/Resources << +/Font << +/F1 9 0 R +>> +/ProcSet 8 0 R +>> +/MediaBox [0 0 612.0000 792.0000] +/Contents 5 0 R +>> +endobj + +5 0 obj +<< /Length 1074 >> +stream +2 J +BT +0 0 0 rg +/F1 0027 Tf +57.3750 722.2800 Td +( A Simple PDF File ) Tj +ET +BT +/F1 0010 Tf +69.2500 688.6080 Td +( This is a small demonstration .pdf file - ) Tj +ET +BT +/F1 0010 Tf +69.2500 664.7040 Td +( just for use in the Virtual Mechanics tutorials. More text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 652.7520 Td +( text. And more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 628.8480 Td +( And more text. And more text. And more text. And more text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 616.8960 Td +( text. And more text. Boring, zzzzz. And more text. And more text. And ) Tj +ET +BT +/F1 0010 Tf +69.2500 604.9440 Td +( more text. And more text. And more text. And more text. 
And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 592.9920 Td +( And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 569.0880 Td +( And more text. And more text. And more text. And more text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 557.1360 Td +( text. And more text. And more text. Even more. Continued on page 2 ...) Tj +ET +endstream +endobj + +6 0 obj +<< +/Type /Page +/Parent 3 0 R +/Resources << +/Font << +/F1 9 0 R +>> +/ProcSet 8 0 R +>> +/MediaBox [0 0 612.0000 792.0000] +/Contents 7 0 R +>> +endobj + +7 0 obj +<< /Length 676 >> +stream +2 J +BT +0 0 0 rg +/F1 0027 Tf +57.3750 722.2800 Td +( Simple PDF File 2 ) Tj +ET +BT +/F1 0010 Tf +69.2500 688.6080 Td +( ...continued from page 1. Yet more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 676.6560 Td +( And more text. And more text. And more text. And more text. And more ) Tj +ET +BT +/F1 0010 Tf +69.2500 664.7040 Td +( text. Oh, how boring typing this stuff. But not as boring as watching ) Tj +ET +BT +/F1 0010 Tf +69.2500 652.7520 Td +( paint dry. And more text. And more text. And more text. And more text. ) Tj +ET +BT +/F1 0010 Tf +69.2500 640.8000 Td +( Boring. More, a little more text. The end, and just as well. 
) Tj +ET +endstream +endobj + +8 0 obj +[/PDF /Text] +endobj + +9 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding +>> +endobj + +10 0 obj +<< +/Creator (Rave \(http://www.nevrona.com/rave\)) +/Producer (Nevrona Designs) +/CreationDate (D:20060301072826) +>> +endobj + +xref +0 11 +0000000000 65535 f +0000000019 00000 n +0000000093 00000 n +0000000147 00000 n +0000000222 00000 n +0000000390 00000 n +0000001522 00000 n +0000001690 00000 n +0000002423 00000 n +0000002456 00000 n +0000002574 00000 n + +trailer +<< +/Size 11 +/Root 1 0 R +/Info 10 0 R +>> + +startxref +2714 +%%EOF diff --git a/tests/test_pdf_project_manager.py b/tests/test_pdf_project_manager.py new file mode 100644 index 0000000..573eed0 --- /dev/null +++ b/tests/test_pdf_project_manager.py @@ -0,0 +1,116 @@ +import os +import shutil +from pdf_util.pdf_project_manager import pdf_project_manager + + +def test_basic_object_creation(): + test_pdf_project_manager = pdf_project_manager() + print(test_pdf_project_manager.uuid) + assert len(test_pdf_project_manager.uuid) == 36 + + shutil.rmtree('/app/projects/' + test_pdf_project_manager.uuid) + + +def test_folder_creation(): + test_pdf_project_manager = pdf_project_manager() + print(test_pdf_project_manager.uuid) + assert os.path.isdir('/app/projects/' + test_pdf_project_manager.uuid) + + shutil.rmtree('/app/projects/' + test_pdf_project_manager.uuid) + + +def test_merge_all_single_pages(): + test_pdf_project_manager = pdf_project_manager() + print(test_pdf_project_manager.uuid) + assert os.path.isdir('/app/projects/' + test_pdf_project_manager.uuid) + + test_pdf_project_manager.add_pdf("/app/tests/sample_pdfs/sample_10_page.pdf") + print(os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf')) + assert os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + 
assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == os.stat("/app/tests/sample_pdfs/sample_10_page.pdf").st_size + + os.remove('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + assert not os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + + test_pdf_project_manager.merge_all_single_pages() + print(os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf')) + assert os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == 81291 + + shutil.rmtree('/app/projects/' + test_pdf_project_manager.uuid) + + +def test_add_multiple_pdfs(): + test_pdf_project_manager = pdf_project_manager() + print(test_pdf_project_manager.uuid) + assert os.path.isdir('/app/projects/' + test_pdf_project_manager.uuid) + + test_pdf_project_manager.add_pdf("/app/tests/sample_pdfs/sample_10_page.pdf") + print(os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf')) + assert os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + + print(os.path.isdir('/app/projects/' + test_pdf_project_manager.uuid + '/splitted')) + assert os.path.isdir('/app/projects/' + test_pdf_project_manager.uuid + '/splitted') + + print(os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf')) + assert os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf') + + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf').st_size == 3167 + + test_pdf_project_manager.add_pdf("/app/tests/sample_pdfs/sample_2_page.pdf") + print(os.path.isfile('/app/projects/' 
+ test_pdf_project_manager.uuid + '/complete.pdf')) + assert os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf').st_size == 3167 + + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0011.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0011.pdf').st_size == 1804 + + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == 48408 + + shutil.rmtree('/app/projects/' + test_pdf_project_manager.uuid) + + +def test_move_pages(): + test_pdf_project_manager = pdf_project_manager() + print(test_pdf_project_manager.uuid) + assert os.path.isdir('/app/projects/' + test_pdf_project_manager.uuid) + + test_pdf_project_manager.add_pdf("/app/tests/sample_pdfs/sample_10_page.pdf") + test_pdf_project_manager.add_pdf("/app/tests/sample_pdfs/sample_2_page.pdf") + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == 48408 + + os.remove('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + assert not os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + test_pdf_project_manager.move_page(1, 4) + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0004.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0004.pdf').st_size == 3167 + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == 83909 + + 
os.remove('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + assert not os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + test_pdf_project_manager.move_page(4, 1) + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0001.pdf').st_size == 3167 + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == 83908 + + os.remove('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + assert not os.path.isfile('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf') + + test_pdf_project_manager.move_page(1, 12) + test_pdf_project_manager.move_page(12, 2) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/splitted/0002.pdf').st_size == 3167 + print(os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size) + assert os.stat('/app/projects/' + test_pdf_project_manager.uuid + '/complete.pdf').st_size == 83909 + + shutil.rmtree('/app/projects/' + test_pdf_project_manager.uuid) diff --git a/tests/test_pdf_util.py b/tests/test_pdf_util.py new file mode 100644 index 0000000..fb33e8d --- /dev/null +++ b/tests/test_pdf_util.py @@ -0,0 +1,301 @@ +import os +import shutil +from pdf_util.pdf_util import pdf_util + + +def test_split_pdf(): + # Single Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").split_pdf() + print(test_file) + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_1_page_1.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_1_page_1.pdf").st_size == 69339 + + # Two Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").split_pdf() + print(test_file) + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_1.pdf").st_size) + 
assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_1.pdf").st_size == 1804 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_2.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_2.pdf").st_size == 1405 + + # Ten Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_10_page.pdf").split_pdf() + print(test_file) + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_1.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_1.pdf").st_size == 3167 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_2.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_2.pdf").st_size == 2888 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_3.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_3.pdf").st_size == 6670 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_4.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_4.pdf").st_size == 3043 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_5.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_5.pdf").st_size == 9968 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_6.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_6.pdf").st_size == 5367 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_7.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_7.pdf").st_size == 10093 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_8.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_8.pdf").st_size == 8578 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_9.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_9.pdf").st_size == 30188 + 
print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_10.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_10.pdf").st_size == 3789 + + shutil.rmtree("/app/tests/sample_pdfs/split_pdf/") + + +def test_split_pdf_int_padding(): + # Single Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").split_pdf(True) + print(test_file) + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_1_page_0001.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_1_page_0001.pdf").st_size == 69339 + + # Two Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").split_pdf(True) + print(test_file) + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_0001.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_0001.pdf").st_size == 1804 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_0002.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_2_page_0002.pdf").st_size == 1405 + + # Ten Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_10_page.pdf").split_pdf(True) + print(test_file) + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0001.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0001.pdf").st_size == 3167 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0002.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0002.pdf").st_size == 2888 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0003.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0003.pdf").st_size == 6670 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0004.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0004.pdf").st_size == 3043 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0005.pdf").st_size) + assert 
os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0005.pdf").st_size == 9968 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0006.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0006.pdf").st_size == 5367 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0007.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0007.pdf").st_size == 10093 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0008.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0008.pdf").st_size == 8578 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0009.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0009.pdf").st_size == 30188 + print(os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0010.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/split_pdf/sample_10_page_0010.pdf").st_size == 3789 + + shutil.rmtree("/app/tests/sample_pdfs/split_pdf/") + + +def test_split_pdf_and_location(): + # Single Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").split_pdf_with_location("/tmp/test_directory/", False) + print(test_file) + print(os.stat("/tmp/test_directory/sample_1_page_1.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_1_page_1.pdf").st_size == 69339 + + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").split_pdf_with_location("/tmp/test_directory/", True) + print(test_file) + print(os.stat("/tmp/test_directory/1.pdf").st_size) + assert os.stat("/tmp/test_directory/1.pdf").st_size == 69339 + + shutil.rmtree("/tmp/test_directory/") + + # Two Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").split_pdf_with_location("/tmp/test_directory/", False) + print(test_file) + print(os.stat("/tmp/test_directory/sample_2_page_1.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_2_page_1.pdf").st_size == 1804 + 
print(os.stat("/tmp/test_directory/sample_2_page_2.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_2_page_2.pdf").st_size == 1405 + + # Two Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").split_pdf_with_location("/tmp/test_directory/", True) + print(test_file) + print(os.stat("/tmp/test_directory/1.pdf").st_size) + assert os.stat("/tmp/test_directory/1.pdf").st_size == 1804 + print(os.stat("/tmp/test_directory/2.pdf").st_size) + assert os.stat("/tmp/test_directory/2.pdf").st_size == 1405 + + shutil.rmtree("/tmp/test_directory/") + + # Ten Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_10_page.pdf").split_pdf_with_location("/tmp/test_directory/", False) + print(test_file) + print(os.stat("/tmp/test_directory/sample_10_page_1.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_1.pdf").st_size == 3167 + print(os.stat("/tmp/test_directory/sample_10_page_2.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_2.pdf").st_size == 2888 + print(os.stat("/tmp/test_directory/sample_10_page_3.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_3.pdf").st_size == 6670 + print(os.stat("/tmp/test_directory/sample_10_page_4.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_4.pdf").st_size == 3043 + print(os.stat("/tmp/test_directory/sample_10_page_5.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_5.pdf").st_size == 9968 + print(os.stat("/tmp/test_directory/sample_10_page_6.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_6.pdf").st_size == 5367 + print(os.stat("/tmp/test_directory/sample_10_page_7.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_7.pdf").st_size == 10093 + print(os.stat("/tmp/test_directory/sample_10_page_8.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_8.pdf").st_size == 8578 + print(os.stat("/tmp/test_directory/sample_10_page_9.pdf").st_size) + assert 
os.stat("/tmp/test_directory/sample_10_page_9.pdf").st_size == 30188 + print(os.stat("/tmp/test_directory/sample_10_page_10.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_10.pdf").st_size == 3789 + + test_file = pdf_util("/app/tests/sample_pdfs/sample_10_page.pdf").split_pdf_with_location("/tmp/test_directory/", True) + print(test_file) + print(os.stat("/tmp/test_directory/1.pdf").st_size) + assert os.stat("/tmp/test_directory/1.pdf").st_size == 3167 + print(os.stat("/tmp/test_directory/2.pdf").st_size) + assert os.stat("/tmp/test_directory/2.pdf").st_size == 2888 + print(os.stat("/tmp/test_directory/3.pdf").st_size) + assert os.stat("/tmp/test_directory/3.pdf").st_size == 6670 + print(os.stat("/tmp/test_directory/4.pdf").st_size) + assert os.stat("/tmp/test_directory/4.pdf").st_size == 3043 + print(os.stat("/tmp/test_directory/5.pdf").st_size) + assert os.stat("/tmp/test_directory/5.pdf").st_size == 9968 + print(os.stat("/tmp/test_directory/6.pdf").st_size) + assert os.stat("/tmp/test_directory/6.pdf").st_size == 5367 + print(os.stat("/tmp/test_directory/7.pdf").st_size) + assert os.stat("/tmp/test_directory/7.pdf").st_size == 10093 + print(os.stat("/tmp/test_directory/8.pdf").st_size) + assert os.stat("/tmp/test_directory/8.pdf").st_size == 8578 + print(os.stat("/tmp/test_directory/9.pdf").st_size) + assert os.stat("/tmp/test_directory/9.pdf").st_size == 30188 + print(os.stat("/tmp/test_directory/10.pdf").st_size) + assert os.stat("/tmp/test_directory/10.pdf").st_size == 3789 + + shutil.rmtree("/tmp/test_directory/") + + +def test_split_pdf_and_location_int_padding(): + # Single Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").split_pdf_with_location("/tmp/test_directory/", False, True) + print(test_file) + print(os.stat("/tmp/test_directory/sample_1_page_0001.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_1_page_0001.pdf").st_size == 69339 + + test_file = 
pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").split_pdf_with_location("/tmp/test_directory/", True, True) + print(test_file) + print(os.stat("/tmp/test_directory/0001.pdf").st_size) + assert os.stat("/tmp/test_directory/0001.pdf").st_size == 69339 + + shutil.rmtree("/tmp/test_directory/") + + # Two Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").split_pdf_with_location("/tmp/test_directory/", False, True) + print(test_file) + print(os.stat("/tmp/test_directory/sample_2_page_0001.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_2_page_0001.pdf").st_size == 1804 + print(os.stat("/tmp/test_directory/sample_2_page_0002.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_2_page_0002.pdf").st_size == 1405 + + # Two Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").split_pdf_with_location("/tmp/test_directory/", True, True) + print(test_file) + print(os.stat("/tmp/test_directory/0001.pdf").st_size) + assert os.stat("/tmp/test_directory/0001.pdf").st_size == 1804 + print(os.stat("/tmp/test_directory/0002.pdf").st_size) + assert os.stat("/tmp/test_directory/0002.pdf").st_size == 1405 + + shutil.rmtree("/tmp/test_directory/") + + # Ten Pages + test_file = pdf_util("/app/tests/sample_pdfs/sample_10_page.pdf").split_pdf_with_location("/tmp/test_directory/", False, True) + print(test_file) + print(os.stat("/tmp/test_directory/sample_10_page_0001.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0001.pdf").st_size == 3167 + print(os.stat("/tmp/test_directory/sample_10_page_0002.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0002.pdf").st_size == 2888 + print(os.stat("/tmp/test_directory/sample_10_page_0003.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0003.pdf").st_size == 6670 + print(os.stat("/tmp/test_directory/sample_10_page_0004.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0004.pdf").st_size == 3043 + 
print(os.stat("/tmp/test_directory/sample_10_page_0005.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0005.pdf").st_size == 9968 + print(os.stat("/tmp/test_directory/sample_10_page_0006.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0006.pdf").st_size == 5367 + print(os.stat("/tmp/test_directory/sample_10_page_0007.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0007.pdf").st_size == 10093 + print(os.stat("/tmp/test_directory/sample_10_page_0008.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0008.pdf").st_size == 8578 + print(os.stat("/tmp/test_directory/sample_10_page_0009.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0009.pdf").st_size == 30188 + print(os.stat("/tmp/test_directory/sample_10_page_0010.pdf").st_size) + assert os.stat("/tmp/test_directory/sample_10_page_0010.pdf").st_size == 3789 + + test_file = pdf_util("/app/tests/sample_pdfs/sample_10_page.pdf").split_pdf_with_location("/tmp/test_directory/", True, True) + print(test_file) + print(os.stat("/tmp/test_directory/0001.pdf").st_size) + assert os.stat("/tmp/test_directory/0001.pdf").st_size == 3167 + print(os.stat("/tmp/test_directory/0002.pdf").st_size) + assert os.stat("/tmp/test_directory/0002.pdf").st_size == 2888 + print(os.stat("/tmp/test_directory/0003.pdf").st_size) + assert os.stat("/tmp/test_directory/0003.pdf").st_size == 6670 + print(os.stat("/tmp/test_directory/0004.pdf").st_size) + assert os.stat("/tmp/test_directory/0004.pdf").st_size == 3043 + print(os.stat("/tmp/test_directory/0005.pdf").st_size) + assert os.stat("/tmp/test_directory/0005.pdf").st_size == 9968 + print(os.stat("/tmp/test_directory/0006.pdf").st_size) + assert os.stat("/tmp/test_directory/0006.pdf").st_size == 5367 + print(os.stat("/tmp/test_directory/0007.pdf").st_size) + assert os.stat("/tmp/test_directory/0007.pdf").st_size == 10093 + print(os.stat("/tmp/test_directory/0008.pdf").st_size) + assert 
os.stat("/tmp/test_directory/0008.pdf").st_size == 8578 + print(os.stat("/tmp/test_directory/0009.pdf").st_size) + assert os.stat("/tmp/test_directory/0009.pdf").st_size == 30188 + print(os.stat("/tmp/test_directory/0010.pdf").st_size) + assert os.stat("/tmp/test_directory/0010.pdf").st_size == 3789 + + shutil.rmtree("/tmp/test_directory/") + + +def test_merge_pdf_with(): + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").merge_pdf_with("/app/tests/sample_pdfs/sample_2_page.pdf") + print(test_file) + print(os.stat("/app/tests/sample_pdfs/merge_pdf/merger.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/merge_pdf/merger.pdf").st_size == 71761 + + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").merge_pdf_with("/app/tests/sample_pdfs/sample_10_page.pdf") + print(test_file) + print(os.stat("/app/tests/sample_pdfs/merge_pdf/merger.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/merge_pdf/merger.pdf").st_size == 115044 + + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").merge_pdf_with("/app/tests/sample_pdfs/sample_10_page.pdf") + print(test_file) + print(os.stat("/app/tests/sample_pdfs/merge_pdf/merger.pdf").st_size) + assert os.stat("/app/tests/sample_pdfs/merge_pdf/merger.pdf").st_size == 48427 + + shutil.rmtree("/app/tests/sample_pdfs/merge_pdf/") + + +def test_merge_pdf_with_and_location(): + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").merge_pdf_with_and_location("/app/tests/sample_pdfs/sample_2_page.pdf", "/tmp/test_directory/merged_file.pdf") + print(test_file) + print(os.stat("/tmp/test_directory/merged_file.pdf").st_size) + assert os.stat("/tmp/test_directory/merged_file.pdf").st_size == 71761 + shutil.rmtree("/tmp/test_directory/") + + test_file = pdf_util("/app/tests/sample_pdfs/sample_1_page.pdf").merge_pdf_with_and_location("/app/tests/sample_pdfs/sample_10_page.pdf", "/tmp/test_directory/merge_pdf/merger.pdf") + print(test_file) + 
print(os.stat("/tmp/test_directory/merge_pdf/merger.pdf").st_size) + assert os.stat("/tmp/test_directory/merge_pdf/merger.pdf").st_size == 115044 + shutil.rmtree("/tmp/test_directory/") + + test_file = pdf_util("/app/tests/sample_pdfs/sample_2_page.pdf").merge_pdf_with_and_location("/app/tests/sample_pdfs/sample_10_page.pdf", "/tmp/test_directory/merge_pdf/bigfile.pdf") + print(test_file) + print(os.stat("/tmp/test_directory/merge_pdf/bigfile.pdf").st_size) + assert os.stat("/tmp/test_directory/merge_pdf/bigfile.pdf").st_size == 48427 + + shutil.rmtree("/tmp/test_directory/") + + +def test_rotate_pages(): + # Write test code to verify the behavior of the rotate_pages method + pass + + +def test_ocr_pages(): + # Write test code to verify the behavior of the OCR method (presumably ocr_pages) + pass