dev #1
|
@ -0,0 +1 @@
|
|||
.vscode/
|
|
@ -6,6 +6,7 @@ WORKDIR /app
|
|||
RUN mkdir /app/uploads
|
||||
RUN mkdir /app/split
|
||||
RUN mkdir /app/merge
|
||||
RUN mkdir /app/projects
|
||||
RUN apt-get update
|
||||
|
||||
# install dependencies
|
||||
|
|
78
app.py
78
app.py
|
@ -1,13 +1,30 @@
|
|||
import shutil
|
||||
import os
|
||||
from flask import Flask, render_template, request, redirect, jsonify, send_from_directory
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pathlib import Path
|
||||
from pdf_util.pdf_util import pdf_util
|
||||
|
||||
import datetime as dt
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# Setup Logging
# Root logger: DEBUG and above go to a dated file under /var/log/ and to stdout.
# The "Start:" prefix is computed once, when this module is imported, so every
# record carries the same start stamp.
logging.basicConfig(
    level=logging.DEBUG,  # switch to logging.INFO / logging.ERROR for quieter output
    format=f"Start: {str(dt.datetime.now()).replace(' ', '_')} | %(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(f"/var/log/{dt.datetime.today().strftime('%Y-%m-%d')}_-_cron.log"),
        logging.StreamHandler(sys.stdout),
    ],
)
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
UPLOAD_FOLDER = 'uploads'
|
||||
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
||||
app.config['UPLOAD_FOLDER'] = 'uploads'
|
||||
|
||||
|
||||
@app.route('/')
|
||||
|
@ -36,27 +53,19 @@ def split_to_zip():
|
|||
return redirect(request.url)
|
||||
|
||||
if pdf_file:
|
||||
in_filename = pdf_file.filename
|
||||
in_filename = pdf_file.filename.rsplit('.', 1)[0]
|
||||
filename = os.path.join(app.config['UPLOAD_FOLDER'], pdf_file.filename)
|
||||
pdf_file.save(filename)
|
||||
|
||||
out_filenames = []
|
||||
Path("/tmp/split_pdf").mkdir(parents=True, exist_ok=True)
|
||||
with open(filename, 'rb') as pdf_file:
|
||||
pdf_reader = PdfReader(pdf_file)
|
||||
num_pages = len(pdf_reader.pages)
|
||||
for page_num in range(num_pages):
|
||||
# Use pdf_utils Module to split File
|
||||
out_filenames = pdf_util(filename).split_pdf()
|
||||
logging.debug(out_filenames)
|
||||
|
||||
writer = PdfWriter()
|
||||
writer.add_page(pdf_reader.pages[page_num])
|
||||
logging.debug(in_filename)
|
||||
logging.debug(os.path.splitext(pdf_file.filename)[0])
|
||||
|
||||
out_filename = '/tmp/split_pdf/' + in_filename + '_' + str(page_num) + '.pdf'
|
||||
with open(out_filename, 'wb') as outfile:
|
||||
writer.write(outfile)
|
||||
out_filenames.append(out_filename)
|
||||
|
||||
shutil.make_archive(in_filename.rsplit('.', 1)[0] + '_splitted', 'zip', "/tmp/split_pdf")
|
||||
zip_filename = in_filename.rsplit('.', 1)[0] + "_splitted.zip"
|
||||
shutil.make_archive(in_filename + '_splitted', 'zip', os.path.dirname(filename) + "/split_pdf")
|
||||
zip_filename = in_filename + "_splitted.zip"
|
||||
os.rename("/app/" + zip_filename, "/app/split/" + zip_filename)
|
||||
|
||||
for temp_file in out_filenames:
|
||||
|
@ -79,35 +88,24 @@ def merge_to_pdf():
|
|||
return redirect(request.url)
|
||||
|
||||
if pdf_file_1:
|
||||
filename_1 = os.path.join(app.config['UPLOAD_FOLDER'], pdf_file_1.filename)
|
||||
filename_1 = os.path.join(os.path.dirname(os.path.realpath(__file__)), app.config['UPLOAD_FOLDER'], pdf_file_1.filename)
|
||||
pdf_file_1.save(filename_1)
|
||||
|
||||
if pdf_file_2:
|
||||
filename_2 = os.path.join(app.config['UPLOAD_FOLDER'], pdf_file_2.filename)
|
||||
filename_2 = os.path.join(os.path.dirname(os.path.realpath(__file__)), app.config['UPLOAD_FOLDER'], pdf_file_2.filename)
|
||||
pdf_file_2.save(filename_2)
|
||||
|
||||
if pdf_file_1 and pdf_file_2:
|
||||
logging.debug(filename_1)
|
||||
logging.debug(filename_2)
|
||||
|
||||
with open(filename_1, 'rb') as pdf_file_1, open(filename_2, 'rb') as pdf_file_2:
|
||||
pdf_reader_1 = PdfReader(pdf_file_1)
|
||||
pdf_reader_2 = PdfReader(pdf_file_2)
|
||||
# Use pdf_utils Module to split File
|
||||
out_path = pdf_util(filename_1).merge_pdf_with(filename_2)
|
||||
logging.debug(out_path)
|
||||
os.rename(out_path, "/app/merge/merger.pdf")
|
||||
|
||||
Path("/tmp/merge_pdf").mkdir(parents=True, exist_ok=True)
|
||||
writer = PdfWriter()
|
||||
|
||||
for page_num in range(len(pdf_reader_1.pages)):
|
||||
writer.add_page(pdf_reader_1.pages[page_num])
|
||||
|
||||
for page_num in range(len(pdf_reader_2.pages)):
|
||||
writer.add_page(pdf_reader_2.pages[page_num])
|
||||
|
||||
out_filename = '/app/merge/merger.pdf'
|
||||
with open(out_filename, 'wb') as outfile:
|
||||
writer.write(outfile)
|
||||
|
||||
response = jsonify({"url": '/merge/merger.pdf', "name": 'merge.pdf'})
|
||||
# response.headers.add("Access-Control-Allow-Origin", "*")
|
||||
return response
|
||||
response = jsonify({"url": '/merge/merger.pdf', "name": os.path.splitext(os.path.basename(out_path))[0]})
|
||||
return response
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
7
init.sh
7
init.sh
|
@ -1,6 +1,9 @@
|
|||
#!/bin/bash
|
||||
|
||||
# (cd /app/ && gunicorn --access-logfile '-' --error-logfile '-' -w 4 -b 0.0.0.0:8000 wsgi:app) # Dev (Logging to console)
|
||||
(cd /app/ && gunicorn -w 4 -b 0.0.0.0:8000 wsgi:app)
|
||||
# (cd /app/ && pytest tests/test_pdf_util.py)
|
||||
(cd /app/ && pytest)
|
||||
|
||||
# (cd /app/ && gunicorn -w 4 -b 0.0.0.0:8000 wsgi:app)
|
||||
(cd /app/ && gunicorn --access-logfile '-' --error-logfile '-' -w 4 -b 0.0.0.0:8000 wsgi:app) # Dev (Logging to console)
|
||||
|
||||
/bin/bash
|
|
@ -0,0 +1 @@
|
|||
# __init__.py
|
|
@ -0,0 +1,9 @@
|
|||
from pypdf import PdfReader, PdfWriter
|
||||
import uuid
|
||||
import os
|
||||
|
||||
class pdf_project_manager:
    """Create and identify an on-disk workspace for one PDF project.

    Each instance generates a random UUID and makes sure a directory named
    after that UUID exists beneath the projects root.

    Attributes:
        uuid: String form of the generated UUID4 (36 characters).
        project_dir: Path of the directory created for this project.
    """

    def __init__(self, projects_root="/app/projects"):
        """Generate the project id and create its directory.

        Args:
            projects_root: Directory under which the project folder is
                created. Defaults to ``/app/projects``.
        """
        self.uuid = str(uuid.uuid4())
        self.project_dir = os.path.join(projects_root, self.uuid)
        # exist_ok: do not fail if the directory is already present.
        os.makedirs(self.project_dir, exist_ok=True)
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
import os
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
class pdf_util:
    """Split and merge operations for a single PDF file on disk.

    Results are written into sub-directories (``split_pdf`` / ``merge_pdf``)
    located next to the source file.

    Attributes:
        file_path: Path of the source PDF as given to the constructor.
        file_name: Base name of ``file_path`` (including extension).
        file_name_wo_extension: ``file_name`` without its last extension.
    """

    def __init__(self, file_path):
        """Remember the source path and derive its file-name parts.

        Args:
            file_path: Path of the PDF this instance operates on.
        """
        self.file_path = file_path
        self.file_name = os.path.basename(file_path)
        self.file_name_wo_extension = os.path.splitext(self.file_name)[0]

    def _output_dir(self, subdir):
        """Return the path of *subdir* located beside the source file."""
        # os.path.join keeps a bare file name (empty dirname) relative to the
        # working directory; plain string concatenation would produce
        # "/<subdir>" at the filesystem root instead.
        return os.path.join(os.path.dirname(self.file_path), subdir)

    def split_pdf(self):
        """Write every page of the source PDF to its own one-page file.

        Files are named ``<source name without extension>_<page>.pdf`` with
        1-based page numbers and are placed in the ``split_pdf`` directory
        beside the source file (created if missing).

        Returns:
            List of the written file paths, in page order.
        """
        out_dir = self._output_dir("split_pdf")
        os.makedirs(out_dir, exist_ok=True)

        out_filenames = []
        with open(self.file_path, 'rb') as pdf_file:
            pdf_reader = PdfReader(pdf_file)
            for page_number, page in enumerate(pdf_reader.pages, start=1):
                # A fresh writer per page so each output holds exactly one page.
                writer = PdfWriter()
                writer.add_page(page)

                out_filename = os.path.join(
                    out_dir,
                    self.file_name_wo_extension + '_' + str(page_number) + '.pdf',
                )
                with open(out_filename, 'wb') as outfile:
                    writer.write(outfile)
                out_filenames.append(out_filename)

        return out_filenames

    def merge_pdf_with(self, merge_file_path, merged_name="merged"):
        """Append the pages of another PDF to the source PDF.

        The result is written to ``merger.pdf`` inside the ``merge_pdf``
        directory beside the source file (created if missing).

        Args:
            merge_file_path: Path of the PDF whose pages follow the source's.
            merged_name: Accepted for interface compatibility; currently not
                used -- the output file is always named ``merger.pdf``.

        Returns:
            Path of the merged PDF.
        """
        out_dir = self._output_dir("merge_pdf")
        os.makedirs(out_dir, exist_ok=True)

        pdf_reader_1 = PdfReader(self.file_path)
        pdf_reader_2 = PdfReader(merge_file_path)
        writer = PdfWriter()

        # Source pages first, then the pages of the file being merged in.
        for reader in (pdf_reader_1, pdf_reader_2):
            for page in reader.pages:
                writer.add_page(page)

        out_path = os.path.join(out_dir, 'merger.pdf')
        with open(out_path, 'wb') as outfile:
            writer.write(outfile)

        return out_path
|
|
@ -1,3 +1,6 @@
|
|||
flask
|
||||
PyPDF2
|
||||
gunicorn
|
||||
pypdf
|
||||
|
||||
# For testing
|
||||
pytest
|
|
@ -0,0 +1 @@
|
|||
# __init__.py
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,198 @@
|
|||
%PDF-1.3
|
||||
%âãÏÓ
|
||||
|
||||
1 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Outlines 2 0 R
|
||||
/Pages 3 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<<
|
||||
/Type /Outlines
|
||||
/Count 0
|
||||
>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<<
|
||||
/Type /Pages
|
||||
/Count 2
|
||||
/Kids [ 4 0 R 6 0 R ]
|
||||
>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<<
|
||||
/Type /Page
|
||||
/Parent 3 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 9 0 R
|
||||
>>
|
||||
/ProcSet 8 0 R
|
||||
>>
|
||||
/MediaBox [0 0 612.0000 792.0000]
|
||||
/Contents 5 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
<< /Length 1074 >>
|
||||
stream
|
||||
2 J
|
||||
BT
|
||||
0 0 0 rg
|
||||
/F1 0027 Tf
|
||||
57.3750 722.2800 Td
|
||||
( A Simple PDF File ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 688.6080 Td
|
||||
( This is a small demonstration .pdf file - ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 664.7040 Td
|
||||
( just for use in the Virtual Mechanics tutorials. More text. And more ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 652.7520 Td
|
||||
( text. And more text. And more text. And more text. ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 628.8480 Td
|
||||
( And more text. And more text. And more text. And more text. And more ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 616.8960 Td
|
||||
( text. And more text. Boring, zzzzz. And more text. And more text. And ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 604.9440 Td
|
||||
( more text. And more text. And more text. And more text. And more text. ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 592.9920 Td
|
||||
( And more text. And more text. ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 569.0880 Td
|
||||
( And more text. And more text. And more text. And more text. And more ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 557.1360 Td
|
||||
( text. And more text. And more text. Even more. Continued on page 2 ...) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
<<
|
||||
/Type /Page
|
||||
/Parent 3 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 9 0 R
|
||||
>>
|
||||
/ProcSet 8 0 R
|
||||
>>
|
||||
/MediaBox [0 0 612.0000 792.0000]
|
||||
/Contents 7 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
7 0 obj
|
||||
<< /Length 676 >>
|
||||
stream
|
||||
2 J
|
||||
BT
|
||||
0 0 0 rg
|
||||
/F1 0027 Tf
|
||||
57.3750 722.2800 Td
|
||||
( Simple PDF File 2 ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 688.6080 Td
|
||||
( ...continued from page 1. Yet more text. And more text. And more text. ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 676.6560 Td
|
||||
( And more text. And more text. And more text. And more text. And more ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 664.7040 Td
|
||||
( text. Oh, how boring typing this stuff. But not as boring as watching ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 652.7520 Td
|
||||
( paint dry. And more text. And more text. And more text. And more text. ) Tj
|
||||
ET
|
||||
BT
|
||||
/F1 0010 Tf
|
||||
69.2500 640.8000 Td
|
||||
( Boring. More, a little more text. The end, and just as well. ) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
8 0 obj
|
||||
[/PDF /Text]
|
||||
endobj
|
||||
|
||||
9 0 obj
|
||||
<<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/Name /F1
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
>>
|
||||
endobj
|
||||
|
||||
10 0 obj
|
||||
<<
|
||||
/Creator (Rave \(http://www.nevrona.com/rave\))
|
||||
/Producer (Nevrona Designs)
|
||||
/CreationDate (D:20060301072826)
|
||||
>>
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 11
|
||||
0000000000 65535 f
|
||||
0000000019 00000 n
|
||||
0000000093 00000 n
|
||||
0000000147 00000 n
|
||||
0000000222 00000 n
|
||||
0000000390 00000 n
|
||||
0000001522 00000 n
|
||||
0000001690 00000 n
|
||||
0000002423 00000 n
|
||||
0000002456 00000 n
|
||||
0000002574 00000 n
|
||||
|
||||
trailer
|
||||
<<
|
||||
/Size 11
|
||||
/Root 1 0 R
|
||||
/Info 10 0 R
|
||||
>>
|
||||
|
||||
startxref
|
||||
2714
|
||||
%%EOF
|
|
@ -0,0 +1,15 @@
|
|||
import pytest
|
||||
import os
|
||||
from pdf_util.pdf_project_manager import pdf_project_manager
|
||||
|
||||
def test_basic_object_creation():
    """A new project manager exposes its id as a 36-character string."""
    manager = pdf_project_manager()
    project_id = manager.uuid
    print(project_id)
    assert len(project_id) == 36
|
||||
|
||||
|
||||
def test_folder_creation():
    """Constructing a project manager creates its folder under /app/projects/."""
    manager = pdf_project_manager()
    print(manager.uuid)
    project_dir = os.path.join('/app/projects/', manager.uuid)
    assert os.path.isdir(project_dir)
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
import pytest
|
||||
import os
|
||||
from pdf_util.pdf_util import pdf_util
|
||||
|
||||
def test_split_pages():
    """Split the 1-, 2- and 10-page samples and check each page file's size."""
    sample_dir = "/app/tests/sample_pdfs/"
    # (sample stem, expected byte size of each generated page file in page order)
    cases = [
        ("sample_1_page", [69339]),
        ("sample_2_page", [1804, 1405]),
        ("sample_10_page", [3167, 2888, 6670, 3043, 9968, 5367, 10093, 8578, 30188, 3789]),
    ]

    for stem, expected_sizes in cases:
        test_file = pdf_util(sample_dir + stem + ".pdf").split_pdf()
        print(test_file)

        for page_number, expected in enumerate(expected_sizes, start=1):
            page_path = sample_dir + "split_pdf/" + stem + "_" + str(page_number) + ".pdf"
            actual = os.stat(page_path).st_size
            print(actual)
            assert actual == expected
|
||||
|
||||
|
||||
def test_merge_with():
    # Placeholder: always passes.
    # TODO: write test code to verify the behavior of pdf_util.merge_pdf_with.
    pass
|
||||
|
||||
def test_rotate_pages():
    # Placeholder: always passes.
    # TODO: write test code to verify the behavior of the rotate_pages method
    # (no such method is visible in this chunk -- confirm it exists first).
    pass
|
||||
|
||||
def test_ocr_pages():
    # Placeholder: always passes.
    # TODO: write test code to verify the behavior of the OCR method
    # (no such method is visible in this chunk -- confirm its name first).
    pass
|
Loading…
Reference in New Issue