This commit is contained in:
Niklas Müller 2023-10-11 19:51:35 +02:00
parent 0ee6440ea3
commit 52dc67adc1
8 changed files with 174 additions and 0 deletions

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Runtime image for the PDF splitter: a Flask app served by gunicorn.
FROM python:3.11-slim

# set the working directory; init.sh and the app's relative paths resolve here
WORKDIR /app

# Directories the app writes to: raw uploads and the generated zip archives.
# One layer with -p is enough and does not fail if a directory already exists.
RUN mkdir -p /app/uploads /app/split

# NOTE: a bare `RUN apt-get update` used to sit here. Without an
# `apt-get install` in the same RUN it installs nothing and only stores a
# package index (which goes stale in the build cache) in an image layer, so it
# was removed. If system packages are ever needed, use a single
# `RUN apt-get update && apt-get install -y --no-install-recommends <pkgs> \
#      && rm -rf /var/lib/apt/lists/*`.

# install dependencies before copying the sources, so this layer is only
# rebuilt when requirements.txt changes
COPY ./requirements.txt /app
# NOTE(review): --trusted-host makes pip accept these hosts even without valid
# HTTPS; presumably needed behind a TLS-intercepting proxy - drop if not.
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt

# copy the scripts to the folder
COPY . /app

# init.sh starts gunicorn on port 8000
CMD ["bash", "init.sh"]

67
app.py Normal file
View File

@ -0,0 +1,67 @@
from flask import Flask, render_template, request, redirect, url_for, jsonify, send_from_directory
import os
from PyPDF2 import PdfReader, PdfWriter
from pathlib import Path
import shutil
import os
# WSGI application object; wsgi.py imports it so gunicorn can serve it.
app = Flask(__name__)

# Folder that receives the raw uploads. It is a relative path, so it resolves
# against the process's working directory.
UPLOAD_FOLDER = 'uploads'
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
@app.route('/')
def index():
    """Render the ``index.html`` template as the landing page."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/split/<path:path>')
def send_report(path):
    """Serve a file from the ``split`` directory.

    Args:
        path: Path of the requested file inside ``split``, taken from the URL.

    Returns:
        The response produced by ``send_from_directory`` for that file.
    """
    archive_dir = 'split'
    return send_from_directory(archive_dir, path)
@app.route('/split', methods=['POST'])
def split_file():
    """Split an uploaded PDF into single-page PDFs and bundle them in a zip.

    Expects a multipart form upload with the file in the ``pdf`` field. The
    upload is stored in ``UPLOAD_FOLDER``, every page is written to its own
    PDF, and the pages are zipped into ``<name>_splitted.zip`` inside the
    ``split`` directory, from where the ``/split/<path>`` route serves it.

    Returns:
        A JSON response ``{"url": ..., "name": ...}`` describing the archive,
        or a redirect to the request URL when no usable file was submitted.
    """
    # Imported locally so this view does not depend on a file-level import.
    import tempfile

    if 'pdf' not in request.files:
        return redirect(request.url)
    uploaded = request.files['pdf']

    # The filename is chosen by the client: keep only its last path component
    # so that "../" segments or absolute paths cannot escape the upload and
    # output folders.
    raw_name = (uploaded.filename or '').replace('\\', '/')
    in_filename = os.path.basename(raw_name)
    if in_filename in ('', '.', '..'):
        return redirect(request.url)

    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)
    upload_path = os.path.join(upload_dir, in_filename)
    uploaded.save(upload_path)

    # Write the archive straight into the directory that send_from_directory
    # ('split', ...) serves from (it resolves relative to app.root_path),
    # instead of relying on the working directory being /app.
    split_dir = os.path.join(app.root_path, 'split')
    os.makedirs(split_dir, exist_ok=True)
    archive_stem = in_filename.rsplit('.', 1)[0] + '_splitted'
    zip_filename = archive_stem + '.zip'

    # A private scratch directory per request: parallel workers can no longer
    # mix their pages into each other's archive, and the pages are removed
    # even if splitting fails half-way.
    with tempfile.TemporaryDirectory(prefix='split_pdf_') as work_dir:
        with open(upload_path, 'rb') as source:
            reader = PdfReader(source)
            for page_num, page in enumerate(reader.pages):
                writer = PdfWriter()
                writer.add_page(page)
                page_path = os.path.join(
                    work_dir, in_filename + '_' + str(page_num) + '.pdf'
                )
                with open(page_path, 'wb') as outfile:
                    writer.write(outfile)
        shutil.make_archive(
            os.path.join(split_dir, archive_stem), 'zip', work_dir
        )

    response = jsonify({"url": "/split/" + zip_filename, "name": zip_filename})
    # Enable if the frontend is ever served from a different origin:
    # response.headers.add("Access-Control-Allow-Origin", "*")
    return response
# Development entry point: runs only when this file is executed directly.
# init.sh starts the app through gunicorn (wsgi:app) and never takes this path.
if __name__ == '__main__':
    # debug=True turns on Flask's debug mode; do not expose it publicly.
    app.run(debug=True)

29
docker-compose.yaml Normal file
View File

@ -0,0 +1,29 @@
################################################################
#
# Development:
# nodemon.cmd --ext '*' --exec docker-compose up --build pdf-web-toolkit
#
# docker build -t pdf-web-toolkit .
# docker run -p 8001:8000 -i -t pdf-web-toolkit
################################################################
version: "3"
# networks:
#   pdf-web-toolkit-network:
services:
  # Single service: the PDF splitter web app.
  pdf-web-toolkit:
    # Build the image from the Dockerfile in this directory.
    build: ./
    container_name: pdf-web-toolkit
    # Restart the container whenever it stops.
    restart: always
    # networks:
    #   - pdf-web-toolkit-network
    # Replaces the image's CMD; init.sh is still what launches gunicorn.
    command: >
      sh -c "
      ./init.sh
      "
    # host port 8001 -> container port 8000 (the port gunicorn binds in init.sh)
    ports:
      - 8001:8000

6
init.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Container start-up script: serves the Flask app with gunicorn.
#
# Alternative for development, logging access and errors to the console:
# (cd /app/ && gunicorn --access-logfile '-' --error-logfile '-' -w 4 -b 0.0.0.0:8000 wsgi:app) # Dev (Logging to console)

# Start gunicorn with 4 workers on 0.0.0.0:8000, loading `app` from wsgi.py.
# The subshell keeps the `cd` from affecting the rest of this script.
(cd /app/ && gunicorn -w 4 -b 0.0.0.0:8000 wsgi:app)

# Reached only after gunicorn has exited: opens a shell.
# NOTE(review): presumably kept for debugging in `docker run -i -t` sessions.
# Because gunicorn is not exec'd it is not the container's main process, so
# stop signals may not reach it - confirm whether `exec gunicorn ...` is wanted.
/bin/bash

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
flask
PyPDF2
gunicorn

View File

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-filetype-pdf" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M14 4.5V14a2 2 0 0 1-2 2h-1v-1h1a1 1 0 0 0 1-1V4.5h-2A1.5 1.5 0 0 1 9.5 3V1H4a1 1 0 0 0-1 1v9H2V2a2 2 0 0 1 2-2h5.5L14 4.5ZM1.6 11.85H0v3.999h.791v-1.342h.803c.287 0 .531-.057.732-.173.203-.117.358-.275.463-.474a1.42 1.42 0 0 0 .161-.677c0-.25-.053-.476-.158-.677a1.176 1.176 0 0 0-.46-.477c-.2-.12-.443-.179-.732-.179Zm.545 1.333a.795.795 0 0 1-.085.38.574.574 0 0 1-.238.241.794.794 0 0 1-.375.082H.788V12.48h.66c.218 0 .389.06.512.181.123.122.185.296.185.522Zm1.217-1.333v3.999h1.46c.401 0 .734-.08.998-.237a1.45 1.45 0 0 0 .595-.689c.13-.3.196-.662.196-1.084 0-.42-.065-.778-.196-1.075a1.426 1.426 0 0 0-.589-.68c-.264-.156-.599-.234-1.005-.234H3.362Zm.791.645h.563c.248 0 .45.05.609.152a.89.89 0 0 1 .354.454c.079.201.118.452.118.753a2.3 2.3 0 0 1-.068.592 1.14 1.14 0 0 1-.196.422.8.8 0 0 1-.334.252 1.298 1.298 0 0 1-.483.082h-.563v-2.707Zm3.743 1.763v1.591h-.79V11.85h2.548v.653H7.896v1.117h1.606v.638H7.896Z"/>
</svg>

After

Width:  |  Height:  |  Size: 1.1 KiB

45
templates/index.html Normal file
View File

@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>
<head>
<title>PDF Splitter</title>
<link rel="shortcut icon" href="/static/icons/filetype-pdf.svg" type="image/x-icon">
</head>
<body>
<h1>PDF Splitter</h1>
<input type="file" id="pdfFile" accept=".pdf"><br>
<button onclick="uploadPDF()">Split PDF into individual pages</button>
<div id="output"></div>
<script>
/**
 * Start a browser download of `uri`, suggesting `name` as the file name.
 *
 * A temporary <a download> element is attached to the document, clicked
 * programmatically and removed again.
 *
 * @param {string} uri  Address of the resource to download.
 * @param {string} name File name proposed to the user.
 */
function downloadURI(uri, name) {
    const link = document.createElement("a");
    link.download = name;
    link.href = uri;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    // The former `delete link;` was dropped: `delete` removes object
    // properties, not local variables, so it did nothing here and is a
    // SyntaxError in strict mode. The element is released once it is
    // detached and this function returns.
}
/**
 * Upload the PDF chosen in the #pdfFile input to the backend and download
 * the zip archive the backend answers with.
 *
 * The file is sent as multipart form field `pdf` to POST /split; the JSON
 * reply is expected to carry `url` and `name` of the generated archive.
 */
function uploadPDF() {
    const fileInput = document.getElementById('pdfFile');
    const file = fileInput.files[0];
    // Without this guard `undefined` would be appended to the form as the
    // string "undefined" and the backend would not receive a file at all.
    if (!file) {
        console.error("No PDF file selected");
        return;
    }

    const formData = new FormData();
    formData.append('pdf', file);
    const backendURL = '/split';

    fetch(backendURL, {
        method: 'POST',
        body: formData
    })
        .then(response => {
            // fetch() only rejects on network failures; an HTTP error page
            // would otherwise surface as a confusing JSON parse error.
            if (!response.ok) {
                throw new Error("Split request failed with HTTP status " + response.status);
            }
            return response.json();
        })
        .then(data => {
            console.debug("data from Backend: ", data)
            downloadURI(data["url"], data["name"]);
        })
        .catch(error => console.error(error));
}
</script>
</body>
</html>

4
wsgi.py Normal file
View File

@ -0,0 +1,4 @@
from app import app # Import your Flask app from app.py
# Only taken when this module is run directly; gunicorn (see init.sh, wsgi:app)
# imports `app` from this module and never executes this branch.
if __name__ == "__main__":
    # Start Flask's built-in server with its default settings.
    app.run()