From 52dc67adc1380b35dfedeef4f0860ce768da4d37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20M=C3=BCller?=
Date: Wed, 11 Oct 2023 19:51:35 +0200
Subject: [PATCH] INIT

---
Review note: the markup of static/icons/filetype-pdf.svg and
templates/index.html is missing from this copy of the patch; those two hunks
carry fewer lines than their headers declare and must be restored from the
original commit before the patch is applied.

 Dockerfile                    | 17 ++++++
 app.py                        | 99 +++++++++++++++++++++++++++++++++++
 docker-compose.yaml           | 29 ++++++++++
 init.sh                       |  6 +++
 requirements.txt              |  3 ++
 static/icons/filetype-pdf.svg |  3 ++
 templates/index.html          | 45 ++++++++++++++++
 wsgi.py                       |  4 ++
 8 files changed, 206 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 app.py
 create mode 100644 docker-compose.yaml
 create mode 100755 init.sh
 create mode 100644 requirements.txt
 create mode 100644 static/icons/filetype-pdf.svg
 create mode 100644 templates/index.html
 create mode 100644 wsgi.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d861b4f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+
+FROM python:3.11-slim
+
+# set the working directory
+WORKDIR /app
+RUN mkdir /app/uploads
+RUN mkdir /app/split
+RUN apt-get update
+
+# install dependencies
+COPY ./requirements.txt /app
+RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --no-cache-dir --upgrade -r requirements.txt
+
+# copy the scripts to the folder
+COPY . /app
+
+CMD ["bash", "init.sh"]
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..afdb85a
--- /dev/null
+++ b/app.py
@@ -0,0 +1,99 @@
+"""Flask service that splits an uploaded PDF into one PDF per page."""
+import os
+import re
+import shutil
+import tempfile
+from pathlib import Path
+
+from flask import Flask, render_template, request, redirect, url_for, jsonify, send_from_directory
+from PyPDF2 import PdfReader, PdfWriter
+from PyPDF2.errors import PdfReadError
+
+app = Flask(__name__)
+
+UPLOAD_FOLDER = 'uploads'
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+# Archives are written to and served from this folder. It is anchored at the
+# application root so it does not depend on the process working directory.
+SPLIT_FOLDER = os.path.join(app.root_path, 'split')
+
+
+def _safe_filename(filename):
+    """Reduce a client-supplied file name to a harmless base name.
+
+    Directory parts (with either separator) are dropped, characters other
+    than word characters, dots and dashes become underscores, and leading
+    dots are removed so the result can never be '.', '..' or a hidden file.
+    """
+    base = filename.replace('\\', '/').rsplit('/', 1)[-1]
+    base = re.sub(r'[^\w.-]', '_', base).lstrip('.')
+    return base or 'upload.pdf'
+
+
+@app.route('/')
+def index():
+    """Render the upload page."""
+    return render_template('index.html')
+
+
+# NOTE(review): the URL converter was missing although the view takes a
+# ``path`` argument; ``<path:path>`` is assumed to be the intended rule.
+@app.route('/split/<path:path>')
+def send_report(path):
+    """Serve a previously generated zip archive from the split folder."""
+    return send_from_directory(SPLIT_FOLDER, path)
+
+
+@app.route('/split', methods=['POST'])
+def split_file():
+    """Split the uploaded PDF into single-page PDFs and zip them.
+
+    Expects a multipart upload in the ``pdf`` field. Returns JSON with the
+    download ``url`` and ``name`` of the archive, or a JSON ``error`` with
+    status 400 when no file was sent or the file cannot be read as a PDF.
+    """
+    pdf_file = request.files.get('pdf')
+    if pdf_file is None or pdf_file.filename == '':
+        return jsonify({"error": "No PDF file was uploaded."}), 400
+
+    # The client controls the file name, so never use it in a path as-is.
+    in_filename = _safe_filename(pdf_file.filename)
+    upload_dir = Path(app.root_path, app.config['UPLOAD_FOLDER'])
+    upload_dir.mkdir(parents=True, exist_ok=True)
+    Path(SPLIT_FOLDER).mkdir(parents=True, exist_ok=True)
+
+    filename = os.path.join(upload_dir, in_filename)
+    pdf_file.save(filename)
+
+    zip_stem = in_filename.rsplit('.', 1)[0] + '_splitted'
+    # A private scratch directory per request keeps concurrent uploads (and
+    # leftovers of failed ones) out of each other's archives; it is removed
+    # automatically when the block exits.
+    with tempfile.TemporaryDirectory(prefix='split_pdf_') as work_dir:
+        try:
+            with open(filename, 'rb') as pdf_stream:
+                pdf_reader = PdfReader(pdf_stream)
+                for page_num, page in enumerate(pdf_reader.pages):
+                    writer = PdfWriter()
+                    writer.add_page(page)
+
+                    out_filename = os.path.join(
+                        work_dir, in_filename + '_' + str(page_num) + '.pdf')
+                    with open(out_filename, 'wb') as outfile:
+                        writer.write(outfile)
+        except PdfReadError:
+            return jsonify({"error": "The uploaded file is not a readable PDF."}), 400
+
+        # Write the archive straight into the served folder.
+        shutil.make_archive(os.path.join(SPLIT_FOLDER, zip_stem), 'zip', work_dir)
+
+    zip_filename = zip_stem + '.zip'
+    response = jsonify({"url": "/split/" + zip_filename, "name": zip_filename})
+    # response.headers.add("Access-Control-Allow-Origin", "*")
+    return response
+
+
+if __name__ == '__main__':
+    # Development server only; the container runs gunicorn via wsgi.py.
+    app.run(debug=True)
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..115747c
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,29 @@
+################################################################
+#
+# Development:
+# nodemon.cmd --ext '*' --exec docker-compose up --build pdf-web-toolkit
+#
+# docker build -t pdf-web-toolkit .
+# docker run -p 8001:8000 -i -t pdf-web-toolkit
+################################################################
+
+version: "3"
+
+# networks:
+#   pdf-web-toolkit-network:
+
+services:
+  pdf-web-toolkit:
+    build: ./
+    container_name: pdf-web-toolkit
+    restart: always
+#    networks:
+#      - pdf-web-toolkit-network
+    command: >
+      sh -c "
+      ./init.sh
+      "
+    ports:
+      - 8001:8000
+
+
diff --git a/init.sh b/init.sh
new file mode 100755
index 0000000..e0c81f7
--- /dev/null
+++ b/init.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# (cd /app/ && gunicorn --access-logfile '-' --error-logfile '-' -w 4 -b 0.0.0.0:8000 wsgi:app) # Dev (Logging to console)
+(cd /app/ && gunicorn -w 4 -b 0.0.0.0:8000 wsgi:app)
+
+/bin/bash
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5710153
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+flask
+PyPDF2
+gunicorn
\ No newline at end of file
diff --git a/static/icons/filetype-pdf.svg b/static/icons/filetype-pdf.svg
new file mode 100644
index 0000000..e1fc9b6
--- /dev/null
+++ b/static/icons/filetype-pdf.svg
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..5ffec28
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,45 @@
+
+
+
+    PDF Splitter
+
+
+
+    PDF Splitter
+
+
+
+
+
+
+
diff --git a/wsgi.py b/wsgi.py
new file mode 100644
index 0000000..1f144bb
--- /dev/null
+++ b/wsgi.py
@@ -0,0 +1,4 @@
+from app import app  # Import your Flask app from app.py
+
+if __name__ == "__main__":
+    app.run()
\ No newline at end of file