1

I have an imposed document that is laid out 4-up on standard PDF pages. I need to reverse the imposition and get back 1-up PDF pages. I have looked at the solution here, which works perfectly for splitting each page in half. But I'm new to Python scripting and can't quite figure out how to modify it for my needs. My PDF is organized as:

[1|3]         [1]
[2|4]         [2]
-----         [3]
[5|6]    ⇒    [4]
[7|8]         [5]
----          [6]...

If possible, it would be great to have it process only a range of page numbers, but that's only icing on the cake. I've found a Java application with a GUI called [briss here][1] that achieves what I need, but it would be great to learn a little Python to be able to automate this task.

So I've gotten as far as the script below, which outputs what I need, but I'm unsure how to set up the script to accept a page number range:

import copy
import math
from PyPDF2 import PdfFileReader, PdfFileWriter
import argparse

def split_pages2(src, dst):
    """Split every 4-up page of a PDF into four single pages.

    Each page of ``src`` is copied four times and every copy is cropped to one
    quadrant of a fixed, hard-coded area. The quadrants of a sheet are written
    to ``dst`` in the order top-left, bottom-left, top-right, bottom-right,
    which matches an imposition laid out as::

        [1|3]
        [2|4]

    Args:
        src: Path of the imposed (4-up) source PDF.
        dst: Path of the PDF to create; an existing file is overwritten.
    """
    # Fixed bounds of the imposed area in PDF points, floored to whole points
    # exactly as the original hard-coded values were.
    left, bottom = math.floor(72), math.floor(71.5)
    right, top = math.floor(540.7), math.floor(727)
    # Point where the four quadrants meet (hard-coded; 306 x 396 is the centre
    # of a 612 x 792 pt page).
    mid_x, mid_y = 306, 396

    # (lowerLeft, upperRight) of each quadrant, listed in output order.
    quadrants = (
        ((left, mid_y), (mid_x, top)),      # top-left
        ((left, bottom), (mid_x, mid_y)),   # bottom-left
        ((mid_x, mid_y), (right, top)),     # top-right
        ((mid_x, bottom), (right, mid_y)),  # bottom-right
    )

    with open(src, 'rb') as src_f:
        reader = PdfFileReader(src_f)
        writer = PdfFileWriter()

        for i in range(reader.getNumPages()):
            sheet = reader.getPage(i)
            for lower_left, upper_right in quadrants:
                quarter = copy.copy(sheet)
                # copy.copy() is shallow, so give every copy its own box
                # object before mutating the corners.
                quarter.mediaBox = copy.copy(sheet.cropBox)
                quarter.mediaBox.upperRight = upper_right
                quarter.mediaBox.lowerLeft = lower_left

                # Keep the remaining page boxes in step with the new media box.
                quarter.artBox = quarter.mediaBox
                quarter.bleedBox = quarter.mediaBox
                quarter.cropBox = quarter.mediaBox

                writer.addPage(quarter)

        # The source stays open until the output is written, as in the
        # original; the destination is only created once all pages are ready.
        with open(dst, 'wb') as dst_f:
            writer.write(dst_f)

if __name__ == "__main__":
    # Command-line entry point: two positional paths, source then destination.
    cli = argparse.ArgumentParser(
        description="Split up pdf from 4-up to single page")
    for arg_name, arg_help in (("src", "Source file"),
                               ("dst", "Destination file")):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    split_pages2(options.src, options.dst)
stuck
  • 85
  • 1
  • 5
  • maybe use `print()` to see values in variables. Maybe it will help you replace variables in functions to get data in different order. – furas Aug 14 '20 at 22:57
  • 2
    it would be easier if you would add code and example data for tests. – furas Aug 14 '20 at 22:59

1 Answer

0

Update: I refined the script so that it automatically processes all PDF files in the folder where the script is located and auto-detects the 4-up pages. No input filename or other arguments are necessary.

Again, I'm completely new to Python, so I have no idea whether all the lines are needed. If my code makes no sense and needs to be cleaned up, please feel free to comment.
pdf4-to-1.py

# -*- coding: utf-8 -*-
#   x-y coordinate reference for 4up
#   x1,y3-------x2,y3-------x3,y3
#    |           |           |              
#    |      1    |      3    |              1
#    |           |           |              2
#   x1,y2-------x2,y2-------x3,y2   =>      3
#    |           |           |              4
#    |      2    |      4    |
#    |           |           |
#   x1,y1-------x2,y1-------x3,y1


import copy
import math
from PyPDF2 import PdfFileReader, PdfFileWriter
import argparse
from pdf2image import convert_from_path
import cv2
from imutils import perspective
from imutils import contours
import numpy as np
import imutils
from operator import itemgetter
import os
#src = "ST.pdf"
# Operate inside the folder that contains this script: make it the working
# directory and take a snapshot of its entries.
curdir = os.path.dirname(os.path.realpath(__file__))
os.chdir(curdir)
files = os.listdir(curdir)

# Results go to a "1up" sub-folder; the trailing separator is chosen from the
# OS environment variable and the folder is created when it is missing.
dstdir = os.path.join(
    curdir,
    '1up\\' if os.environ.get('OS', '') == 'Windows_NT' else '1up/')
if not os.path.exists(dstdir):
    os.makedirs(dstdir)

# Only entries whose name ends in ".pdf" are processed.
pdffiles = list(filter(lambda name: name.endswith('.pdf'), files))
# Pixels per PDF point in the rasterised pages: a point is 1/72 inch and the
# pages are assumed to be rendered at pdf2image's default of 200 dpi.
PX_PER_PT = 200.0 / 72.0
# Page height in points, used to flip image rows (counted from the top) into
# PDF y coordinates (counted from the bottom). 792 pt is an 11 inch page.
PAGE_HEIGHT_PT = 792

for pdf_name in pdffiles:
    # Rasterise every page so the 4-up layout can be detected visually.
    pages = convert_from_path(pdf_name)

    # --- 1. find the pages that look 4-up ---------------------------------
    ind = []
    for i, page in enumerate(pages):
        # NOTE(review): pdf2image presumably yields RGB images, so
        # COLOR_RGB2GRAY may be the intended flag; kept as-is because the
        # thresholds below were tuned with this conversion.
        gray = cv2.cvtColor(np.array(page), cv2.COLOR_BGR2GRAY)
        _, bw = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        edges = cv2.Canny(bw, 0, 255)
        lines = cv2.HoughLinesP(image=edges, rho=5, theta=np.pi/90,
                                threshold=1000, lines=np.array([]),
                                minLineLength=1100, maxLineGap=3)
        if lines is None:
            # HoughLinesP returns None when no line is found.
            continue
        # Keep long lines that start near the horizontal or vertical middle of
        # the raster (x in 750..950 or y in 1000..1200 pixels).
        near_middle = (((lines[:, 0, 0] > 750) & (lines[:, 0, 0] < 950))
                       | ((lines[:, 0, 1] > 1000) & (lines[:, 0, 1] < 1200)))
        if len(lines[near_middle]) > 1:
            ind.append(i)

    if not ind:
        # Without a detected 4-up page there is nothing to split; move on to
        # the next file instead of aborting the whole batch.
        print("Skipping %s: no 4-up pages detected" % pdf_name)
        continue
    startpg = min(ind)
    endpg = max(ind)

    # --- 2. measure the imposed area on the first 4-up page ---------------
    image = np.array(pages[startpg])
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (7, 7), 0)
    # Edge detection followed by dilation + erosion to close gaps between
    # object edges.
    edged = cv2.Canny(gray, 0, 255)
    edged = cv2.dilate(edged, None, iterations=1)
    edged = cv2.erode(edged, None, iterations=1)

    # grab_contours() copes with the differing return tuples of
    # cv2.findContours across OpenCV versions.
    cnts = imutils.grab_contours(
        cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                         cv2.CHAIN_APPROX_SIMPLE))
    if len(cnts) == 0:
        print("Skipping %s: no outline found on page %d"
              % (pdf_name, startpg + 1))
        continue

    # The largest contour is taken as the outline of the imposed area.
    c = max(cnts, key=cv2.contourArea)
    box = cv2.minAreaRect(c)
    box = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
    box = np.array(box, dtype="int")
    box = perspective.order_points(box)

    # Convert the pixel extremes of the box to PDF points (see the x/y
    # reference diagram at the top of the script), rounded to 0.1 pt.
    x1 = round(float(min(box, key=itemgetter(0))[0]) / PX_PER_PT, 1)
    x3 = round(float(max(box, key=itemgetter(0))[0]) / PX_PER_PT, 1)
    y3 = round(PAGE_HEIGHT_PT
               - float(min(box, key=itemgetter(1))[1]) / PX_PER_PT, 1)
    y1 = round(PAGE_HEIGHT_PT
               - float(max(box, key=itemgetter(1))[1]) / PX_PER_PT, 1)
    # Point where the four quadrants meet.
    x2 = round(x1 + math.floor((x3 - x1) / 2), 1)
    y2 = round(y1 + math.floor((y3 - y1) / 2), 1)

    # (lowerLeft, upperRight) of each quadrant, in output order 1, 2, 3, 4.
    quadrants = (
        ((x1, y2), (x2, y3)),  # 1: top-left
        ((x1, y1), (x2, y2)),  # 2: bottom-left
        ((x2, y2), (x3, y3)),  # 3: top-right
        ((x2, y1), (x3, y2)),  # 4: bottom-right
    )

    # --- 3. write the 1-up PDF ---------------------------------------------
    with open(pdf_name, 'rb') as src_f:
        reader = PdfFileReader(src_f)
        writer = PdfFileWriter()

        # Pages before the first 4-up page are passed through untouched.
        for x in range(startpg):
            writer.addPage(reader.getPage(x))

        # Every page from the first to the last detected 4-up page is split.
        for i in range(startpg, endpg + 1):
            sheet = reader.getPage(i)
            for lower_left, upper_right in quadrants:
                quarter = copy.copy(sheet)
                # copy.copy() is shallow, so give every copy its own box
                # object before mutating the corners.
                quarter.mediaBox = copy.copy(sheet.cropBox)
                quarter.mediaBox.upperRight = upper_right
                quarter.mediaBox.lowerLeft = lower_left

                quarter.artBox = quarter.mediaBox
                quarter.bleedBox = quarter.mediaBox
                quarter.cropBox = quarter.mediaBox

                writer.addPage(quarter)

        # Pages after the last 4-up page are passed through untouched.
        for a in range(endpg + 1, len(pages)):
            writer.addPage(reader.getPage(a))

        # The source stays open until the output has been written.
        with open(os.path.join(dstdir, pdf_name), 'wb') as dst_f:
            writer.write(dst_f)
stuck
  • 85
  • 1
  • 5