diff options
Diffstat (limited to 'pdfcat')
-rwxr-xr-x | pdfcat | 80 |
1 files changed, 80 insertions, 0 deletions
@@ -0,0 +1,80 @@ +#!/usr/bin/env python +""" +Concatenate pages from pdf files into a single pdf file. + +Page ranges refer to the previously-named file. +A file not followed by a page range means all the pages of the file. + +PAGE RANGES are like Python slices. + {page_range_help} +EXAMPLES + pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1 + Concatenate all of head.pdf, all but page seven of content.pdf, + and the last page of tail.pdf, producing output.pdf. + + pdfcat chapter*.pdf >book.pdf + You can specify the output file by redirection. + + pdfcat chapter?.pdf chapter10.pdf >book.pdf + In case you don't want chapter 10 before chapter 2. +""" +# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>. +# All rights reserved. This software is available under a BSD license; +# see https://github.com/mstamy2/PyPDF2/LICENSE + +from __future__ import print_function +import argparse +from PdfFileTransformer.PyPDF2.pagerange import PAGE_RANGE_HELP + + +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__.format(page_range_help=PAGE_RANGE_HELP), + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("-o", "--output", + metavar="output_file") + parser.add_argument("-v", "--verbose", action="store_true", + help="show page ranges as they are being read") + parser.add_argument("first_filename", nargs=1, + metavar="filename [page range...]") + # argparse chokes on page ranges like "-2:" unless caught like this: + parser.add_argument("fn_pgrgs", nargs=argparse.REMAINDER, + metavar="filenames and/or page ranges") + args = parser.parse_args() + args.fn_pgrgs.insert(0, args.first_filename[0]) + return args + + +from sys import stderr, stdout, exit +import os +import traceback +from collections import defaultdict + +from PdfFileTransformer.PyPDF2 import PdfFileMerger, parse_filename_page_ranges + + +if __name__ == "__main__": + args = parse_args() + filename_page_ranges = parse_filename_page_ranges(args.fn_pgrgs) + if args.output: + output = open(args.output, "wb") + else: + stdout.flush() + output = os.fdopen(stdout.fileno(), "wb") + + merger = PdfFileMerger() + in_fs = dict() + try: + for (filename, page_range) in filename_page_ranges: + if args.verbose: + print(filename, page_range, file=stderr) + if filename not in in_fs: + in_fs[filename] = open(filename, "rb") + merger.append(in_fs[filename], pages=page_range) + except: + print(traceback.format_exc(), file=stderr) + print("Error while reading " + filename, file=stderr) + exit(1) + merger.write(output) + # In 3.0, input files must stay open until output is written. + # Not closing the in_fs because this script exits now. |