aboutsummaryrefslogtreecommitdiffstats
path: root/pdfcat
diff options
context:
space:
mode:
Diffstat (limited to 'pdfcat')
-rwxr-xr-xpdfcat80
1 files changed, 80 insertions, 0 deletions
diff --git a/pdfcat b/pdfcat
new file mode 100755
index 0000000..115eda7
--- /dev/null
+++ b/pdfcat
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+Concatenate pages from pdf files into a single pdf file.
+
+Page ranges refer to the previously-named file.
+A file not followed by a page range means all the pages of the file.
+
+PAGE RANGES are like Python slices.
+ {page_range_help}
+EXAMPLES
+ pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
+ Concatenate all of head.pdf, all but page seven of content.pdf,
+ and the last page of tail.pdf, producing output.pdf.
+
+ pdfcat chapter*.pdf >book.pdf
+ You can specify the output file by redirection.
+
+ pdfcat chapter?.pdf chapter10.pdf >book.pdf
+ In case you don't want chapter 10 before chapter 2.
+"""
+# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
+# All rights reserved. This software is available under a BSD license;
+# see https://github.com/mstamy2/PyPDF2/LICENSE
+
+from __future__ import print_function
+import argparse
+from PdfFileTransformer.PyPDF2.pagerange import PAGE_RANGE_HELP
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description=__doc__.format(page_range_help=PAGE_RANGE_HELP),
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument("-o", "--output",
+ metavar="output_file")
+ parser.add_argument("-v", "--verbose", action="store_true",
+ help="show page ranges as they are being read")
+ parser.add_argument("first_filename", nargs=1,
+ metavar="filename [page range...]")
+ # argparse chokes on page ranges like "-2:" unless caught like this:
+ parser.add_argument("fn_pgrgs", nargs=argparse.REMAINDER,
+ metavar="filenames and/or page ranges")
+ args = parser.parse_args()
+ args.fn_pgrgs.insert(0, args.first_filename[0])
+ return args
+
+
+from sys import stderr, stdout, exit
+import os
+import traceback
+from collections import defaultdict
+
+from PdfFileTransformer.PyPDF2 import PdfFileMerger, parse_filename_page_ranges
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ filename_page_ranges = parse_filename_page_ranges(args.fn_pgrgs)
+ if args.output:
+ output = open(args.output, "wb")
+ else:
+ stdout.flush()
+ output = os.fdopen(stdout.fileno(), "wb")
+
+ merger = PdfFileMerger()
+ in_fs = dict()
+ try:
+ for (filename, page_range) in filename_page_ranges:
+ if args.verbose:
+ print(filename, page_range, file=stderr)
+ if filename not in in_fs:
+ in_fs[filename] = open(filename, "rb")
+ merger.append(in_fs[filename], pages=page_range)
+ except:
+ print(traceback.format_exc(), file=stderr)
+ print("Error while reading " + filename, file=stderr)
+ exit(1)
+ merger.write(output)
+ # In 3.0, input files must stay open until output is written.
+ # Not closing the in_fs because this script exits now.