1 files changed, 152 insertions, 0 deletions
diff --git a/PdfFileTransformer/PyPDF2/pagerange.py b/PdfFileTransformer/PyPDF2/pagerange.py
new file mode 100644
index 0000000..ce96ec5
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/pagerange.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+"""
+Representation and utils for ranges of PDF file pages.
+
+Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
+All rights reserved. This software is available under a BSD license;
+see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
+"""
+
+import re
+from .utils import isString
+
+_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
+PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
+# groups:         12     34     5 6     7 8
+
+
+class ParseError(Exception):
+    pass
+
+
+PAGE_RANGE_HELP = """Remember, page indices start with zero.
+        Page range expression examples:
+            :     all pages.                   -1    last page.
+            22    just the 23rd page.          :-1   all but the last page.
+            0:3   the first three pages.       -2    second-to-last page.
+            :3    the first three pages.       -2:   last two pages.
+            5:    from the sixth page onward.  -3:-1 third & second to last.
+        The third, "stride" or "step" number is also recognized.
+            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
+            1:10:2    1 3 5 7 9                2::-1     2 1 0.
+            ::-1      all pages in reverse order.
+"""
+
+
+class PageRange(object):
+    """
+    A slice-like representation of a range of page indices,
+        i.e. page numbers, only starting at zero.
+    The syntax is like what you would put between brackets [ ].
+    The slice is one of the few Python types that can't be subclassed,
+    but this class converts to and from slices, and allows similar use.
+      o  PageRange(str) parses a string representing a page range.
+      o  PageRange(slice) directly "imports" a slice.
+      o  to_slice() gives the equivalent slice.
+      o  str() and repr() allow printing.
+      o  indices(n) is like slice.indices(n).
+    """
+
+    def __init__(self, arg):
+        """
+        Initialize with either a slice -- giving the equivalent page range,
+        or a PageRange object -- making a copy,
+        or a string like
+            "int", "[int]:[int]" or "[int]:[int]:[int]",
+            where the brackets indicate optional ints.
+        {page_range_help}
+        Note the difference between this notation and arguments to slice():
+            slice(3) means the first three pages;
+            PageRange("3") means the range of only the fourth page.
+            However PageRange(slice(3)) means the first three pages.
+        """
+        if isinstance(arg, slice):
+            self._slice = arg
+            return
+
+        if isinstance(arg, PageRange):
+            self._slice = arg.to_slice()
+            return
+
+        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
+        if not m:
+            raise ParseError(arg)
+        elif m.group(2):
+            # Special case: just an int means a range of one page.
+            start = int(m.group(2))
+            stop = start + 1 if start != -1 else None
+            self._slice = slice(start, stop)
+        else:
+            self._slice = slice(*[int(g) if g else None
+                                  for g in m.group(4, 6, 8)])
+
+    # Just formatting this when there is __doc__ for __init__
+    if __init__.__doc__:
+        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
+
+    @staticmethod
+    def valid(input):
+        """ True if input is a valid initializer for a PageRange. """
+        return isinstance(input, slice)  or \
+               isinstance(input, PageRange) or \
+               (isString(input)
+                and bool(re.match(PAGE_RANGE_RE, input)))
+
+    def to_slice(self):
+        """ Return the slice equivalent of this page range. """
+        return self._slice
+
+    def __str__(self):
+        """ A string like "1:2:3". """
+        s = self._slice
+        if s.step == None:
+            if s.start != None  and  s.stop == s.start + 1:
+                return str(s.start)
+
+            indices = s.start, s.stop
+        else:
+            indices = s.start, s.stop, s.step
+        return ':'.join("" if i == None else str(i) for i in indices)
+
+    def __repr__(self):
+        """ A string like "PageRange('1:2:3')". """
+        return "PageRange(" + repr(str(self)) + ")"
+
+    def indices(self, n):
+        """
+        n is the length of the list of pages to choose from.
+        Returns arguments for range().  See help(slice.indices).
+        """
+        return self._slice.indices(n)
+
+
+PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.
+
+
+def parse_filename_page_ranges(args):
+    """
+    Given a list of filenames and page ranges, return a list of
+    (filename, page_range) pairs.
+    First arg must be a filename; other ags are filenames, page-range
+    expressions, slice objects, or PageRange objects.
+    A filename not followed by a page range indicates all pages of the file.
+    """
+    pairs = []
+    pdf_filename = None
+    did_page_range = False
+    for arg in args + [None]:
+        if PageRange.valid(arg):
+            if not pdf_filename:
+                raise ValueError("The first argument must be a filename, " \
+                                 "not a page range.")
+
+            pairs.append( (pdf_filename, PageRange(arg)) )
+            did_page_range = True
+        else:
+            # New filename or end of list--do all of the previous file?
+            if pdf_filename and not did_page_range:
+                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
+
+            pdf_filename = arg
+            did_page_range = False
+    return pairs