aboutsummaryrefslogtreecommitdiffstats
path: root/PdfFileTransformer/PyPDF2/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'PdfFileTransformer/PyPDF2/utils.py')
-rw-r--r--PdfFileTransformer/PyPDF2/utils.py309
1 files changed, 309 insertions, 0 deletions
diff --git a/PdfFileTransformer/PyPDF2/utils.py b/PdfFileTransformer/PyPDF2/utils.py
new file mode 100644
index 0000000..2120c70
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/utils.py
@@ -0,0 +1,309 @@
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Utility functions for PDF library.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+
+import sys
+
+try:
+ import __builtin__ as builtins
+except ImportError: # Py3
+ import builtins
+
+
+xrange_fn = getattr(builtins, "xrange", range)
+_basestring = getattr(builtins, "basestring", str)
+
+bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
+string_type = getattr(builtins, "unicode", str)
+int_types = (int, long) if sys.version_info[0] < 3 else (int,)
+
+
+# Make basic type tests more consistent
+def isString(s):
+ """Test if arg is a string. Compatible with Python 2 and 3."""
+ return isinstance(s, _basestring)
+
+
+def isInt(n):
+ """Test if arg is an int. Compatible with Python 2 and 3."""
+ return isinstance(n, int_types)
+
+
+def isBytes(b):
+ """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
+ return isinstance(b, bytes_type)
+
+
+#custom implementation of warnings.formatwarning
+def formatWarning(message, category, filename, lineno, line=None):
+ file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
+ return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
+
+
+def readUntilWhitespace(stream, maxchars=None):
+ """
+ Reads non-whitespace characters and returns them.
+ Stops upon encountering whitespace or when maxchars is reached.
+ """
+ txt = b_("")
+ while True:
+ tok = stream.read(1)
+ if tok.isspace() or not tok:
+ break
+ txt += tok
+ if len(txt) == maxchars:
+ break
+ return txt
+
+
+def readNonWhitespace(stream):
+ """
+ Finds and reads the next non-whitespace character (ignores whitespace).
+ """
+ tok = WHITESPACES[0]
+ while tok in WHITESPACES:
+ tok = stream.read(1)
+ return tok
+
+
+def skipOverWhitespace(stream):
+ """
+ Similar to readNonWhitespace, but returns a Boolean if more than
+ one whitespace character was read.
+ """
+ tok = WHITESPACES[0]
+ cnt = 0;
+ while tok in WHITESPACES:
+ tok = stream.read(1)
+ cnt+=1
+ return (cnt > 1)
+
+
+def skipOverComment(stream):
+ tok = stream.read(1)
+ stream.seek(-1, 1)
+ if tok == b_('%'):
+ while tok not in (b_('\n'), b_('\r')):
+ tok = stream.read(1)
+
+
+def readUntilRegex(stream, regex, ignore_eof=False):
+ """
+ Reads until the regular expression pattern matched (ignore the match)
+ Raise PdfStreamError on premature end-of-file.
+ :param bool ignore_eof: If true, ignore end-of-line and return immediately
+ """
+ name = b_('')
+ while True:
+ tok = stream.read(16)
+ if not tok:
+ # stream has truncated prematurely
+ if ignore_eof == True:
+ return name
+ else:
+ raise PdfStreamError("Stream has ended unexpectedly")
+ m = regex.search(tok)
+ if m is not None:
+ name += tok[:m.start()]
+ stream.seek(m.start()-len(tok), 1)
+ break
+ name += tok
+ return name
+
+
+class ConvertFunctionsToVirtualList(object):
+ def __init__(self, lengthFunction, getFunction):
+ self.lengthFunction = lengthFunction
+ self.getFunction = getFunction
+
+ def __len__(self):
+ return self.lengthFunction()
+
+ def __getitem__(self, index):
+ if isinstance(index, slice):
+ indices = xrange_fn(*index.indices(len(self)))
+ cls = type(self)
+ return cls(indices.__len__, lambda idx: self[indices[idx]])
+ if not isInt(index):
+ raise TypeError("sequence indices must be integers")
+ len_self = len(self)
+ if index < 0:
+ # support negative indexes
+ index = len_self + index
+ if index < 0 or index >= len_self:
+ raise IndexError("sequence index out of range")
+ return self.getFunction(index)
+
+
+def RC4_encrypt(key, plaintext):
+ S = [i for i in range(256)]
+ j = 0
+ for i in range(256):
+ j = (j + S[i] + ord_(key[i % len(key)])) % 256
+ S[i], S[j] = S[j], S[i]
+ i, j = 0, 0
+ retval = []
+ for x in range(len(plaintext)):
+ i = (i + 1) % 256
+ j = (j + S[i]) % 256
+ S[i], S[j] = S[j], S[i]
+ t = S[(S[i] + S[j]) % 256]
+ retval.append(b_(chr(ord_(plaintext[x]) ^ t)))
+ return b_("").join(retval)
+
+
+def matrixMultiply(a, b):
+ return [[sum([float(i)*float(j)
+ for i, j in zip(row, col)]
+ ) for col in zip(*b)]
+ for row in a]
+
+
+def markLocation(stream):
+ """Creates text file showing current location in context."""
+ # Mainly for debugging
+ RADIUS = 5000
+ stream.seek(-RADIUS, 1)
+ outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
+ outputDoc.write(stream.read(RADIUS))
+ outputDoc.write('HERE')
+ outputDoc.write(stream.read(RADIUS))
+ outputDoc.close()
+ stream.seek(-RADIUS, 1)
+
+
+class PyPdfError(Exception):
+ pass
+
+
+class PdfReadError(PyPdfError):
+ pass
+
+
+class PageSizeNotDefinedError(PyPdfError):
+ pass
+
+
+class PdfReadWarning(UserWarning):
+ pass
+
+
+class PdfStreamError(PdfReadError):
+ pass
+
+
+if sys.version_info[0] < 3:
+ def b_(s):
+ return s
+else:
+ B_CACHE = {}
+
+ def b_(s):
+ bc = B_CACHE
+ if s in bc:
+ return bc[s]
+ if type(s) == bytes:
+ return s
+ else:
+ r = s.encode('latin-1')
+ if len(s) < 2:
+ bc[s] = r
+ return r
+
+
+def u_(s):
+ if sys.version_info[0] < 3:
+ return unicode(s, 'unicode_escape')
+ else:
+ return s
+
+
+def str_(b):
+ if sys.version_info[0] < 3:
+ return b
+ else:
+ if type(b) == bytes:
+ return b.decode('latin-1')
+ else:
+ return b
+
+
+def ord_(b):
+ if sys.version_info[0] < 3 or type(b) == str:
+ return ord(b)
+ else:
+ return b
+
+
+def chr_(c):
+ if sys.version_info[0] < 3:
+ return c
+ else:
+ return chr(c)
+
+
+def barray(b):
+ if sys.version_info[0] < 3:
+ return b
+ else:
+ return bytearray(b)
+
+
+def hexencode(b):
+ if sys.version_info[0] < 3:
+ return b.encode('hex')
+ else:
+ import codecs
+ coder = codecs.getencoder('hex_codec')
+ return coder(b)[0]
+
+
+def hexStr(num):
+ return hex(num).replace('L', '')
+
+
+WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
+
+
+def paethPredictor(left, up, up_left):
+ p = left + up - up_left
+ dist_left = abs(p - left)
+ dist_up = abs(p - up)
+ dist_up_left = abs(p - up_left)
+
+ if dist_left <= dist_up and dist_left <= dist_up_left:
+ return left
+ elif dist_up <= dist_up_left:
+ return up
+ else:
+ return up_left