From f57654b84b4cf0ffa1287034fc9f66ba200bb259 Mon Sep 17 00:00:00 2001
From: ben
Date: Tue, 18 Sep 2018 10:52:38 +0200
Subject: First public commit
---
PdfFileTransformer/PyPDF2/__init__.py | 5 +
PdfFileTransformer/PyPDF2/_version.py | 1 +
PdfFileTransformer/PyPDF2/filters.py | 424 ++++
PdfFileTransformer/PyPDF2/generic.py | 1228 ++++++++++
PdfFileTransformer/PyPDF2/merger.py | 553 +++++
PdfFileTransformer/PyPDF2/pagerange.py | 152 ++
PdfFileTransformer/PyPDF2/pdf.py | 3074 +++++++++++++++++++++++++
PdfFileTransformer/PyPDF2/utils.py | 309 +++
PdfFileTransformer/PyPDF2/xmp.py | 358 +++
PdfFileTransformer/__init__.py | 4 +
PdfFileTransformer/pdf.py | 352 +++
PolyglotFile/__init__.py | 7 +
PolyglotFile/polyglotpdfzip.py | 55 +
PolyglotFile/polyglotszippdf.py | 110 +
PolyglotFile/polyglotzippdf.py | 37 +
README.md | 3 +
ZipFileTransformer/__init__.py | 4 +
ZipFileTransformer/zip.py | 227 ++
ZipFileTransformer/zipfile.py | 2133 +++++++++++++++++
caradoc | Bin 0 -> 3752888 bytes
pdfcat | 80 +
tests/samples/descriptions.txt | 7 +
tests/samples/test1.pdf | Bin 0 -> 247680 bytes
tests/samples/test1.zip | Bin 0 -> 1425748 bytes
tests/samples/test1_normalized.pdf | Bin 0 -> 166153 bytes
tests/test_pdf_add_data.py | 21 +
tests/test_pdf_normalisation.py | 26 +
tests/test_pdf_rebuild.py | 19 +
tests/test_polyglot_pdfzip.py | 23 +
tests/test_rebuild_zip.py | 20 +
tests/test_zip.py | 21 +
truepolyglot | 74 +
website/css/styles.css | 63 +
website/css/styles2.css | 61 +
website/favicon.ico | Bin 0 -> 318 bytes
website/gen_pocs.sh | 21 +
website/index.html | 249 ++
website/robots.txt | 2 +
website/samples/pdfzip/poc1/archive.zip | Bin 0 -> 75451 bytes
website/samples/pdfzip/poc1/doc.pdf | Bin 0 -> 39031 bytes
website/samples/pdfzip/poc1/polyglot.pdf | Bin 0 -> 114280 bytes
website/samples/pdfzip/poc2/file-FILE5_32.zip | Bin 0 -> 625801 bytes
website/samples/pdfzip/poc2/orwell_1984.pdf | Bin 0 -> 684772 bytes
website/samples/pdfzip/poc2/polyglot.pdf | Bin 0 -> 1294892 bytes
website/samples/pdfzip/poc3/fasmw17304.zip | Bin 0 -> 1071623 bytes
website/samples/pdfzip/poc3/polyglot.pdf | Bin 0 -> 1878303 bytes
website/samples/pdfzip/poc3/x86asm.pdf | Bin 0 -> 806073 bytes
website/samples/pdfzip/poc6/eicar.zip | Bin 0 -> 229 bytes
website/samples/pdfzip/poc6/hexinator.pdf | Bin 0 -> 3011364 bytes
website/samples/pdfzip/poc6/polyglot.pdf | Bin 0 -> 2960704 bytes
website/samples/szippdf/poc5/electronics.pdf | Bin 0 -> 796587 bytes
website/samples/szippdf/poc5/hello_world.jar | Bin 0 -> 864 bytes
website/samples/szippdf/poc5/polyglot.pdf | Bin 0 -> 794936 bytes
website/samples/zippdf/poc4/archive.zip | Bin 0 -> 75451 bytes
website/samples/zippdf/poc4/doc.pdf | Bin 0 -> 39031 bytes
website/samples/zippdf/poc4/polyglot.pdf | Bin 0 -> 114187 bytes
website/start_server.sh | 3 +
website/update.sh | 2 +
58 files changed, 9728 insertions(+)
create mode 100644 PdfFileTransformer/PyPDF2/__init__.py
create mode 100644 PdfFileTransformer/PyPDF2/_version.py
create mode 100644 PdfFileTransformer/PyPDF2/filters.py
create mode 100644 PdfFileTransformer/PyPDF2/generic.py
create mode 100644 PdfFileTransformer/PyPDF2/merger.py
create mode 100644 PdfFileTransformer/PyPDF2/pagerange.py
create mode 100644 PdfFileTransformer/PyPDF2/pdf.py
create mode 100644 PdfFileTransformer/PyPDF2/utils.py
create mode 100644 PdfFileTransformer/PyPDF2/xmp.py
create mode 100644 PdfFileTransformer/__init__.py
create mode 100644 PdfFileTransformer/pdf.py
create mode 100644 PolyglotFile/__init__.py
create mode 100644 PolyglotFile/polyglotpdfzip.py
create mode 100644 PolyglotFile/polyglotszippdf.py
create mode 100644 PolyglotFile/polyglotzippdf.py
create mode 100644 README.md
create mode 100644 ZipFileTransformer/__init__.py
create mode 100644 ZipFileTransformer/zip.py
create mode 100644 ZipFileTransformer/zipfile.py
create mode 100755 caradoc
create mode 100755 pdfcat
create mode 100644 tests/samples/descriptions.txt
create mode 100644 tests/samples/test1.pdf
create mode 100644 tests/samples/test1.zip
create mode 100644 tests/samples/test1_normalized.pdf
create mode 100755 tests/test_pdf_add_data.py
create mode 100755 tests/test_pdf_normalisation.py
create mode 100755 tests/test_pdf_rebuild.py
create mode 100755 tests/test_polyglot_pdfzip.py
create mode 100755 tests/test_rebuild_zip.py
create mode 100755 tests/test_zip.py
create mode 100755 truepolyglot
create mode 100644 website/css/styles.css
create mode 100644 website/css/styles2.css
create mode 100644 website/favicon.ico
create mode 100755 website/gen_pocs.sh
create mode 100644 website/index.html
create mode 100644 website/robots.txt
create mode 100644 website/samples/pdfzip/poc1/archive.zip
create mode 100644 website/samples/pdfzip/poc1/doc.pdf
create mode 100644 website/samples/pdfzip/poc1/polyglot.pdf
create mode 100644 website/samples/pdfzip/poc2/file-FILE5_32.zip
create mode 100644 website/samples/pdfzip/poc2/orwell_1984.pdf
create mode 100644 website/samples/pdfzip/poc2/polyglot.pdf
create mode 100644 website/samples/pdfzip/poc3/fasmw17304.zip
create mode 100644 website/samples/pdfzip/poc3/polyglot.pdf
create mode 100644 website/samples/pdfzip/poc3/x86asm.pdf
create mode 100644 website/samples/pdfzip/poc6/eicar.zip
create mode 100644 website/samples/pdfzip/poc6/hexinator.pdf
create mode 100644 website/samples/pdfzip/poc6/polyglot.pdf
create mode 100644 website/samples/szippdf/poc5/electronics.pdf
create mode 100644 website/samples/szippdf/poc5/hello_world.jar
create mode 100644 website/samples/szippdf/poc5/polyglot.pdf
create mode 100644 website/samples/zippdf/poc4/archive.zip
create mode 100644 website/samples/zippdf/poc4/doc.pdf
create mode 100644 website/samples/zippdf/poc4/polyglot.pdf
create mode 100755 website/start_server.sh
create mode 100755 website/update.sh
diff --git a/PdfFileTransformer/PyPDF2/__init__.py b/PdfFileTransformer/PyPDF2/__init__.py
new file mode 100644
index 0000000..f458c0e
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/__init__.py
@@ -0,0 +1,5 @@
+from .pdf import PdfFileReader, PdfFileWriter
+from .merger import PdfFileMerger
+from .pagerange import PageRange, parse_filename_page_ranges
+from ._version import __version__
+__all__ = ["pdf", "PdfFileMerger"]
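The vendored package re-exports the reader/writer API at the top level. A minimal sketch of consuming it (not part of the patch; assumes the repository root is on sys.path and uses the sample file added by this commit):

```python
# Hedged sketch: exercising the vendored PyPDF2's public API.
from PdfFileTransformer.PyPDF2 import PdfFileReader

with open("tests/samples/test1.pdf", "rb") as f:
    reader = PdfFileReader(f, strict=False)
    print(reader.getNumPages())
```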
diff --git a/PdfFileTransformer/PyPDF2/_version.py b/PdfFileTransformer/PyPDF2/_version.py
new file mode 100644
index 0000000..5fc7041
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/_version.py
@@ -0,0 +1 @@
+__version__ = '1.26.0'
diff --git a/PdfFileTransformer/PyPDF2/filters.py b/PdfFileTransformer/PyPDF2/filters.py
new file mode 100644
index 0000000..57446f4
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/filters.py
@@ -0,0 +1,424 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of stream filters for PDF.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+import math
+
+from .utils import PdfReadError, ord_, chr_, paethPredictor
+from sys import version_info
+if version_info < ( 3, 0 ):
+ from cStringIO import StringIO
+else:
+ from io import StringIO
+ import struct
+
+try:
+ import zlib
+
+ def decompress(data):
+ return zlib.decompress(data)
+
+ def compress(data):
+ return zlib.compress(data)
+
+except ImportError:
+ # Unable to import zlib. Attempt to use the System.IO.Compression
+ # library from the .NET framework. (IronPython only)
+ import System
+ from System import IO, Collections, Array
+
+ def _string_to_bytearr(buf):
+ retval = Array.CreateInstance(System.Byte, len(buf))
+ for i in range(len(buf)):
+ retval[i] = ord(buf[i])
+ return retval
+
+ def _bytearr_to_string(bytes):
+ retval = ""
+ for i in range(bytes.Length):
+ retval += chr(bytes[i])
+ return retval
+
+ def _read_bytes(stream):
+ ms = IO.MemoryStream()
+ buf = Array.CreateInstance(System.Byte, 2048)
+ while True:
+ bytes = stream.Read(buf, 0, buf.Length)
+ if bytes == 0:
+ break
+ else:
+ ms.Write(buf, 0, bytes)
+ retval = ms.ToArray()
+ ms.Close()
+ return retval
+
+ def decompress(data):
+ bytes = _string_to_bytearr(data)
+ ms = IO.MemoryStream()
+ ms.Write(bytes, 0, bytes.Length)
+ ms.Position = 0 # fseek 0
+ gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
+ bytes = _read_bytes(gz)
+ retval = _bytearr_to_string(bytes)
+ gz.Close()
+ return retval
+
+ def compress(data):
+ bytes = _string_to_bytearr(data)
+ ms = IO.MemoryStream()
+ gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
+ gz.Write(bytes, 0, bytes.Length)
+ gz.Close()
+ ms.Position = 0 # fseek 0
+ bytes = ms.ToArray()
+ retval = _bytearr_to_string(bytes)
+ ms.Close()
+ return retval
+
+
+class FlateDecode(object):
+ def decode(data, decodeParms):
+ data = decompress(data)
+ predictor = 1
+ if decodeParms:
+ try:
+ predictor = decodeParms.get("/Predictor", 1)
+ except AttributeError:
+ pass # usually an array with a null object was read
+
+ # predictor 1 == no predictor
+ if predictor != 1:
+ columns = decodeParms["/Columns"]
+ # PNG prediction:
+ if predictor >= 10 and predictor <= 15:
+ output = StringIO()
+ # PNG prediction can vary from row to row
+ rowlength = columns + 1
+ assert len(data) % rowlength == 0
+ prev_rowdata = (0,) * rowlength
+ for row in range(len(data) // rowlength):
+ rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
+ filterByte = rowdata[0]
+ if filterByte == 0:
+ pass
+ elif filterByte == 1:
+ for i in range(2, rowlength):
+ rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
+ elif filterByte == 2:
+ for i in range(1, rowlength):
+ rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
+ elif filterByte == 3:
+ for i in range(1, rowlength):
+ left = rowdata[i-1] if i > 1 else 0
+ # PNG "Average" filter: floor of the mean of the left and up bytes
+ floor = math.floor((left + prev_rowdata[i]) / 2)
+ rowdata[i] = (rowdata[i] + int(floor)) % 256
+ elif filterByte == 4:
+ for i in range(1, rowlength):
+ left = rowdata[i - 1] if i > 1 else 0
+ up = prev_rowdata[i]
+ up_left = prev_rowdata[i - 1] if i > 1 else 0
+ paeth = paethPredictor(left, up, up_left)
+ rowdata[i] = (rowdata[i] + paeth) % 256
+ else:
+ # unsupported PNG filter
+ raise PdfReadError("Unsupported PNG filter %r" % filterByte)
+ prev_rowdata = rowdata
+ output.write(''.join([chr(x) for x in rowdata[1:]]))
+ data = output.getvalue()
+ else:
+ # unsupported predictor
+ raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
+ return data
+ decode = staticmethod(decode)
+
+ def encode(data):
+ return compress(data)
+ encode = staticmethod(encode)
+
+
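With no /DecodeParms the predictor defaults to 1 (none) and FlateDecode reduces to plain zlib. A round-trip sketch, not part of the patch:

```python
# Hedged sketch: FlateDecode without a predictor is plain zlib.
from PdfFileTransformer.PyPDF2.filters import FlateDecode

raw = b"BT /F1 12 Tf (Hello) Tj ET"
encoded = FlateDecode.encode(raw)            # zlib.compress
decoded = FlateDecode.decode(encoded, None)  # decodeParms=None -> predictor 1
assert decoded == raw
```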
+class ASCIIHexDecode(object):
+ def decode(data, decodeParms=None):
+ retval = ""
+ char = ""
+ x = 0
+ while True:
+ c = data[x]
+ if c == ">":
+ break
+ elif c.isspace():
+ x += 1
+ continue
+ char += c
+ if len(char) == 2:
+ retval += chr(int(char, base=16))
+ char = ""
+ x += 1
+ assert char == ""
+ return retval
+ decode = staticmethod(decode)
+
+
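ASCIIHexDecode consumes hex pairs, skipping whitespace, until the '>' end-of-data marker. A small illustrative sketch (not part of the patch):

```python
# Hedged sketch: hex pairs for "Hello"; whitespace ignored, '>' terminates.
from PdfFileTransformer.PyPDF2.filters import ASCIIHexDecode

assert ASCIIHexDecode.decode("48 65 6C 6C 6F>") == "Hello"
```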
+class LZWDecode(object):
+ """Taken from:
+ http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
+ """
+ class decoder(object):
+ def __init__(self, data):
+ self.STOP=257
+ self.CLEARDICT=256
+ self.data=data
+ self.bytepos=0
+ self.bitpos=0
+ self.dict=[""]*4096
+ for i in range(256):
+ self.dict[i]=chr(i)
+ self.resetDict()
+
+ def resetDict(self):
+ self.dictlen=258
+ self.bitspercode=9
+
+ def nextCode(self):
+ fillbits=self.bitspercode
+ value=0
+ while fillbits>0 :
+ if self.bytepos >= len(self.data):
+ return -1
+ nextbits=ord_(self.data[self.bytepos])
+ bitsfromhere=8-self.bitpos
+ if bitsfromhere>fillbits:
+ bitsfromhere=fillbits
+ value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
+ (0xff >> (8-bitsfromhere))) <<
+ (fillbits-bitsfromhere))
+ fillbits -= bitsfromhere
+ self.bitpos += bitsfromhere
+ if self.bitpos >=8:
+ self.bitpos=0
+ self.bytepos = self.bytepos+1
+ return value
+
+ def decode(self):
+ """ algorithm derived from:
+ http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
+ and the PDF Reference
+ """
+ cW = self.CLEARDICT;
+ baos=""
+ while True:
+ pW = cW;
+ cW = self.nextCode();
+ if cW == -1:
+ raise PdfReadError("Missed the stop code in LZWDecode!")
+ if cW == self.STOP:
+ break;
+ elif cW == self.CLEARDICT:
+ self.resetDict();
+ elif pW == self.CLEARDICT:
+ baos+=self.dict[cW]
+ else:
+ if cW < self.dictlen:
+ baos += self.dict[cW]
+ p=self.dict[pW]+self.dict[cW][0]
+ self.dict[self.dictlen]=p
+ self.dictlen+=1
+ else:
+ p=self.dict[pW]+self.dict[pW][0]
+ baos+=p
+ self.dict[self.dictlen] = p;
+ self.dictlen+=1
+ if (self.dictlen >= (1 << self.bitspercode) - 1 and
+ self.bitspercode < 12):
+ self.bitspercode+=1
+ return baos
+
+ @staticmethod
+ def decode(data,decodeParams=None):
+ return LZWDecode.decoder(data).decode()
+
+
+class ASCII85Decode(object):
+ def decode(data, decodeParms=None):
+ if version_info < ( 3, 0 ):
+ retval = ""
+ group = []
+ x = 0
+ hitEod = False
+ # remove all whitespace from data
+ data = [y for y in data if not (y in ' \n\r\t')]
+ while not hitEod:
+ c = data[x]
+ if len(retval) == 0 and c == "<" and data[x+1] == "~":
+ x += 2
+ continue
+ #elif c.isspace():
+ # x += 1
+ # continue
+ elif c == 'z':
+ assert len(group) == 0
+ retval += '\x00\x00\x00\x00'
+ x += 1
+ continue
+ elif c == "~" and data[x+1] == ">":
+ if len(group) != 0:
+ # cannot have a final group of just 1 char
+ assert len(group) > 1
+ cnt = len(group) - 1
+ group += [ 85, 85, 85 ]
+ hitEod = cnt
+ else:
+ break
+ else:
+ c = ord(c) - 33
+ assert c >= 0 and c < 85
+ group += [ c ]
+ if len(group) >= 5:
+ b = group[0] * (85**4) + \
+ group[1] * (85**3) + \
+ group[2] * (85**2) + \
+ group[3] * 85 + \
+ group[4]
+ # a full group encodes an unsigned 32-bit value; 0xFFFFFFFF is valid
+ assert b <= (2**32 - 1)
+ c4 = chr((b >> 0) % 256)
+ c3 = chr((b >> 8) % 256)
+ c2 = chr((b >> 16) % 256)
+ c1 = chr(b >> 24)
+ retval += (c1 + c2 + c3 + c4)
+ if hitEod:
+ retval = retval[:-4+hitEod]
+ group = []
+ x += 1
+ return retval
+ else:
+ if isinstance(data, str):
+ data = data.encode('ascii')
+ n = b = 0
+ out = bytearray()
+ for c in data:
+ if ord('!') <= c and c <= ord('u'):
+ n += 1
+ b = b*85+(c-33)
+ if n == 5:
+ out += struct.pack(b'>L',b)
+ n = b = 0
+ elif c == ord('z'):
+ assert n == 0
+ out += b'\0\0\0\0'
+ elif c == ord('~'):
+ if n:
+ for _ in range(5-n):
+ b = b*85+84
+ out += struct.pack(b'>L',b)[:n-1]
+ break
+ return bytes(out)
+ decode = staticmethod(decode)
+
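On Python 3 the decoder accepts standard Ascii85 terminated by the '~>' end marker; a partial final group of n characters yields n-1 bytes. An illustrative sketch (not part of the patch) using the standard library's encoder:

```python
# Hedged sketch (Python 3 branch): round trip via base64.a85encode.
import base64
from PdfFileTransformer.PyPDF2.filters import ASCII85Decode

encoded = base64.a85encode(b"polyglot") + b"~>"  # append the PDF EOD marker
assert ASCII85Decode.decode(encoded) == b"polyglot"
```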
+class DCTDecode(object):
+ def decode(data, decodeParms=None):
+ return data
+ decode = staticmethod(decode)
+
+class JPXDecode(object):
+ def decode(data, decodeParms=None):
+ return data
+ decode = staticmethod(decode)
+
+class CCITTFaxDecode(object):
+ def decode(data, decodeParms=None, height=0):
+ if decodeParms:
+ if decodeParms.get("/K", 1) == -1:
+ CCITTgroup = 4
+ else:
+ CCITTgroup = 3
+
+ width = decodeParms["/Columns"]
+ imgSize = len(data)
+ tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
+ tiffHeader = struct.pack(tiff_header_struct,
+ b'II', # Byte order indication: Little endian
+ 42, # Version number (always 42)
+ 8, # Offset to first IFD
+ 8, # Number of tags in IFD
+ 256, 4, 1, width, # ImageWidth, LONG, 1, width
+ 257, 4, 1, height, # ImageLength, LONG, 1, length
+ 258, 3, 1, 1, # BitsPerSample, SHORT, 1, 1
+ 259, 3, 1, CCITTgroup, # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
+ 262, 3, 1, 0, # Thresholding, SHORT, 1, 0 = WhiteIsZero
+ 273, 4, 1, struct.calcsize(tiff_header_struct), # StripOffsets, LONG, 1, length of header
+ 278, 4, 1, height, # RowsPerStrip, LONG, 1, length
+ 279, 4, 1, imgSize, # StripByteCounts, LONG, 1, size of image
+ 0 # last IFD
+ )
+
+ return tiffHeader + data
+
+ decode = staticmethod(decode)
+
+def decodeStreamData(stream):
+ from .generic import NameObject
+ filters = stream.get("/Filter", ())
+
+ if len(filters) and not isinstance(filters[0], NameObject):
+ # we have a single filter instance
+ filters = (filters,)
+ data = stream._data
+ # If there is no data to decode, do not attempt to decode it.
+ if data:
+ for filterType in filters:
+ if filterType == "/FlateDecode" or filterType == "/Fl":
+ data = FlateDecode.decode(data, stream.get("/DecodeParms"))
+ elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
+ data = ASCIIHexDecode.decode(data)
+ elif filterType == "/LZWDecode" or filterType == "/LZW":
+ data = LZWDecode.decode(data, stream.get("/DecodeParms"))
+ elif filterType == "/ASCII85Decode" or filterType == "/A85":
+ data = ASCII85Decode.decode(data)
+ elif filterType == "/DCTDecode":
+ data = DCTDecode.decode(data)
+ elif filterType == "/JPXDecode":
+ data = JPXDecode.decode(data)
+ elif filterType == "/CCITTFaxDecode":
+ height = stream.get("/Height", ())
+ data = CCITTFaxDecode.decode(data, stream.get("/DecodeParms"), height)
+ elif filterType == "/Crypt":
+ # the key used elsewhere in this function is /DecodeParms; the PDF
+ # spec spells it that way, not /DecodeParams
+ decodeParams = stream.get("/DecodeParms", {})
+ if "/Name" not in decodeParams and "/Type" not in decodeParams:
+ pass
+ else:
+ raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
+ else:
+ # unsupported filter
+ raise NotImplementedError("unsupported filter %s" % filterType)
+ return data
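decodeStreamData only needs an object with a dict-style get() and a _data attribute, so the /Filter dispatch can be exercised with a stand-in. A minimal sketch, not part of the patch:

```python
# Hedged sketch: a dict with a _data attribute is enough to drive the
# /Filter dispatch down the FlateDecode path.
import zlib
from PdfFileTransformer.PyPDF2 import filters

class FakeStream(dict):
    pass

stream = FakeStream({"/Filter": "/FlateDecode"})
stream._data = zlib.compress(b"BT /F1 12 Tf (Hi) Tj ET")
assert filters.decodeStreamData(stream) == b"BT /F1 12 Tf (Hi) Tj ET"
```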
diff --git a/PdfFileTransformer/PyPDF2/generic.py b/PdfFileTransformer/PyPDF2/generic.py
new file mode 100644
index 0000000..959957d
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/generic.py
@@ -0,0 +1,1228 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of generic PDF objects (dictionary, number, string, and so on)
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+import re
+from .utils import readNonWhitespace, RC4_encrypt, skipOverComment
+from .utils import b_, u_, chr_, ord_
+from .utils import PdfStreamError
+import warnings
+from . import filters
+from . import utils
+import decimal
+import codecs
+import sys
+#import debugging
+
+ObjectPrefix = b_('/<[tf(n%')
+NumberSigns = b_('+-')
+IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
+
+
+def readObject(stream, pdf):
+ tok = stream.read(1)
+ stream.seek(-1, 1) # reset to start
+ idx = ObjectPrefix.find(tok)
+ if idx == 0:
+ # name object
+ return NameObject.readFromStream(stream, pdf)
+ elif idx == 1:
+ # hexadecimal string OR dictionary
+ peek = stream.read(2)
+ stream.seek(-2, 1) # reset to start
+ if peek == b_('<<'):
+ return DictionaryObject.readFromStream(stream, pdf)
+ else:
+ return readHexStringFromStream(stream)
+ elif idx == 2:
+ # array object
+ return ArrayObject.readFromStream(stream, pdf)
+ elif idx == 3 or idx == 4:
+ # boolean object
+ return BooleanObject.readFromStream(stream)
+ elif idx == 5:
+ # string object
+ return readStringFromStream(stream)
+ elif idx == 6:
+ # null object
+ return NullObject.readFromStream(stream)
+ elif idx == 7:
+ # comment
+ while tok not in (b_('\r'), b_('\n')):
+ tok = stream.read(1)
+ # Prevents an infinite loop by raising an error if the stream is at
+ # the EOF
+ if len(tok) <= 0:
+ raise PdfStreamError("File ended unexpectedly.")
+ tok = readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ return readObject(stream, pdf)
+ else:
+ # number object OR indirect reference
+ peek = stream.read(20)
+ stream.seek(-len(peek), 1) # reset to start
+ if IndirectPattern.match(peek) != None:
+ return IndirectObject.readFromStream(stream, pdf)
+ else:
+ return NumberObject.readFromStream(stream)
+
+
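readObject() peeks at the next byte and dispatches on it; parsing a bare number never touches the pdf argument, so None suffices for a quick check. A sketch, not part of the patch:

```python
# Hedged sketch: the fall-through branch parses a NumberObject.
from io import BytesIO
from PdfFileTransformer.PyPDF2.generic import readObject

obj = readObject(BytesIO(b"42 "), None)  # trailing space delimits the token
print(obj, type(obj).__name__)           # 42 NumberObject
```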
+class PdfObject(object):
+ def getObject(self):
+ """Resolves indirect references."""
+ return self
+
+
+class NullObject(PdfObject):
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_("null"))
+
+ def readFromStream(stream):
+ nulltxt = stream.read(4)
+ if nulltxt != b_("null"):
+ raise utils.PdfReadError("Could not read Null object")
+ return NullObject()
+ readFromStream = staticmethod(readFromStream)
+
+
+class BooleanObject(PdfObject):
+ def __init__(self, value):
+ self.value = value
+
+ def writeToStream(self, stream, encryption_key):
+ if self.value:
+ stream.write(b_("true"))
+ else:
+ stream.write(b_("false"))
+
+ def readFromStream(stream):
+ word = stream.read(4)
+ if word == b_("true"):
+ return BooleanObject(True)
+ elif word == b_("fals"):
+ stream.read(1)
+ return BooleanObject(False)
+ else:
+ raise utils.PdfReadError('Could not read Boolean object')
+ readFromStream = staticmethod(readFromStream)
+
+
+class ArrayObject(list, PdfObject):
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_("["))
+ for data in self:
+ stream.write(b_(" "))
+ data.writeToStream(stream, encryption_key)
+ stream.write(b_(" ]"))
+
+ def readFromStream(stream, pdf):
+ arr = ArrayObject()
+ tmp = stream.read(1)
+ if tmp != b_("["):
+ raise utils.PdfReadError("Could not read array")
+ while True:
+ # skip leading whitespace
+ tok = stream.read(1)
+ while tok.isspace():
+ tok = stream.read(1)
+ stream.seek(-1, 1)
+ # check for array ending
+ peekahead = stream.read(1)
+ if peekahead == b_("]"):
+ break
+ stream.seek(-1, 1)
+ # read and append obj
+ arr.append(readObject(stream, pdf))
+ return arr
+ readFromStream = staticmethod(readFromStream)
+
+
+class IndirectObject(PdfObject):
+ def __init__(self, idnum, generation, pdf):
+ self.idnum = idnum
+ self.generation = generation
+ self.pdf = pdf
+
+ def getObject(self):
+ return self.pdf.getObject(self).getObject()
+
+ def __repr__(self):
+ return "IndirectObject(%r, %r)" % (self.idnum, self.generation)
+
+ def __eq__(self, other):
+ return (
+ other != None and
+ isinstance(other, IndirectObject) and
+ self.idnum == other.idnum and
+ self.generation == other.generation and
+ self.pdf is other.pdf
+ )
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_("%s %s R" % (self.idnum, self.generation)))
+
+ def readFromStream(stream, pdf):
+ idnum = b_("")
+ while True:
+ tok = stream.read(1)
+ if not tok:
+ # stream has truncated prematurely
+ raise PdfStreamError("Stream has ended unexpectedly")
+ if tok.isspace():
+ break
+ idnum += tok
+ generation = b_("")
+ while True:
+ tok = stream.read(1)
+ if not tok:
+ # stream has truncated prematurely
+ raise PdfStreamError("Stream has ended unexpectedly")
+ if tok.isspace():
+ if not generation:
+ continue
+ break
+ generation += tok
+ r = readNonWhitespace(stream)
+ if r != b_("R"):
+ raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
+ return IndirectObject(int(idnum), int(generation), pdf)
+ readFromStream = staticmethod(readFromStream)
+
+
+class FloatObject(decimal.Decimal, PdfObject):
+ def __new__(cls, value="0", context=None):
+ try:
+ return decimal.Decimal.__new__(cls, utils.str_(value), context)
+ except Exception:
+ return decimal.Decimal.__new__(cls, str(value))
+
+ def __repr__(self):
+ if self == self.to_integral():
+ return str(self.quantize(decimal.Decimal(1)))
+ else:
+ # Standard formatting adds useless extraneous zeros.
+ o = "%.5f" % self
+ # Remove the zeros.
+ while o and o[-1] == '0':
+ o = o[:-1]
+ return o
+
+ def as_numeric(self):
+ return float(b_(repr(self)))
+
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_(repr(self)))
+
+
+class NumberObject(int, PdfObject):
+ NumberPattern = re.compile(b_('[^+-.0-9]'))
+ ByteDot = b_(".")
+
+ def __new__(cls, value):
+ val = int(value)
+ try:
+ return int.__new__(cls, val)
+ except OverflowError:
+ return int.__new__(cls, 0)
+
+ def as_numeric(self):
+ return int(b_(repr(self)))
+
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_(repr(self)))
+
+ def readFromStream(stream):
+ num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
+ if num.find(NumberObject.ByteDot) != -1:
+ return FloatObject(num)
+ else:
+ return NumberObject(num)
+ readFromStream = staticmethod(readFromStream)
+
+
+##
+# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
+# TextStringObject to represent the string.
+def createStringObject(string):
+ if isinstance(string, utils.string_type):
+ return TextStringObject(string)
+ elif isinstance(string, utils.bytes_type):
+ try:
+ if string.startswith(codecs.BOM_UTF16_BE):
+ retval = TextStringObject(string.decode("utf-16"))
+ retval.autodetect_utf16 = True
+ return retval
+ else:
+ # This is probably a big performance hit here, but we need to
+ # convert string objects into the text/unicode-aware version if
+ # possible... and the only way to check if that's possible is
+ # to try. Some strings are strings, some are just byte arrays.
+ retval = TextStringObject(decode_pdfdocencoding(string))
+ retval.autodetect_pdfdocencoding = True
+ return retval
+ except UnicodeDecodeError:
+ return ByteStringObject(string)
+ else:
+ raise TypeError("createStringObject should have str or unicode arg")
+
+
+def readHexStringFromStream(stream):
+ stream.read(1)
+ txt = ""
+ x = b_("")
+ while True:
+ tok = readNonWhitespace(stream)
+ if not tok:
+ # stream has truncated prematurely
+ raise PdfStreamError("Stream has ended unexpectedly")
+ if tok == b_(">"):
+ break
+ x += tok
+ if len(x) == 2:
+ txt += chr(int(x, base=16))
+ x = b_("")
+ if len(x) == 1:
+ x += b_("0")
+ if len(x) == 2:
+ txt += chr(int(x, base=16))
+ return createStringObject(b_(txt))
+
+
+def readStringFromStream(stream):
+ tok = stream.read(1)
+ parens = 1
+ txt = b_("")
+ while True:
+ tok = stream.read(1)
+ if not tok:
+ # stream has truncated prematurely
+ raise PdfStreamError("Stream has ended unexpectedly")
+ if tok == b_("("):
+ parens += 1
+ elif tok == b_(")"):
+ parens -= 1
+ if parens == 0:
+ break
+ elif tok == b_("\\"):
+ tok = stream.read(1)
+ ESCAPE_DICT = {b_("n") : b_("\n"),
+ b_("r") : b_("\r"),
+ b_("t") : b_("\t"),
+ b_("b") : b_("\b"),
+ b_("f") : b_("\f"),
+ b_("c") : b_("\\c"),
+ b_("(") : b_("("),
+ b_(")") : b_(")"),
+ b_("/") : b_("/"),
+ b_("\\") : b_("\\"),
+ b_(" ") : b_(" "),
+ b_("%") : b_("%"),
+ b_("<") : b_("<"),
+ b_(">") : b_(">"),
+ b_("[") : b_("["),
+ b_("]") : b_("]"),
+ b_("#") : b_("#"),
+ b_("_") : b_("_"),
+ b_("&") : b_("&"),
+ b_('$') : b_('$'),
+ }
+ try:
+ tok = ESCAPE_DICT[tok]
+ except KeyError:
+ if tok.isdigit():
+ # "The number ddd may consist of one, two, or three
+ # octal digits; high-order overflow shall be ignored.
+ # Three octal digits shall be used, with leading zeros
+ # as needed, if the next character of the string is also
+ # a digit." (PDF reference 7.3.4.2, p 16)
+ for i in range(2):
+ ntok = stream.read(1)
+ if ntok.isdigit():
+ tok += ntok
+ else:
+ break
+ tok = b_(chr(int(tok, base=8)))
+ elif tok in b_("\n\r"):
+ # This case is hit when a backslash followed by a line
+ # break occurs. If it's a multi-char EOL, consume the
+ # second character:
+ tok = stream.read(1)
+ if not tok in b_("\n\r"):
+ stream.seek(-1, 1)
+ # Then don't add anything to the actual string, since this
+ # line break was escaped:
+ tok = b_('')
+ else:
+ raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
+ txt += tok
+ return createStringObject(txt)
+
+
+##
+# Represents a string object where the text encoding could not be determined.
+# This occurs quite often, as the PDF spec doesn't provide an alternate way to
+# represent strings -- for example, the encryption data stored in files (like
+# /O) is clearly not text, but is still stored in a "String" object.
+class ByteStringObject(utils.bytes_type, PdfObject):
+
+ ##
+ # For compatibility with TextStringObject.original_bytes. This method
+ # returns self.
+ original_bytes = property(lambda self: self)
+
+ def writeToStream(self, stream, encryption_key):
+ bytearr = self
+ if encryption_key:
+ bytearr = RC4_encrypt(encryption_key, bytearr)
+ stream.write(b_("<"))
+ stream.write(utils.hexencode(bytearr))
+ stream.write(b_(">"))
+
+
+##
+# Represents a string object that has been decoded into a real unicode string.
+# If read from a PDF document, this string appeared to match the
+# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
+# occur.
+class TextStringObject(utils.string_type, PdfObject):
+ autodetect_pdfdocencoding = False
+ autodetect_utf16 = False
+
+ ##
+ # It is occasionally possible that a text string object gets created where
+ # a byte string object was expected due to the autodetection mechanism --
+ # if that occurs, this "original_bytes" property can be used to
+ # back-calculate what the original encoded bytes were.
+ original_bytes = property(lambda self: self.get_original_bytes())
+
+ def get_original_bytes(self):
+ # We're a text string object, but the library is trying to get our raw
+ # bytes. This can happen if we auto-detected this string as text, but
+ # we were wrong. It's pretty common. Return the original bytes that
+ # would have been used to create this object, based upon the autodetect
+ # method.
+ if self.autodetect_utf16:
+ return codecs.BOM_UTF16_BE + self.encode("utf-16be")
+ elif self.autodetect_pdfdocencoding:
+ return encode_pdfdocencoding(self)
+ else:
+ raise Exception("no information about original bytes")
+
+ def writeToStream(self, stream, encryption_key):
+ # Try to write the string out as a PDFDocEncoding encoded string. It's
+ # nicer to look at in the PDF file. Sadly, we take a performance hit
+ # here for trying...
+ try:
+ bytearr = encode_pdfdocencoding(self)
+ except UnicodeEncodeError:
+ bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
+ if encryption_key:
+ bytearr = RC4_encrypt(encryption_key, bytearr)
+ obj = ByteStringObject(bytearr)
+ obj.writeToStream(stream, None)
+ else:
+ stream.write(b_("("))
+ for c in bytearr:
+ if not chr_(c).isalnum() and c != b_(' '):
+ stream.write(b_("\\%03o" % ord_(c)))
+ else:
+ stream.write(b_(chr_(c)))
+ stream.write(b_(")"))
+
+
+class NameObject(str, PdfObject):
+ delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]"))
+ surfix = b_("/")
+
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_(self))
+
+ def readFromStream(stream, pdf):
+ debug = False
+ if debug: print((stream.tell()))
+ name = stream.read(1)
+ if name != NameObject.surfix:
+ raise utils.PdfReadError("name read error")
+ name += utils.readUntilRegex(stream, NameObject.delimiterPattern,
+ ignore_eof=True)
+ if debug: print(name)
+ try:
+ return NameObject(name.decode('utf-8'))
+ except (UnicodeEncodeError, UnicodeDecodeError) as e:
+ # Name objects should represent irregular characters
+ # with a '#' followed by the symbol's hex number
+ if not pdf.strict:
+ warnings.warn("Illegal character in Name Object", utils.PdfReadWarning)
+ return NameObject(name)
+ else:
+ raise utils.PdfReadError("Illegal character in Name Object")
+
+ readFromStream = staticmethod(readFromStream)
+
+
+class DictionaryObject(dict, PdfObject):
+ def raw_get(self, key):
+ return dict.__getitem__(self, key)
+
+ def __setitem__(self, key, value):
+ if not isinstance(key, PdfObject):
+ raise ValueError("key must be PdfObject")
+ if not isinstance(value, PdfObject):
+ raise ValueError("value must be PdfObject")
+ return dict.__setitem__(self, key, value)
+
+ def setdefault(self, key, value=None):
+ if not isinstance(key, PdfObject):
+ raise ValueError("key must be PdfObject")
+ if not isinstance(value, PdfObject):
+ raise ValueError("value must be PdfObject")
+ return dict.setdefault(self, key, value)
+
+ def __getitem__(self, key):
+ return dict.__getitem__(self, key).getObject()
+
+ ##
+ # Retrieves XMP (Extensible Metadata Platform) data relevant to
+ # this object, if available.
+ #
+ # Stability: Added in v1.12, will exist for all future v1.x releases.
+ # @return Returns a {@link #xmp.XmpInformation XmpInformation} instance
+ # that can be used to access XMP metadata from the document. Can also
+ # return None if no metadata was found on the document root.
+ def getXmpMetadata(self):
+ metadata = self.get("/Metadata", None)
+ if metadata == None:
+ return None
+ metadata = metadata.getObject()
+ from . import xmp
+ if not isinstance(metadata, xmp.XmpInformation):
+ metadata = xmp.XmpInformation(metadata)
+ self[NameObject("/Metadata")] = metadata
+ return metadata
+
+ ##
+ # Read-only property that accesses the {@link
+ # #DictionaryObject.getXmpMetadata getXmpMetadata} function.
+ #
+ # Stability: Added in v1.12, will exist for all future v1.x releases.
+ xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
+
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_("<<\n"))
+ for key, value in list(self.items()):
+ key.writeToStream(stream, encryption_key)
+ stream.write(b_(" "))
+ value.writeToStream(stream, encryption_key)
+ stream.write(b_("\n"))
+ stream.write(b_(">>"))
+
+ def readFromStream(stream, pdf):
+ debug = False
+ tmp = stream.read(2)
+ if tmp != b_("<<"):
+ raise utils.PdfReadError("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
+ data = {}
+ while True:
+ tok = readNonWhitespace(stream)
+ if tok == b_('\x00'):
+ continue
+ elif tok == b_('%'):
+ stream.seek(-1, 1)
+ skipOverComment(stream)
+ continue
+ if not tok:
+ # stream has truncated prematurely
+ raise PdfStreamError("Stream has ended unexpectedly")
+
+ if debug: print(("Tok:", tok))
+ if tok == b_(">"):
+ stream.read(1)
+ break
+ stream.seek(-1, 1)
+ key = readObject(stream, pdf)
+ tok = readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ value = readObject(stream, pdf)
+ # test membership, not truthiness: a falsy value (e.g. 0) is still
+ # a definition and must not be silently overwritten
+ if key not in data:
+ data[key] = value
+ elif pdf.strict:
+ # multiple definitions of key not permitted
+ raise utils.PdfReadError("Multiple definitions in dictionary at byte %s for key %s" \
+ % (utils.hexStr(stream.tell()), key))
+ else:
+ warnings.warn("Multiple definitions in dictionary at byte %s for key %s" \
+ % (utils.hexStr(stream.tell()), key), utils.PdfReadWarning)
+
+ pos = stream.tell()
+ s = readNonWhitespace(stream)
+ if s == b_('s') and stream.read(5) == b_('tream'):
+ eol = stream.read(1)
+ # odd PDF file output has spaces after 'stream' keyword but before EOL.
+ # patch provided by Danial Sandler
+ while eol == b_(' '):
+ eol = stream.read(1)
+ assert eol in (b_("\n"), b_("\r"))
+ if eol == b_("\r"):
+ # read \n after
+ if stream.read(1) != b_('\n'):
+ stream.seek(-1, 1)
+ # this is a stream object, not a dictionary
+ assert "/Length" in data
+ length = data["/Length"]
+ if debug: print(data)
+ if isinstance(length, IndirectObject):
+ t = stream.tell()
+ length = pdf.getObject(length)
+ stream.seek(t, 0)
+ data["__streamdata__"] = stream.read(length)
+ if debug: print("here")
+ #if debug: print(binascii.hexlify(data["__streamdata__"]))
+ e = readNonWhitespace(stream)
+ ndstream = stream.read(8)
+ if (e + ndstream) != b_("endstream"):
+ # (sigh) - the odd PDF file has a length that is too long, so
+ # we need to read backwards to find the "endstream" ending.
+ # ReportLab (unknown version) generates files with this bug,
+ # and Python users working with PDF files tend to be our audience.
+ # We need to do this to correct the stream data and chop off
+ # an extra character.
+ pos = stream.tell()
+ stream.seek(-10, 1)
+ end = stream.read(9)
+ if end == b_("endstream"):
+ # we found it by looking back one character further.
+ data["__streamdata__"] = data["__streamdata__"][:-1]
+ else:
+ if debug: print(("E", e, ndstream, debugging.toHex(end)))
+ stream.seek(pos, 0)
+ raise utils.PdfReadError("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
+ else:
+ stream.seek(pos, 0)
+ if "__streamdata__" in data:
+ return StreamObject.initializeFromDictionary(data)
+ else:
+ retval = DictionaryObject()
+ retval.update(data)
+ return retval
+ readFromStream = staticmethod(readFromStream)
+
+
+class TreeObject(DictionaryObject):
+ def __init__(self):
+ DictionaryObject.__init__(self)
+
+ def hasChildren(self):
+ return '/First' in self
+
+ def __iter__(self):
+ return self.children()
+
+ def children(self):
+ if not self.hasChildren():
+ # PEP 479: raising StopIteration inside a generator is an error on
+ # Python 3.7+; a plain return ends the generator cleanly.
+ return
+
+ child = self['/First']
+ while True:
+ yield child
+ if child == self['/Last']:
+ return
+ child = child['/Next']
+
+ def addChild(self, child, pdf):
+ childObj = child.getObject()
+ child = pdf.getReference(childObj)
+ assert isinstance(child, IndirectObject)
+
+ if '/First' not in self:
+ self[NameObject('/First')] = child
+ self[NameObject('/Count')] = NumberObject(0)
+ prev = None
+ else:
+ prev = self['/Last']
+
+ self[NameObject('/Last')] = child
+ self[NameObject('/Count')] = NumberObject(self[NameObject('/Count')] + 1)
+
+ if prev:
+ prevRef = pdf.getReference(prev)
+ assert isinstance(prevRef, IndirectObject)
+ childObj[NameObject('/Prev')] = prevRef
+ prev[NameObject('/Next')] = child
+
+ parentRef = pdf.getReference(self)
+ assert isinstance(parentRef, IndirectObject)
+ childObj[NameObject('/Parent')] = parentRef
+
+ def removeChild(self, child):
+ childObj = child.getObject()
+
+ if NameObject('/Parent') not in childObj:
+ raise ValueError("Removed child does not appear to be a tree item")
+ elif childObj[NameObject('/Parent')] != self:
+ raise ValueError("Removed child is not a member of this tree")
+
+ found = False
+ prevRef = None
+ prev = None
+ curRef = self[NameObject('/First')]
+ cur = curRef.getObject()
+ lastRef = self[NameObject('/Last')]
+ last = lastRef.getObject()
+ while cur != None:
+ if cur == childObj:
+ if prev == None:
+ if NameObject('/Next') in cur:
+ # Removing first tree node
+ nextRef = cur[NameObject('/Next')]
+ next = nextRef.getObject()
+ del next[NameObject('/Prev')]
+ self[NameObject('/First')] = nextRef
+ self[NameObject('/Count')] = self[NameObject('/Count')] - 1
+
+ else:
+ # Removing only tree node
+ assert self[NameObject('/Count')] == 1
+ del self[NameObject('/Count')]
+ del self[NameObject('/First')]
+ if NameObject('/Last') in self:
+ del self[NameObject('/Last')]
+ else:
+ if NameObject('/Next') in cur:
+ # Removing middle tree node
+ nextRef = cur[NameObject('/Next')]
+ next = nextRef.getObject()
+ next[NameObject('/Prev')] = prevRef
+ prev[NameObject('/Next')] = nextRef
+ self[NameObject('/Count')] = self[NameObject('/Count')] - 1
+ else:
+ # Removing last tree node
+ assert cur == last
+ del prev[NameObject('/Next')]
+ self[NameObject('/Last')] = prevRef
+ self[NameObject('/Count')] = self[NameObject('/Count')] - 1
+ found = True
+ break
+
+ prevRef = curRef
+ prev = cur
+ if NameObject('/Next') in cur:
+ curRef = cur[NameObject('/Next')]
+ cur = curRef.getObject()
+ else:
+ curRef = None
+ cur = None
+
+ if not found:
+ raise ValueError("Removal couldn't find item in tree")
+
+ del childObj[NameObject('/Parent')]
+ if NameObject('/Next') in childObj:
+ del childObj[NameObject('/Next')]
+ if NameObject('/Prev') in childObj:
+ del childObj[NameObject('/Prev')]
+
+ def emptyTree(self):
+ for child in self:
+ childObj = child.getObject()
+ del childObj[NameObject('/Parent')]
+ if NameObject('/Next') in childObj:
+ del childObj[NameObject('/Next')]
+ if NameObject('/Prev') in childObj:
+ del childObj[NameObject('/Prev')]
+
+ if NameObject('/Count') in self:
+ del self[NameObject('/Count')]
+ if NameObject('/First') in self:
+ del self[NameObject('/First')]
+ if NameObject('/Last') in self:
+ del self[NameObject('/Last')]
+
+
+class StreamObject(DictionaryObject):
+ def __init__(self):
+ self._data = None
+ self.decodedSelf = None
+
+ def writeToStream(self, stream, encryption_key):
+ self[NameObject("/Length")] = NumberObject(len(self._data))
+ DictionaryObject.writeToStream(self, stream, encryption_key)
+ del self["/Length"]
+ stream.write(b_("\nstream\n"))
+ data = self._data
+ if encryption_key:
+ data = RC4_encrypt(encryption_key, data)
+ stream.write(data)
+ stream.write(b_("\nendstream"))
+
+ def initializeFromDictionary(data):
+ if "/Filter" in data:
+ retval = EncodedStreamObject()
+ else:
+ retval = DecodedStreamObject()
+ retval._data = data["__streamdata__"]
+ del data["__streamdata__"]
+ del data["/Length"]
+ retval.update(data)
+ return retval
+ initializeFromDictionary = staticmethod(initializeFromDictionary)
+
+ def flateEncode(self):
+ if "/Filter" in self:
+ f = self["/Filter"]
+ if isinstance(f, ArrayObject):
+ f.insert(0, NameObject("/FlateDecode"))
+ else:
+ newf = ArrayObject()
+ newf.append(NameObject("/FlateDecode"))
+ newf.append(f)
+ f = newf
+ else:
+ f = NameObject("/FlateDecode")
+ retval = EncodedStreamObject()
+ retval[NameObject("/Filter")] = f
+ retval._data = filters.FlateDecode.encode(self._data)
+ return retval
+
+
+class DecodedStreamObject(StreamObject):
+ def getData(self):
+ return self._data
+
+ def setData(self, data):
+ self._data = data
+
+
+class EncodedStreamObject(StreamObject):
+ def __init__(self):
+ self.decodedSelf = None
+
+ def getData(self):
+ if self.decodedSelf:
+ # cached version of decoded object
+ return self.decodedSelf.getData()
+ else:
+ # create decoded object
+ decoded = DecodedStreamObject()
+
+ decoded._data = filters.decodeStreamData(self)
+ for key, value in list(self.items()):
+ if not key in ("/Length", "/Filter", "/DecodeParms"):
+ decoded[key] = value
+ self.decodedSelf = decoded
+ return decoded._data
+
+ def setData(self, data):
+ raise utils.PdfReadError("Creating EncodedStreamObject is not currently supported")
+
+
+class RectangleObject(ArrayObject):
+ """
+ This class is used to represent *page boxes* in PyPDF2. These boxes include:
+
+ * :attr:`artBox`
+ * :attr:`bleedBox`
+ * :attr:`cropBox`
+ * :attr:`mediaBox`
+ * :attr:`trimBox`
+ """
+ def __init__(self, arr):
+ # must have four points
+ assert len(arr) == 4
+ # automatically convert arr[x] into NumberObject(arr[x]) if necessary
+ ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr])
+
+ def ensureIsNumber(self, value):
+ if not isinstance(value, (NumberObject, FloatObject)):
+ value = FloatObject(value)
+ return value
+
+ def __repr__(self):
+ return "RectangleObject(%s)" % repr(list(self))
+
+ def getLowerLeft_x(self):
+ return self[0]
+
+ def getLowerLeft_y(self):
+ return self[1]
+
+ def getUpperRight_x(self):
+ return self[2]
+
+ def getUpperRight_y(self):
+ return self[3]
+
+ def getUpperLeft_x(self):
+ return self.getLowerLeft_x()
+
+ def getUpperLeft_y(self):
+ return self.getUpperRight_y()
+
+ def getLowerRight_x(self):
+ return self.getUpperRight_x()
+
+ def getLowerRight_y(self):
+ return self.getLowerLeft_y()
+
+ def getLowerLeft(self):
+ return self.getLowerLeft_x(), self.getLowerLeft_y()
+
+ def getLowerRight(self):
+ return self.getLowerRight_x(), self.getLowerRight_y()
+
+ def getUpperLeft(self):
+ return self.getUpperLeft_x(), self.getUpperLeft_y()
+
+ def getUpperRight(self):
+ return self.getUpperRight_x(), self.getUpperRight_y()
+
+ def setLowerLeft(self, value):
+ self[0], self[1] = [self.ensureIsNumber(x) for x in value]
+
+ def setLowerRight(self, value):
+ self[2], self[1] = [self.ensureIsNumber(x) for x in value]
+
+ def setUpperLeft(self, value):
+ self[0], self[3] = [self.ensureIsNumber(x) for x in value]
+
+ def setUpperRight(self, value):
+ self[2], self[3] = [self.ensureIsNumber(x) for x in value]
+
+ def getWidth(self):
+ return self.getUpperRight_x() - self.getLowerLeft_x()
+
+ def getHeight(self):
+ return self.getUpperRight_y() - self.getLowerLeft_y()
+
+ lowerLeft = property(getLowerLeft, setLowerLeft, None, None)
+ """
+ Property to read and modify the lower left coordinate of this box
+ in (x,y) form.
+ """
+ lowerRight = property(getLowerRight, setLowerRight, None, None)
+ """
+ Property to read and modify the lower right coordinate of this box
+ in (x,y) form.
+ """
+ upperLeft = property(getUpperLeft, setUpperLeft, None, None)
+ """
+ Property to read and modify the upper left coordinate of this box
+ in (x,y) form.
+ """
+ upperRight = property(getUpperRight, setUpperRight, None, None)
+ """
+ Property to read and modify the upper right coordinate of this box
+ in (x,y) form.
+ """
+
+
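A rectangle is stored as [llx, lly, urx, ury]; the other corners and the dimensions are derived. An illustrative sketch (not part of the patch) with a US-Letter media box:

```python
# Hedged sketch: corner and size accessors on a US-Letter box.
from PdfFileTransformer.PyPDF2.generic import RectangleObject

box = RectangleObject([0, 0, 612, 792])
print(box.getWidth(), box.getHeight())  # 612 792
print(box.upperRight)                   # (612, 792)
```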
+class Field(TreeObject):
+ """
+ A class representing a field dictionary. This class is accessed through
+ :meth:`getFields()`
+ """
+ def __init__(self, data):
+ DictionaryObject.__init__(self)
+ attributes = ("/FT", "/Parent", "/Kids", "/T", "/TU", "/TM", "/Ff",
+ "/V", "/DV", "/AA")
+ for attr in attributes:
+ try:
+ self[NameObject(attr)] = data[attr]
+ except KeyError:
+ pass
+
+ fieldType = property(lambda self: self.get("/FT"))
+ """
+ Read-only property accessing the type of this field.
+ """
+
+ parent = property(lambda self: self.get("/Parent"))
+ """
+ Read-only property accessing the parent of this field.
+ """
+
+ kids = property(lambda self: self.get("/Kids"))
+ """
+ Read-only property accessing the kids of this field.
+ """
+
+ name = property(lambda self: self.get("/T"))
+ """
+ Read-only property accessing the name of this field.
+ """
+
+ altName = property(lambda self: self.get("/TU"))
+ """
+ Read-only property accessing the alternate name of this field.
+ """
+
+ mappingName = property(lambda self: self.get("/TM"))
+ """
+ Read-only property accessing the mapping name of this field. This
+ name is used by PyPDF2 as a key in the dictionary returned by
+ :meth:`getFields()`
+ """
+
+ flags = property(lambda self: self.get("/Ff"))
+ """
+ Read-only property accessing the field flags, specifying various
+ characteristics of the field (see Table 8.70 of the PDF 1.7 reference).
+ """
+
+ value = property(lambda self: self.get("/V"))
+ """
+ Read-only property accessing the value of this field. Format
+ varies based on field type.
+ """
+
+ defaultValue = property(lambda self: self.get("/DV"))
+ """
+ Read-only property accessing the default value of this field.
+ """
+
+ additionalActions = property(lambda self: self.get("/AA"))
+ """
+ Read-only property accessing the additional actions dictionary.
+ This dictionary defines the field's behavior in response to trigger events.
+ See Section 8.5.2 of the PDF 1.7 reference.
+ """
+
+
+class Destination(TreeObject):
+ """
+ A class representing a destination within a PDF file.
+ See section 8.2.1 of the PDF 1.6 reference.
+
+ :param str title: Title of this destination.
+ :param int page: Page number of this destination.
+ :param str typ: How the destination is displayed.
+ :param args: Additional arguments may be necessary depending on the type.
+ :raises PdfReadError: If destination type is invalid.
+
+ Valid ``typ`` arguments (see PDF spec for details):
+ /Fit No additional arguments
+ /XYZ [left] [top] [zoomFactor]
+ /FitH [top]
+ /FitV [left]
+ /FitR [left] [bottom] [right] [top]
+ /FitB No additional arguments
+ /FitBH [top]
+ /FitBV [left]
+ """
+ def __init__(self, title, page, typ, *args):
+ DictionaryObject.__init__(self)
+ self[NameObject("/Title")] = title
+ self[NameObject("/Page")] = page
+ self[NameObject("/Type")] = typ
+
+ # from table 8.2 of the PDF 1.7 reference.
+ if typ == "/XYZ":
+ (self[NameObject("/Left")], self[NameObject("/Top")],
+ self[NameObject("/Zoom")]) = args
+ elif typ == "/FitR":
+ (self[NameObject("/Left")], self[NameObject("/Bottom")],
+ self[NameObject("/Right")], self[NameObject("/Top")]) = args
+ elif typ in ["/FitH", "/FitBH"]:
+ self[NameObject("/Top")], = args
+ elif typ in ["/FitV", "/FitBV"]:
+ self[NameObject("/Left")], = args
+ elif typ in ["/Fit", "/FitB"]:
+ pass
+ else:
+ raise utils.PdfReadError("Unknown Destination Type: %r" % typ)
+
+ def getDestArray(self):
+ return ArrayObject([self.raw_get('/Page'), self['/Type']] + [self[x] for x in ['/Left', '/Bottom', '/Right', '/Top', '/Zoom'] if x in self])
+
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_("<<\n"))
+ key = NameObject('/D')
+ key.writeToStream(stream, encryption_key)
+ stream.write(b_(" "))
+ value = self.getDestArray()
+ value.writeToStream(stream, encryption_key)
+
+ key = NameObject("/S")
+ key.writeToStream(stream, encryption_key)
+ stream.write(b_(" "))
+ value = NameObject("/GoTo")
+ value.writeToStream(stream, encryption_key)
+
+ stream.write(b_("\n"))
+ stream.write(b_(">>"))
+
+ title = property(lambda self: self.get("/Title"))
+ """
+ Read-only property accessing the destination title.
+
+ :rtype: str
+ """
+
+ page = property(lambda self: self.get("/Page"))
+ """
+ Read-only property accessing the destination page number.
+
+ :rtype: int
+ """
+
+ typ = property(lambda self: self.get("/Type"))
+ """
+ Read-only property accessing the destination type.
+
+ :rtype: str
+ """
+
+ zoom = property(lambda self: self.get("/Zoom", None))
+ """
+ Read-only property accessing the zoom factor.
+
+ :rtype: int, or ``None`` if not available.
+ """
+
+ left = property(lambda self: self.get("/Left", None))
+ """
+ Read-only property accessing the left horizontal coordinate.
+
+ :rtype: int, or ``None`` if not available.
+ """
+
+ right = property(lambda self: self.get("/Right", None))
+ """
+ Read-only property accessing the right horizontal coordinate.
+
+ :rtype: int, or ``None`` if not available.
+ """
+
+ top = property(lambda self: self.get("/Top", None))
+ """
+ Read-only property accessing the top vertical coordinate.
+
+ :rtype: int, or ``None`` if not available.
+ """
+
+ bottom = property(lambda self: self.get("/Bottom", None))
+ """
+ Read-only property accessing the bottom vertical coordinate.
+
+ :rtype: int, or ``None`` if not available.
+ """
+
+
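Each typ dictates the extra positional arguments; /FitH, for instance, takes only [top]. A construction sketch (not part of the patch; the page value here is purely illustrative, not a real page reference):

```python
# Hedged sketch: a /FitH destination takes a single top coordinate.
from PdfFileTransformer.PyPDF2.generic import (
    Destination, NameObject, NumberObject, createStringObject)

dest = Destination(createStringObject("Chapter 1"),
                   NumberObject(0),       # illustrative page value
                   NameObject("/FitH"),
                   NumberObject(792))     # top coordinate
print(dest.title, dest.typ, dest.top)     # Chapter 1 /FitH 792
```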
+class Bookmark(Destination):
+ def writeToStream(self, stream, encryption_key):
+ stream.write(b_("<<\n"))
+ for key in [NameObject(x) for x in ['/Title', '/Parent', '/First', '/Last', '/Next', '/Prev'] if x in self]:
+ key.writeToStream(stream, encryption_key)
+ stream.write(b_(" "))
+ value = self.raw_get(key)
+ value.writeToStream(stream, encryption_key)
+ stream.write(b_("\n"))
+ key = NameObject('/Dest')
+ key.writeToStream(stream, encryption_key)
+ stream.write(b_(" "))
+ value = self.getDestArray()
+ value.writeToStream(stream, encryption_key)
+ stream.write(b_("\n"))
+ stream.write(b_(">>"))
+
+
+def encode_pdfdocencoding(unicode_string):
+ retval = b_('')
+ for c in unicode_string:
+ try:
+ retval += b_(chr(_pdfDocEncoding_rev[c]))
+ except KeyError:
+ raise UnicodeEncodeError("pdfdocencoding", c, -1, -1,
+ "does not exist in translation table")
+ return retval
+
+
+def decode_pdfdocencoding(byte_array):
+ retval = u_('')
+ for b in byte_array:
+ c = _pdfDocEncoding[ord_(b)]
+ if c == u_('\u0000'):
+ raise UnicodeDecodeError("pdfdocencoding", utils.barray(b), -1, -1,
+ "does not exist in translation table")
+ retval += c
+ return retval
+
+_pdfDocEncoding = (
+ u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
+ u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
+ u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'), u_('\u0000'),
+ u_('\u02d8'), u_('\u02c7'), u_('\u02c6'), u_('\u02d9'), u_('\u02dd'), u_('\u02db'), u_('\u02da'), u_('\u02dc'),
+ u_('\u0020'), u_('\u0021'), u_('\u0022'), u_('\u0023'), u_('\u0024'), u_('\u0025'), u_('\u0026'), u_('\u0027'),
+ u_('\u0028'), u_('\u0029'), u_('\u002a'), u_('\u002b'), u_('\u002c'), u_('\u002d'), u_('\u002e'), u_('\u002f'),
+ u_('\u0030'), u_('\u0031'), u_('\u0032'), u_('\u0033'), u_('\u0034'), u_('\u0035'), u_('\u0036'), u_('\u0037'),
+ u_('\u0038'), u_('\u0039'), u_('\u003a'), u_('\u003b'), u_('\u003c'), u_('\u003d'), u_('\u003e'), u_('\u003f'),
+ u_('\u0040'), u_('\u0041'), u_('\u0042'), u_('\u0043'), u_('\u0044'), u_('\u0045'), u_('\u0046'), u_('\u0047'),
+ u_('\u0048'), u_('\u0049'), u_('\u004a'), u_('\u004b'), u_('\u004c'), u_('\u004d'), u_('\u004e'), u_('\u004f'),
+ u_('\u0050'), u_('\u0051'), u_('\u0052'), u_('\u0053'), u_('\u0054'), u_('\u0055'), u_('\u0056'), u_('\u0057'),
+ u_('\u0058'), u_('\u0059'), u_('\u005a'), u_('\u005b'), u_('\u005c'), u_('\u005d'), u_('\u005e'), u_('\u005f'),
+ u_('\u0060'), u_('\u0061'), u_('\u0062'), u_('\u0063'), u_('\u0064'), u_('\u0065'), u_('\u0066'), u_('\u0067'),
+ u_('\u0068'), u_('\u0069'), u_('\u006a'), u_('\u006b'), u_('\u006c'), u_('\u006d'), u_('\u006e'), u_('\u006f'),
+ u_('\u0070'), u_('\u0071'), u_('\u0072'), u_('\u0073'), u_('\u0074'), u_('\u0075'), u_('\u0076'), u_('\u0077'),
+ u_('\u0078'), u_('\u0079'), u_('\u007a'), u_('\u007b'), u_('\u007c'), u_('\u007d'), u_('\u007e'), u_('\u0000'),
+ u_('\u2022'), u_('\u2020'), u_('\u2021'), u_('\u2026'), u_('\u2014'), u_('\u2013'), u_('\u0192'), u_('\u2044'),
+ u_('\u2039'), u_('\u203a'), u_('\u2212'), u_('\u2030'), u_('\u201e'), u_('\u201c'), u_('\u201d'), u_('\u2018'),
+ u_('\u2019'), u_('\u201a'), u_('\u2122'), u_('\ufb01'), u_('\ufb02'), u_('\u0141'), u_('\u0152'), u_('\u0160'),
+ u_('\u0178'), u_('\u017d'), u_('\u0131'), u_('\u0142'), u_('\u0153'), u_('\u0161'), u_('\u017e'), u_('\u0000'),
+ u_('\u20ac'), u_('\u00a1'), u_('\u00a2'), u_('\u00a3'), u_('\u00a4'), u_('\u00a5'), u_('\u00a6'), u_('\u00a7'),
+ u_('\u00a8'), u_('\u00a9'), u_('\u00aa'), u_('\u00ab'), u_('\u00ac'), u_('\u0000'), u_('\u00ae'), u_('\u00af'),
+ u_('\u00b0'), u_('\u00b1'), u_('\u00b2'), u_('\u00b3'), u_('\u00b4'), u_('\u00b5'), u_('\u00b6'), u_('\u00b7'),
+ u_('\u00b8'), u_('\u00b9'), u_('\u00ba'), u_('\u00bb'), u_('\u00bc'), u_('\u00bd'), u_('\u00be'), u_('\u00bf'),
+ u_('\u00c0'), u_('\u00c1'), u_('\u00c2'), u_('\u00c3'), u_('\u00c4'), u_('\u00c5'), u_('\u00c6'), u_('\u00c7'),
+ u_('\u00c8'), u_('\u00c9'), u_('\u00ca'), u_('\u00cb'), u_('\u00cc'), u_('\u00cd'), u_('\u00ce'), u_('\u00cf'),
+ u_('\u00d0'), u_('\u00d1'), u_('\u00d2'), u_('\u00d3'), u_('\u00d4'), u_('\u00d5'), u_('\u00d6'), u_('\u00d7'),
+ u_('\u00d8'), u_('\u00d9'), u_('\u00da'), u_('\u00db'), u_('\u00dc'), u_('\u00dd'), u_('\u00de'), u_('\u00df'),
+ u_('\u00e0'), u_('\u00e1'), u_('\u00e2'), u_('\u00e3'), u_('\u00e4'), u_('\u00e5'), u_('\u00e6'), u_('\u00e7'),
+ u_('\u00e8'), u_('\u00e9'), u_('\u00ea'), u_('\u00eb'), u_('\u00ec'), u_('\u00ed'), u_('\u00ee'), u_('\u00ef'),
+ u_('\u00f0'), u_('\u00f1'), u_('\u00f2'), u_('\u00f3'), u_('\u00f4'), u_('\u00f5'), u_('\u00f6'), u_('\u00f7'),
+ u_('\u00f8'), u_('\u00f9'), u_('\u00fa'), u_('\u00fb'), u_('\u00fc'), u_('\u00fd'), u_('\u00fe'), u_('\u00ff')
+)
+
+assert len(_pdfDocEncoding) == 256
+
+_pdfDocEncoding_rev = {}
+for i in range(256):
+ char = _pdfDocEncoding[i]
+ if char == u_("\u0000"):
+ continue
+ assert char not in _pdfDocEncoding_rev
+ _pdfDocEncoding_rev[char] = i
diff --git a/PdfFileTransformer/PyPDF2/merger.py b/PdfFileTransformer/PyPDF2/merger.py
new file mode 100644
index 0000000..c3373e4
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/merger.py
@@ -0,0 +1,553 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from .generic import *
+from .utils import isString, str_
+from .pdf import PdfFileReader, PdfFileWriter
+from .pagerange import PageRange
+from sys import version_info
+if version_info < ( 3, 0 ):
+ from cStringIO import StringIO
+ StreamIO = StringIO
+else:
+ from io import BytesIO
+ from io import FileIO as file
+ StreamIO = BytesIO
+
+
+class _MergedPage(object):
+ """
+ _MergedPage is used internally by PdfFileMerger to collect necessary
+ information on each page that is being merged.
+ """
+ def __init__(self, pagedata, src, id):
+ self.src = src
+ self.pagedata = pagedata
+ self.out_pagedata = None
+ self.id = id
+
+
+class PdfFileMerger(object):
+ """
+ Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
+    into a single PDF. It can concatenate, slice, and insert pages, or apply
+    any combination of these operations.
+
+ See the functions :meth:`merge()` (or :meth:`append()`)
+ and :meth:`write()` for usage information.
+
+    :param bool strict: Determines whether the user should be warned of all
+        problems and also causes some correctable problems to be fatal.
+        Defaults to ``True``.
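+
+    A minimal usage sketch (``doc1.pdf``, ``doc2.pdf`` and ``merged.pdf`` are
+    placeholder filenames):
+
+    >>> merger = PdfFileMerger(strict=False)
+    >>> merger.append("doc1.pdf", bookmark="First document")
+    >>> merger.merge(0, "doc2.pdf", pages=(0, 2))
+    >>> merger.write("merged.pdf")
+    >>> merger.close()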
+ """
+
+ def __init__(self, strict=True):
+ self.inputs = []
+ self.pages = []
+ self.output = PdfFileWriter()
+ self.bookmarks = []
+ self.named_dests = []
+ self.id_count = 0
+ self.strict = strict
+
+ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
+ """
+ Merges the pages from the given file into the output file at the
+ specified page number.
+
+        :param int position: The *page number* at which to insert this file;
+            the merged pages are inserted before the page currently at that
+            index.
+
+ :param fileobj: A File Object or an object that supports the standard read
+ and seek methods similar to a File Object. Could also be a
+ string representing a path to a PDF file.
+
+ :param str bookmark: Optionally, you may specify a bookmark to be applied at
+ the beginning of the included file by supplying the text of the bookmark.
+
+        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
+ to merge only the specified range of pages from the source
+ document into the output document.
+
+ :param bool import_bookmarks: You may prevent the source document's bookmarks
+ from being imported by specifying this as ``False``.
+ """
+
+ # This parameter is passed to self.inputs.append and means
+ # that the stream used was created in this method.
+ my_file = False
+
+ # If the fileobj parameter is a string, assume it is a path
+ # and create a file object at that location. If it is a file,
+ # copy the file's contents into a BytesIO (or StreamIO) stream object; if
+ # it is a PdfFileReader, copy that reader's stream into a
+ # BytesIO (or StreamIO) stream.
+ # If fileobj is none of the above types, it is not modified
+ decryption_key = None
+ if isString(fileobj):
+ fileobj = file(fileobj, 'rb')
+ my_file = True
+ elif hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
+ fileobj.seek(0)
+ filecontent = fileobj.read()
+ fileobj = StreamIO(filecontent)
+ my_file = True
+ elif isinstance(fileobj, PdfFileReader):
+ orig_tell = fileobj.stream.tell()
+ fileobj.stream.seek(0)
+ filecontent = StreamIO(fileobj.stream.read())
+ fileobj.stream.seek(orig_tell) # reset the stream to its original location
+ fileobj = filecontent
+ if hasattr(fileobj, '_decryption_key'):
+ decryption_key = fileobj._decryption_key
+ my_file = True
+
+ # Create a new PdfFileReader instance using the stream
+ # (either file or BytesIO or StringIO) created above
+ pdfr = PdfFileReader(fileobj, strict=self.strict)
+ if decryption_key is not None:
+ pdfr._decryption_key = decryption_key
+
+ # Find the range of pages to merge.
+ if pages == None:
+ pages = (0, pdfr.getNumPages())
+ elif isinstance(pages, PageRange):
+ pages = pages.indices(pdfr.getNumPages())
+ elif not isinstance(pages, tuple):
+ raise TypeError('"pages" must be a tuple of (start, stop[, step])')
+
+ srcpages = []
+ if bookmark:
+ bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
+
+ outline = []
+ if import_bookmarks:
+ outline = pdfr.getOutlines()
+ outline = self._trim_outline(pdfr, outline, pages)
+
+ if bookmark:
+ self.bookmarks += [bookmark, outline]
+ else:
+ self.bookmarks += outline
+
+ dests = pdfr.namedDestinations
+ dests = self._trim_dests(pdfr, dests, pages)
+ self.named_dests += dests
+
+ # Gather all the pages that are going to be merged
+ for i in range(*pages):
+ pg = pdfr.getPage(i)
+
+ id = self.id_count
+ self.id_count += 1
+
+ mp = _MergedPage(pg, pdfr, id)
+
+ srcpages.append(mp)
+
+ self._associate_dests_to_pages(srcpages)
+ self._associate_bookmarks_to_pages(srcpages)
+
+ # Slice to insert the pages at the specified position
+ self.pages[position:position] = srcpages
+
+ # Keep track of our input files so we can close them later
+ self.inputs.append((fileobj, pdfr, my_file))
+
+ def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
+ """
+ Identical to the :meth:`merge()` method, but assumes you want to concatenate
+ all pages onto the end of the file instead of specifying a position.
+
+ :param fileobj: A File Object or an object that supports the standard read
+ and seek methods similar to a File Object. Could also be a
+ string representing a path to a PDF file.
+
+ :param str bookmark: Optionally, you may specify a bookmark to be applied at
+ the beginning of the included file by supplying the text of the bookmark.
+
+        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
+ to merge only the specified range of pages from the source
+ document into the output document.
+
+ :param bool import_bookmarks: You may prevent the source document's bookmarks
+ from being imported by specifying this as ``False``.
+ """
+
+ self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
+
+ def write(self, fileobj):
+ """
+ Writes all data that has been merged to the given output file.
+
+ :param fileobj: Output file. Can be a filename or any kind of
+ file-like object.
+ """
+ my_file = False
+ if isString(fileobj):
+ fileobj = file(fileobj, 'wb')
+ my_file = True
+
+ # Add pages to the PdfFileWriter
+        # The commented-out lines below show the pre-PyPdf-1.13 code that the
+        # getReference-based lines replace.
+ for page in self.pages:
+ self.output.addPage(page.pagedata)
+ page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
+ #idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
+ #page.out_pagedata = IndirectObject(idnum, 0, self.output)
+
+ # Once all pages are added, create bookmarks to point at those pages
+ self._write_dests()
+ self._write_bookmarks()
+
+ # Write the output to the file
+ self.output.write(fileobj)
+
+ if my_file:
+ fileobj.close()
+
+ def close(self):
+ """
+        Closes all file descriptors (input and output) and releases all
+        memory.
+ """
+ self.pages = []
+ for fo, pdfr, mine in self.inputs:
+ if mine:
+ fo.close()
+
+ self.inputs = []
+ self.output = None
+
+ def addMetadata(self, infos):
+ """
+ Add custom metadata to the output.
+
+ :param dict infos: a Python dictionary where each key is a field
+ and each value is your new metadata.
+ Example: ``{u'/Title': u'My title'}``
+ """
+ self.output.addMetadata(infos)
+
+ def setPageLayout(self, layout):
+ """
+ Set the page layout
+
+ :param str layout: The page layout to be used
+
+ Valid layouts are:
+ /NoLayout Layout explicitly not specified
+ /SinglePage Show one page at a time
+ /OneColumn Show one column at a time
+ /TwoColumnLeft Show pages in two columns, odd-numbered pages on the left
+ /TwoColumnRight Show pages in two columns, odd-numbered pages on the right
+ /TwoPageLeft Show two pages at a time, odd-numbered pages on the left
+ /TwoPageRight Show two pages at a time, odd-numbered pages on the right
+ """
+ self.output.setPageLayout(layout)
+
+ def setPageMode(self, mode):
+ """
+ Set the page mode.
+
+ :param str mode: The page mode to use.
+
+ Valid modes are:
+ /UseNone Do not show outlines or thumbnails panels
+ /UseOutlines Show outlines (aka bookmarks) panel
+ /UseThumbs Show page thumbnails panel
+ /FullScreen Fullscreen view
+ /UseOC Show Optional Content Group (OCG) panel
+ /UseAttachments Show attachments panel
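+
+        Example (a minimal sketch; ``merger`` is an existing PdfFileMerger):
+
+        >>> merger.setPageMode('/UseOutlines')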
+ """
+ self.output.setPageMode(mode)
+
+ def _trim_dests(self, pdf, dests, pages):
+ """
+ Removes any named destinations that are not a part of the specified
+ page set.
+ """
+ new_dests = []
+ for k, o in list(dests.items()):
+ for j in range(*pages):
+ if pdf.getPage(j).getObject() == o['/Page'].getObject():
+ o[NameObject('/Page')] = o['/Page'].getObject()
+ assert str_(k) == str_(o['/Title'])
+ new_dests.append(o)
+ break
+ return new_dests
+
+ def _trim_outline(self, pdf, outline, pages):
+ """
+ Removes any outline/bookmark entries that are not a part of the
+ specified page set.
+ """
+ new_outline = []
+ prev_header_added = True
+ for i, o in enumerate(outline):
+ if isinstance(o, list):
+ sub = self._trim_outline(pdf, o, pages)
+ if sub:
+ if not prev_header_added:
+ new_outline.append(outline[i-1])
+ new_outline.append(sub)
+ else:
+ prev_header_added = False
+ for j in range(*pages):
+ if pdf.getPage(j).getObject() == o['/Page'].getObject():
+ o[NameObject('/Page')] = o['/Page'].getObject()
+ new_outline.append(o)
+ prev_header_added = True
+ break
+ return new_outline
+
+ def _write_dests(self):
+ dests = self.named_dests
+
+ for v in dests:
+ pageno = None
+ pdf = None
+ if '/Page' in v:
+ for i, p in enumerate(self.pages):
+ if p.id == v['/Page']:
+ v[NameObject('/Page')] = p.out_pagedata
+ pageno = i
+ pdf = p.src
+ break
+ if pageno != None:
+ self.output.addNamedDestinationObject(v)
+
+ def _write_bookmarks(self, bookmarks=None, parent=None):
+
+ if bookmarks == None:
+ bookmarks = self.bookmarks
+
+ last_added = None
+ for b in bookmarks:
+ if isinstance(b, list):
+ self._write_bookmarks(b, last_added)
+ continue
+
+ pageno = None
+ pdf = None
+ if '/Page' in b:
+ for i, p in enumerate(self.pages):
+ if p.id == b['/Page']:
+ #b[NameObject('/Page')] = p.out_pagedata
+ args = [NumberObject(p.id), NameObject(b['/Type'])]
+ #nothing more to add
+ #if b['/Type'] == '/Fit' or b['/Type'] == '/FitB'
+ if b['/Type'] == '/FitH' or b['/Type'] == '/FitBH':
+ if '/Top' in b and not isinstance(b['/Top'], NullObject):
+ args.append(FloatObject(b['/Top']))
+ else:
+ args.append(FloatObject(0))
+ del b['/Top']
+ elif b['/Type'] == '/FitV' or b['/Type'] == '/FitBV':
+ if '/Left' in b and not isinstance(b['/Left'], NullObject):
+ args.append(FloatObject(b['/Left']))
+ else:
+ args.append(FloatObject(0))
+ del b['/Left']
+ elif b['/Type'] == '/XYZ':
+ if '/Left' in b and not isinstance(b['/Left'], NullObject):
+ args.append(FloatObject(b['/Left']))
+ else:
+ args.append(FloatObject(0))
+ if '/Top' in b and not isinstance(b['/Top'], NullObject):
+ args.append(FloatObject(b['/Top']))
+ else:
+ args.append(FloatObject(0))
+ if '/Zoom' in b and not isinstance(b['/Zoom'], NullObject):
+ args.append(FloatObject(b['/Zoom']))
+ else:
+ args.append(FloatObject(0))
+ del b['/Top'], b['/Zoom'], b['/Left']
+ elif b['/Type'] == '/FitR':
+ if '/Left' in b and not isinstance(b['/Left'], NullObject):
+ args.append(FloatObject(b['/Left']))
+ else:
+ args.append(FloatObject(0))
+ if '/Bottom' in b and not isinstance(b['/Bottom'], NullObject):
+ args.append(FloatObject(b['/Bottom']))
+ else:
+ args.append(FloatObject(0))
+ if '/Right' in b and not isinstance(b['/Right'], NullObject):
+ args.append(FloatObject(b['/Right']))
+ else:
+ args.append(FloatObject(0))
+ if '/Top' in b and not isinstance(b['/Top'], NullObject):
+ args.append(FloatObject(b['/Top']))
+ else:
+ args.append(FloatObject(0))
+ del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
+
+ b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
+
+ pageno = i
+ pdf = p.src
+ break
+ if pageno != None:
+ del b['/Page'], b['/Type']
+ last_added = self.output.addBookmarkDict(b, parent)
+
+ def _associate_dests_to_pages(self, pages):
+ for nd in self.named_dests:
+ pageno = None
+ np = nd['/Page']
+
+ if isinstance(np, NumberObject):
+ continue
+
+ for p in pages:
+ if np.getObject() == p.pagedata.getObject():
+ pageno = p.id
+
+ if pageno != None:
+ nd[NameObject('/Page')] = NumberObject(pageno)
+ else:
+ raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
+
+ def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
+ if bookmarks == None:
+ bookmarks = self.bookmarks
+
+ for b in bookmarks:
+ if isinstance(b, list):
+ self._associate_bookmarks_to_pages(pages, b)
+ continue
+
+ pageno = None
+ bp = b['/Page']
+
+ if isinstance(bp, NumberObject):
+ continue
+
+ for p in pages:
+ if bp.getObject() == p.pagedata.getObject():
+ pageno = p.id
+
+ if pageno != None:
+ b[NameObject('/Page')] = NumberObject(pageno)
+ else:
+ raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
+
+ def findBookmark(self, bookmark, root=None):
+ if root == None:
+ root = self.bookmarks
+
+ for i, b in enumerate(root):
+ if isinstance(b, list):
+ res = self.findBookmark(bookmark, b)
+ if res:
+ return [i] + res
+ elif b == bookmark or b['/Title'] == bookmark:
+ return [i]
+
+ return None
+
+ def addBookmark(self, title, pagenum, parent=None):
+ """
+ Add a bookmark to this PDF file.
+
+ :param str title: Title to use for this bookmark.
+ :param int pagenum: Page number this bookmark will point to.
+ :param parent: A reference to a parent bookmark to create nested
+ bookmarks.
+ """
+ if parent == None:
+ iloc = [len(self.bookmarks)-1]
+ elif isinstance(parent, list):
+ iloc = parent
+ else:
+ iloc = self.findBookmark(parent)
+
+ dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
+
+ if parent == None:
+ self.bookmarks.append(dest)
+ else:
+ bmparent = self.bookmarks
+ for i in iloc[:-1]:
+ bmparent = bmparent[i]
+ npos = iloc[-1]+1
+ if npos < len(bmparent) and isinstance(bmparent[npos], list):
+ bmparent[npos].append(dest)
+ else:
+ bmparent.insert(npos, [dest])
+ return dest
+
+ def addNamedDestination(self, title, pagenum):
+ """
+ Add a destination to the output.
+
+ :param str title: Title to use
+ :param int pagenum: Page number this destination points at.
+ """
+
+ dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
+ self.named_dests.append(dest)
+
+
+class OutlinesObject(list):
+ def __init__(self, pdf, tree, parent=None):
+ list.__init__(self)
+ self.tree = tree
+ self.pdf = pdf
+ self.parent = parent
+
+ def remove(self, index):
+ obj = self[index]
+ del self[index]
+ self.tree.removeChild(obj)
+
+ def add(self, title, pagenum):
+ pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
+ action = DictionaryObject()
+ action.update({
+ NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
+ NameObject('/S') : NameObject('/GoTo')
+ })
+ actionRef = self.pdf._addObject(action)
+ bookmark = TreeObject()
+
+ bookmark.update({
+ NameObject('/A'): actionRef,
+ NameObject('/Title'): createStringObject(title),
+ })
+
+ self.pdf._addObject(bookmark)
+
+ self.tree.addChild(bookmark)
+
+ def removeAll(self):
+ for child in [x for x in self.tree.children()]:
+ self.tree.removeChild(child)
+ self.pop()
diff --git a/PdfFileTransformer/PyPDF2/pagerange.py b/PdfFileTransformer/PyPDF2/pagerange.py
new file mode 100644
index 0000000..ce96ec5
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/pagerange.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+"""
+Representation and utils for ranges of PDF file pages.
+
+Copyright (c) 2014, Steve Witham.
+All rights reserved. This software is available under a BSD license;
+see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
+"""
+
+import re
+from .utils import isString
+
+_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
+PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
+# groups: 12 34 5 6 7 8
+
+
+class ParseError(Exception):
+ pass
+
+
+PAGE_RANGE_HELP = """Remember, page indices start with zero.
+ Page range expression examples:
+ : all pages. -1 last page.
+ 22 just the 23rd page. :-1 all but the last page.
+ 0:3 the first three pages. -2 second-to-last page.
+ :3 the first three pages. -2: last two pages.
+ 5: from the sixth page onward. -3:-1 third & second to last.
+ The third, "stride" or "step" number is also recognized.
+ ::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
+ 1:10:2 1 3 5 7 9 2::-1 2 1 0.
+ ::-1 all pages in reverse order.
+"""
+
+
+class PageRange(object):
+ """
+ A slice-like representation of a range of page indices,
+ i.e. page numbers, only starting at zero.
+ The syntax is like what you would put between brackets [ ].
+ The slice is one of the few Python types that can't be subclassed,
+ but this class converts to and from slices, and allows similar use.
+ o PageRange(str) parses a string representing a page range.
+ o PageRange(slice) directly "imports" a slice.
+ o to_slice() gives the equivalent slice.
+ o str() and repr() allow printing.
+ o indices(n) is like slice.indices(n).
+ """
+
+ def __init__(self, arg):
+ """
+ Initialize with either a slice -- giving the equivalent page range,
+ or a PageRange object -- making a copy,
+ or a string like
+ "int", "[int]:[int]" or "[int]:[int]:[int]",
+ where the brackets indicate optional ints.
+ {page_range_help}
+ Note the difference between this notation and arguments to slice():
+ slice(3) means the first three pages;
+ PageRange("3") means the range of only the fourth page.
+ However PageRange(slice(3)) means the first three pages.
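+
+        A short doctest sketch:
+
+        >>> PageRange("0:3").to_slice()
+        slice(0, 3, None)
+        >>> PageRange("-1").indices(10)
+        (9, 10, 1)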
+ """
+ if isinstance(arg, slice):
+ self._slice = arg
+ return
+
+ if isinstance(arg, PageRange):
+ self._slice = arg.to_slice()
+ return
+
+ m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
+ if not m:
+ raise ParseError(arg)
+ elif m.group(2):
+ # Special case: just an int means a range of one page.
+ start = int(m.group(2))
+ stop = start + 1 if start != -1 else None
+ self._slice = slice(start, stop)
+ else:
+ self._slice = slice(*[int(g) if g else None
+ for g in m.group(4, 6, 8)])
+
+    # Format __init__'s docstring only when it exists (docstrings are
+    # stripped when running under python -OO).
+ if __init__.__doc__:
+ __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
+
+ @staticmethod
+ def valid(input):
+ """ True if input is a valid initializer for a PageRange. """
+ return isinstance(input, slice) or \
+ isinstance(input, PageRange) or \
+ (isString(input)
+ and bool(re.match(PAGE_RANGE_RE, input)))
+
+ def to_slice(self):
+ """ Return the slice equivalent of this page range. """
+ return self._slice
+
+ def __str__(self):
+ """ A string like "1:2:3". """
+ s = self._slice
+ if s.step == None:
+ if s.start != None and s.stop == s.start + 1:
+ return str(s.start)
+
+ indices = s.start, s.stop
+ else:
+ indices = s.start, s.stop, s.step
+ return ':'.join("" if i == None else str(i) for i in indices)
+
+ def __repr__(self):
+ """ A string like "PageRange('1:2:3')". """
+ return "PageRange(" + repr(str(self)) + ")"
+
+ def indices(self, n):
+ """
+ n is the length of the list of pages to choose from.
+ Returns arguments for range(). See help(slice.indices).
+ """
+ return self._slice.indices(n)
+
+
+PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
+
+
+def parse_filename_page_ranges(args):
+ """
+ Given a list of filenames and page ranges, return a list of
+ (filename, page_range) pairs.
+    First arg must be a filename; other args are filenames, page-range
+    expressions, slice objects, or PageRange objects.
+ A filename not followed by a page range indicates all pages of the file.
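+
+    A short doctest sketch (the filenames are placeholders):
+
+    >>> parse_filename_page_ranges(["a.pdf", "1:3", "b.pdf"])
+    [('a.pdf', PageRange('1:3')), ('b.pdf', PageRange(':'))]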
+ """
+ pairs = []
+ pdf_filename = None
+ did_page_range = False
+ for arg in args + [None]:
+ if PageRange.valid(arg):
+ if not pdf_filename:
+ raise ValueError("The first argument must be a filename, " \
+ "not a page range.")
+
+ pairs.append( (pdf_filename, PageRange(arg)) )
+ did_page_range = True
+ else:
+ # New filename or end of list--do all of the previous file?
+ if pdf_filename and not did_page_range:
+ pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
+
+ pdf_filename = arg
+ did_page_range = False
+ return pairs
diff --git a/PdfFileTransformer/PyPDF2/pdf.py b/PdfFileTransformer/PyPDF2/pdf.py
new file mode 100644
index 0000000..3bd0066
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/pdf.py
@@ -0,0 +1,3074 @@
+# -*- coding: utf-8 -*-
+#
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A pure-Python PDF library with an increasing number of capabilities.
+See README for links to FAQ, documentation, homepage, etc.
+"""
+
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+__maintainer__ = "Phaseit, Inc."
+__maintainer_email__ = "PyPDF2@phaseit.net"
+
+import string
+import math
+import struct
+import sys
+import uuid
+from sys import version_info
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+    BytesIO = StringIO
+else:
+    from io import StringIO, BytesIO
+
+from . import filters
+from . import utils
+import warnings
+import codecs
+from .generic import *
+from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
+from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning
+
+if version_info < ( 2, 4 ):
+ from sets import ImmutableSet as frozenset
+
+if version_info < ( 2, 5 ):
+ from md5 import md5
+else:
+ from hashlib import md5
+
+
+class PdfFileWriter(object):
+ """
+ This class supports writing PDF files out, given pages produced by another
+ class (typically :class:`PdfFileReader`).
+ """
+ def __init__(self):
+ self._header = b_("%PDF-1.3")
+ self._objects = [] # array of indirect objects
+
+ # The root of our page tree node.
+ pages = DictionaryObject()
+ pages.update({
+ NameObject("/Type"): NameObject("/Pages"),
+ NameObject("/Count"): NumberObject(0),
+ NameObject("/Kids"): ArrayObject(),
+ })
+ self._pages = self._addObject(pages)
+
+ # info object
+ info = DictionaryObject()
+ info.update({
+ NameObject("/Producer"): createStringObject(codecs.BOM_UTF16_BE + u_("PyPDF2").encode('utf-16be'))
+ })
+ self._info = self._addObject(info)
+
+ # root object
+ root = DictionaryObject()
+ root.update({
+ NameObject("/Type"): NameObject("/Catalog"),
+ NameObject("/Pages"): self._pages,
+ })
+ self._root = None
+ self._root_object = root
+
+ def setHeader(self, header):
+ self._header = header
+
+ def _addObject(self, obj):
+ self._objects.append(obj)
+ return IndirectObject(len(self._objects), 0, self)
+
+ def getObject(self, ido):
+ if ido.pdf != self:
+ raise ValueError("pdf must be self")
+ return self._objects[ido.idnum - 1]
+
+ def _addPage(self, page, action):
+ assert page["/Type"] == "/Page"
+ page[NameObject("/Parent")] = self._pages
+ page = self._addObject(page)
+ pages = self.getObject(self._pages)
+ action(pages["/Kids"], page)
+ pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
+
+ def addPage(self, page):
+ """
+ Adds a page to this PDF file. The page is usually acquired from a
+ :class:`PdfFileReader` instance.
+
+ :param PageObject page: The page to add to the document. Should be
+ an instance of :class:`PageObject`
+ """
+ self._addPage(page, list.append)
+
+ def insertPage(self, page, index=0):
+ """
+ Insert a page in this PDF file. The page is usually acquired from a
+ :class:`PdfFileReader` instance.
+
+ :param PageObject page: The page to add to the document. This
+ argument should be an instance of :class:`PageObject`.
+ :param int index: Position at which the page will be inserted.
+ """
+ self._addPage(page, lambda l, p: l.insert(index, p))
+
+ def getPage(self, pageNumber):
+ """
+ Retrieves a page by number from this PDF file.
+
+ :param int pageNumber: The page number to retrieve
+ (pages begin at zero)
+ :return: the page at the index given by *pageNumber*
+ :rtype: :class:`PageObject`
+ """
+ pages = self.getObject(self._pages)
+ # XXX: crude hack
+ return pages["/Kids"][pageNumber].getObject()
+
+ def getNumPages(self):
+ """
+ :return: the number of pages.
+ :rtype: int
+ """
+ pages = self.getObject(self._pages)
+ return int(pages[NameObject("/Count")])
+
+ def addBlankPage(self, width=None, height=None):
+ """
+ Appends a blank page to this PDF file and returns it. If no page size
+ is specified, use the size of the last page.
+
+ :param float width: The width of the new page expressed in default user
+ space units.
+ :param float height: The height of the new page expressed in default
+ user space units.
+ :return: the newly appended page
+ :rtype: :class:`PageObject`
+ :raises PageSizeNotDefinedError: if width and height are not defined
+ and previous page does not exist.
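+
+        Example (a minimal sketch; ``writer`` is an existing PdfFileWriter,
+        and 612 x 792 user space units is US Letter):
+
+        >>> page = writer.addBlankPage(612, 792)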
+ """
+ page = PageObject.createBlankPage(self, width, height)
+ self.addPage(page)
+ return page
+
+ def insertBlankPage(self, width=None, height=None, index=0):
+ """
+ Inserts a blank page to this PDF file and returns it. If no page size
+ is specified, use the size of the last page.
+
+ :param float width: The width of the new page expressed in default user
+ space units.
+ :param float height: The height of the new page expressed in default
+ user space units.
+ :param int index: Position to add the page.
+ :return: the newly appended page
+ :rtype: :class:`PageObject`
+ :raises PageSizeNotDefinedError: if width and height are not defined
+ and previous page does not exist.
+ """
+        if (width is None or height is None) and \
+                (self.getNumPages() - 1) >= index:
+ oldpage = self.getPage(index)
+ width = oldpage.mediaBox.getWidth()
+ height = oldpage.mediaBox.getHeight()
+ page = PageObject.createBlankPage(self, width, height)
+ self.insertPage(page, index)
+ return page
+
+ def addJS(self, javascript):
+ """
+ Add Javascript which will launch upon opening this PDF.
+
+ :param str javascript: Your Javascript.
+
+        Example: launch the print dialog when the PDF is opened:
+
+        >>> output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
+ """
+ js = DictionaryObject()
+ js.update({
+ NameObject("/Type"): NameObject("/Action"),
+ NameObject("/S"): NameObject("/JavaScript"),
+ NameObject("/JS"): NameObject("(%s)" % javascript)
+ })
+ js_indirect_object = self._addObject(js)
+
+ # We need a name for parameterized javascript in the pdf file, but it can be anything.
+ js_string_name = str(uuid.uuid4())
+
+ js_name_tree = DictionaryObject()
+ js_name_tree.update({
+ NameObject("/JavaScript"): DictionaryObject({
+ NameObject("/Names"): ArrayObject([createStringObject(js_string_name), js_indirect_object])
+ })
+ })
+ self._addObject(js_name_tree)
+
+ self._root_object.update({
+ NameObject("/OpenAction"): js_indirect_object,
+ NameObject("/Names"): js_name_tree
+ })
+
+ def addAttachment(self, fname, fdata):
+ """
+ Embed a file inside the PDF.
+
+ :param str fname: The filename to display.
+ :param str fdata: The data in the file.
+
+ Reference:
+ https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
+ Section 7.11.3
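+
+        Example (a minimal sketch; the name and data are placeholder values):
+
+        >>> writer.addAttachment("hello.txt", b"Hello world!")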
+ """
+
+ # We need 3 entries:
+ # * The file's data
+ # * The /Filespec entry
+ # * The file's name, which goes in the Catalog
+
+
+ # The entry for the file
+ """ Sample:
+ 8 0 obj
+ <<
+ /Length 12
+ /Type /EmbeddedFile
+ >>
+ stream
+ Hello world!
+ endstream
+ endobj
+ """
+ file_entry = DecodedStreamObject()
+ file_entry.setData(fdata)
+ file_entry.update({
+ NameObject("/Type"): NameObject("/EmbeddedFile")
+ })
+
+ # The Filespec entry
+ """ Sample:
+ 7 0 obj
+ <<
+ /Type /Filespec
+ /F (hello.txt)
+ /EF << /F 8 0 R >>
+ >>
+ """
+ efEntry = DictionaryObject()
+ efEntry.update({ NameObject("/F"):file_entry })
+
+ filespec = DictionaryObject()
+ filespec.update({
+ NameObject("/Type"): NameObject("/Filespec"),
+ NameObject("/F"): createStringObject(fname), # Perhaps also try TextStringObject
+ NameObject("/EF"): efEntry
+ })
+
+ # Then create the entry for the root, as it needs a reference to the Filespec
+ """ Sample:
+ 1 0 obj
+ <<
+ /Type /Catalog
+ /Outlines 2 0 R
+ /Pages 3 0 R
+ /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
+ >>
+ endobj
+
+ """
+ embeddedFilesNamesDictionary = DictionaryObject()
+ embeddedFilesNamesDictionary.update({
+ NameObject("/Names"): ArrayObject([createStringObject(fname), filespec])
+ })
+
+ embeddedFilesDictionary = DictionaryObject()
+ embeddedFilesDictionary.update({
+ NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary
+ })
+ # Update the root
+ self._root_object.update({
+ NameObject("/Names"): embeddedFilesDictionary
+ })
+
+ def appendPagesFromReader(self, reader, after_page_append=None):
+ """
+ Copy pages from reader to writer. Includes an optional callback parameter
+ which is invoked after pages are appended to the writer.
+
+        :param reader: a PdfFileReader object whose pages will be copied into
+            this writer object; the writer's annotations are updated
+            accordingly.
+ :callback after_page_append (function): Callback function that is invoked after
+ each page is appended to the writer. Callback signature:
+
+ :param writer_pageref (PDF page reference): Reference to the page
+ appended to the writer.
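+
+        Example (a minimal sketch; ``reader`` is an open PdfFileReader):
+
+        >>> writer.appendPagesFromReader(reader)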
+ """
+ # Get page count from writer and reader
+ reader_num_pages = reader.getNumPages()
+ writer_num_pages = self.getNumPages()
+
+ # Copy pages from reader to writer
+ for rpagenum in range(0, reader_num_pages):
+ reader_page = reader.getPage(rpagenum)
+ self.addPage(reader_page)
+ writer_page = self.getPage(writer_num_pages+rpagenum)
+ # Trigger callback, pass writer page as parameter
+ if callable(after_page_append): after_page_append(writer_page)
+
+ def updatePageFormFieldValues(self, page, fields):
+ '''
+ Update the form field values for a given page from a fields dictionary.
+ Copy field texts and values from fields to page.
+
+ :param page: Page reference from PDF writer where the annotations
+ and field data will be updated.
+ :param fields: a Python dictionary of field names (/T) and text
+ values (/V)
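+
+        Example (a minimal sketch; assumes page 0 carries form annotations
+        and that a field named ``Name`` exists):
+
+        >>> page = writer.getPage(0)
+        >>> writer.updatePageFormFieldValues(page, {"Name": "John Smith"})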
+ '''
+ # Iterate through pages, update field values
+ for j in range(0, len(page['/Annots'])):
+ writer_annot = page['/Annots'][j].getObject()
+ for field in fields:
+ if writer_annot.get('/T') == field:
+ writer_annot.update({
+ NameObject("/V"): TextStringObject(fields[field])
+ })
+
+ def cloneReaderDocumentRoot(self, reader):
+ '''
+ Copy the reader document root to the writer.
+
+        :param reader: PdfFileReader from which the document root should be
+            copied.
+ '''
+ self._root_object = reader.trailer['/Root']
+
+ def cloneDocumentFromReader(self, reader, after_page_append=None):
+ '''
+ Create a copy (clone) of a document from a PDF file reader
+
+ :param reader: PDF file reader instance from which the clone
+ should be created.
+ :callback after_page_append (function): Callback function that is invoked after
+ each page is appended to the writer. Signature includes a reference to the
+ appended page (delegates to appendPagesFromReader). Callback signature:
+
+ :param writer_pageref (PDF page reference): Reference to the page just
+ appended to the document.
+ '''
+ self.cloneReaderDocumentRoot(reader)
+ self.appendPagesFromReader(reader, after_page_append)
+
+ def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
+ """
+ Encrypt this PDF file with the PDF Standard encryption handler.
+
+ :param str user_pwd: The "user password", which allows for opening
+ and reading the PDF file with the restrictions provided.
+ :param str owner_pwd: The "owner password", which allows for
+ opening the PDF files without any restrictions. By default,
+ the owner password is the same as the user password.
+ :param bool use_128bit: flag as to whether to use 128bit
+ encryption. When false, 40bit encryption will be used. By default,
+ this flag is on.
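+
+        Example (a minimal sketch; the passwords are placeholders):
+
+        >>> writer.encrypt("userpass", "ownerpass", use_128bit=True)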
+ """
+ import time, random
+ if owner_pwd == None:
+ owner_pwd = user_pwd
+ if use_128bit:
+ V = 2
+ rev = 3
+ keylen = int(128 / 8)
+ else:
+ V = 1
+ rev = 2
+ keylen = int(40 / 8)
+ # permit everything:
+ P = -1
+ O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen))
+ ID_1 = ByteStringObject(md5(b_(repr(time.time()))).digest())
+ ID_2 = ByteStringObject(md5(b_(repr(random.random()))).digest())
+ self._ID = ArrayObject((ID_1, ID_2))
+ if rev == 2:
+ U, key = _alg34(user_pwd, O, P, ID_1)
+ else:
+ assert rev == 3
+ U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False)
+ encrypt = DictionaryObject()
+ encrypt[NameObject("/Filter")] = NameObject("/Standard")
+ encrypt[NameObject("/V")] = NumberObject(V)
+ if V == 2:
+ encrypt[NameObject("/Length")] = NumberObject(keylen * 8)
+ encrypt[NameObject("/R")] = NumberObject(rev)
+ encrypt[NameObject("/O")] = ByteStringObject(O)
+ encrypt[NameObject("/U")] = ByteStringObject(U)
+ encrypt[NameObject("/P")] = NumberObject(P)
+ self._encrypt = self._addObject(encrypt)
+ self._encrypt_key = key
+
+ def write(self, stream):
+ """
+ Writes the collection of pages added to this object out as a PDF file.
+
+ :param stream: An object to write the file to. The object must support
+ the write method and the tell method, similar to a file object.
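+
+        Example (a minimal sketch; ``out.pdf`` is a placeholder filename;
+        note the binary mode):
+
+        >>> with open("out.pdf", "wb") as fp:
+        ...     writer.write(fp)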
+ """
+ if hasattr(stream, 'mode') and 'b' not in stream.mode:
+ warnings.warn("File <%s> to write to is not in binary mode. It may not be written to correctly." % stream.name)
+        debug = False
+
+ if not self._root:
+ self._root = self._addObject(self._root_object)
+
+ externalReferenceMap = {}
+
+ # PDF objects sometimes have circular references to their /Page objects
+ # inside their object tree (for example, annotations). Those will be
+ # indirect references to objects that we've recreated in this PDF. To
+ # address this problem, PageObject's store their original object
+ # reference number, and we add it to the external reference map before
+ # we sweep for indirect references. This forces self-page-referencing
+ # trees to reference the correct new object location, rather than
+ # copying in a new copy of the page object.
+ for objIndex in range(len(self._objects)):
+ obj = self._objects[objIndex]
+ if isinstance(obj, PageObject) and obj.indirectRef != None:
+ data = obj.indirectRef
+ if data.pdf not in externalReferenceMap:
+ externalReferenceMap[data.pdf] = {}
+ if data.generation not in externalReferenceMap[data.pdf]:
+ externalReferenceMap[data.pdf][data.generation] = {}
+ externalReferenceMap[data.pdf][data.generation][data.idnum] = IndirectObject(objIndex + 1, 0, self)
+
+ self.stack = []
+ if debug: print(("ERM:", externalReferenceMap, "root:", self._root))
+ self._sweepIndirectReferences(externalReferenceMap, self._root)
+ del self.stack
+
+ # Begin writing:
+ object_positions = []
+ stream.write(self._header + b_("\n"))
+ stream.write(b_("%\xE2\xE3\xCF\xD3\n"))
+ for i in range(len(self._objects)):
+ idnum = (i + 1)
+ obj = self._objects[i]
+ object_positions.append(stream.tell())
+ stream.write(b_(str(idnum) + " 0 obj\n"))
+ key = None
+ if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
+                pack1 = struct.pack("<i", i + 1)[:3]
+                pack2 = struct.pack("<i", 0)[:2]
+                key = self._encrypt_key + pack1 + pack2
+                assert len(key) == (len(self._encrypt_key) + 5)
+                md5_hash = md5(key).digest()
+                key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
+            obj.writeToStream(stream, key)
+            stream.write(b_("\nendobj\n"))
+
+    def addBookmark(self, title, pagenum, parent=None, color=None,
+                    bold=False, italic=False, fit='/Fit', *args):
+        """
+        Add a bookmark to this PDF file.
+
+        :param str title: Title to use for this bookmark.
+        :param int pagenum: Page number this bookmark will point to.
+        :param parent: A reference to a parent bookmark to create nested
+            bookmarks.
+        :param tuple color: Color of the bookmark as a red, green, blue tuple
+            from 0.0 to 1.0
+        :param bool bold: Bookmark is bold
+        :param bool italic: Bookmark is italic
+        :param str fit: The fit of the destination page. See
+            :meth:`addLink()` for details.
+        """
+ pageRef = self.getObject(self._pages)['/Kids'][pagenum]
+ action = DictionaryObject()
+ zoomArgs = []
+ for a in args:
+ if a is not None:
+ zoomArgs.append(NumberObject(a))
+ else:
+ zoomArgs.append(NullObject())
+ dest = Destination(NameObject("/"+title + " bookmark"), pageRef, NameObject(fit), *zoomArgs)
+ destArray = dest.getDestArray()
+ action.update({
+ NameObject('/D') : destArray,
+ NameObject('/S') : NameObject('/GoTo')
+ })
+ actionRef = self._addObject(action)
+
+ outlineRef = self.getOutlineRoot()
+
+ if parent == None:
+ parent = outlineRef
+
+ bookmark = TreeObject()
+
+ bookmark.update({
+ NameObject('/A'): actionRef,
+ NameObject('/Title'): createStringObject(title),
+ })
+
+ if color is not None:
+ bookmark.update({NameObject('/C'): ArrayObject([FloatObject(c) for c in color])})
+
+ format = 0
+ if italic:
+ format += 1
+ if bold:
+ format += 2
+ if format:
+ bookmark.update({NameObject('/F'): NumberObject(format)})
+
+ bookmarkRef = self._addObject(bookmark)
+
+ parent = parent.getObject()
+ parent.addChild(bookmarkRef, self)
+
+ return bookmarkRef
+
+ def addNamedDestinationObject(self, dest):
+ destRef = self._addObject(dest)
+
+ nd = self.getNamedDestRoot()
+ nd.extend([dest['/Title'], destRef])
+
+ return destRef
+
+ def addNamedDestination(self, title, pagenum):
+ pageRef = self.getObject(self._pages)['/Kids'][pagenum]
+ dest = DictionaryObject()
+ dest.update({
+ NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
+ NameObject('/S') : NameObject('/GoTo')
+ })
+
+ destRef = self._addObject(dest)
+ nd = self.getNamedDestRoot()
+
+ nd.extend([title, destRef])
+
+ return destRef
+
+ def removeLinks(self):
+ """
+ Removes links and annotations from this output.
+ """
+ pages = self.getObject(self._pages)['/Kids']
+ for page in pages:
+ pageRef = self.getObject(page)
+ if "/Annots" in pageRef:
+ del pageRef['/Annots']
+
+ def removeImages(self, ignoreByteStringObject=False):
+ """
+ Removes images from this output.
+
+ :param bool ignoreByteStringObject: optional parameter
+ to ignore ByteString Objects.
+ """
+ pages = self.getObject(self._pages)['/Kids']
+ for j in range(len(pages)):
+ page = pages[j]
+ pageRef = self.getObject(page)
+ content = pageRef['/Contents'].getObject()
+ if not isinstance(content, ContentStream):
+ content = ContentStream(content, pageRef)
+
+ _operations = []
+ seq_graphics = False
+ for operands, operator in content.operations:
+ if operator == b_('Tj'):
+ text = operands[0]
+ if ignoreByteStringObject:
+ if not isinstance(text, TextStringObject):
+ operands[0] = TextStringObject()
+ elif operator == b_("'"):
+ text = operands[0]
+ if ignoreByteStringObject:
+ if not isinstance(text, TextStringObject):
+ operands[0] = TextStringObject()
+ elif operator == b_('"'):
+ text = operands[2]
+ if ignoreByteStringObject:
+ if not isinstance(text, TextStringObject):
+ operands[2] = TextStringObject()
+ elif operator == b_("TJ"):
+ for i in range(len(operands[0])):
+ if ignoreByteStringObject:
+ if not isinstance(operands[0][i], TextStringObject):
+ operands[0][i] = TextStringObject()
+
+ if operator == b_('q'):
+ seq_graphics = True
+ if operator == b_('Q'):
+ seq_graphics = False
+ if seq_graphics:
+ if operator in [b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
+ b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
+ b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh')]:
+ continue
+ if operator == b_('re'):
+ continue
+ _operations.append((operands, operator))
+
+ content.operations = _operations
+ pageRef.__setitem__(NameObject('/Contents'), content)
+
+ def removeText(self, ignoreByteStringObject=False):
+ """
+        Removes text from this output.
+
+ :param bool ignoreByteStringObject: optional parameter
+ to ignore ByteString Objects.
+ """
+ pages = self.getObject(self._pages)['/Kids']
+ for j in range(len(pages)):
+ page = pages[j]
+ pageRef = self.getObject(page)
+ content = pageRef['/Contents'].getObject()
+ if not isinstance(content, ContentStream):
+ content = ContentStream(content, pageRef)
+ for operands,operator in content.operations:
+ if operator == b_('Tj'):
+ text = operands[0]
+ if not ignoreByteStringObject:
+ if isinstance(text, TextStringObject):
+ operands[0] = TextStringObject()
+ else:
+ if isinstance(text, TextStringObject) or \
+ isinstance(text, ByteStringObject):
+ operands[0] = TextStringObject()
+ elif operator == b_("'"):
+ text = operands[0]
+ if not ignoreByteStringObject:
+ if isinstance(text, TextStringObject):
+ operands[0] = TextStringObject()
+ else:
+ if isinstance(text, TextStringObject) or \
+ isinstance(text, ByteStringObject):
+ operands[0] = TextStringObject()
+ elif operator == b_('"'):
+ text = operands[2]
+ if not ignoreByteStringObject:
+ if isinstance(text, TextStringObject):
+ operands[2] = TextStringObject()
+ else:
+ if isinstance(text, TextStringObject) or \
+ isinstance(text, ByteStringObject):
+ operands[2] = TextStringObject()
+ elif operator == b_("TJ"):
+ for i in range(len(operands[0])):
+ if not ignoreByteStringObject:
+ if isinstance(operands[0][i], TextStringObject):
+ operands[0][i] = TextStringObject()
+ else:
+ if isinstance(operands[0][i], TextStringObject) or \
+ isinstance(operands[0][i], ByteStringObject):
+ operands[0][i] = TextStringObject()
+
+ pageRef.__setitem__(NameObject('/Contents'), content)
+
+ def addURI(self, pagenum, uri, rect, border=None):
+ """
+        Add a URI action from a rectangular area on the specified page.
+        This uses the basic structure of :meth:`addLink()`.
+
+ :param int pagenum: index of the page on which to place the URI action.
+        :param str uri: URI of the resource to link to.
+ :param rect: :class:`RectangleObject` or array of four
+ integers specifying the clickable rectangular area
+ ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
+ :param border: if provided, an array describing border-drawing
+ properties. See the PDF spec for details. No border will be
+ drawn if this argument is omitted.
+
+ REMOVED FIT/ZOOM ARG
+ -John Mulligan
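+
+        Example (a minimal sketch; assumes page 0 exists, and the URI and
+        rectangle are placeholders):
+
+        >>> writer.addURI(0, "https://example.com", [0, 0, 100, 30])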
+ """
+
+ pageLink = self.getObject(self._pages)['/Kids'][pagenum]
+ pageRef = self.getObject(pageLink)
+
+ if border is not None:
+ borderArr = [NameObject(n) for n in border[:3]]
+ if len(border) == 4:
+ dashPattern = ArrayObject([NameObject(n) for n in border[3]])
+ borderArr.append(dashPattern)
+ else:
+ borderArr = [NumberObject(2)] * 3
+
+ if isString(rect):
+ rect = NameObject(rect)
+ elif isinstance(rect, RectangleObject):
+ pass
+ else:
+ rect = RectangleObject(rect)
+
+ lnk2 = DictionaryObject()
+ lnk2.update({
+ NameObject('/S'): NameObject('/URI'),
+ NameObject('/URI'): TextStringObject(uri)
+        })
+ lnk = DictionaryObject()
+ lnk.update({
+ NameObject('/Type'): NameObject('/Annot'),
+ NameObject('/Subtype'): NameObject('/Link'),
+ NameObject('/P'): pageLink,
+ NameObject('/Rect'): rect,
+ NameObject('/H'): NameObject('/I'),
+ NameObject('/Border'): ArrayObject(borderArr),
+ NameObject('/A'): lnk2
+ })
+ lnkRef = self._addObject(lnk)
+
+ if "/Annots" in pageRef:
+ pageRef['/Annots'].append(lnkRef)
+ else:
+ pageRef[NameObject('/Annots')] = ArrayObject([lnkRef])
+
+ def addLink(self, pagenum, pagedest, rect, border=None, fit='/Fit', *args):
+ """
+ Add an internal link from a rectangular area to the specified page.
+
+ :param int pagenum: index of the page on which to place the link.
+ :param int pagedest: index of the page to which the link should go.
+ :param rect: :class:`RectangleObject` or array of four
+ integers specifying the clickable rectangular area
+ ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``.
+ :param border: if provided, an array describing border-drawing
+ properties. See the PDF spec for details. No border will be
+ drawn if this argument is omitted.
+ :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need
+ to be supplied. Passing ``None`` will be read as a null value for that coordinate.
+
+ Valid zoom arguments (see Table 8.2 of the PDF 1.7 reference for details):
+ /Fit No additional arguments
+ /XYZ [left] [top] [zoomFactor]
+ /FitH [top]
+ /FitV [left]
+ /FitR [left] [bottom] [right] [top]
+ /FitB No additional arguments
+ /FitBH [top]
+ /FitBV [left]
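+
+        Example (a minimal sketch; assumes the document already has at least
+        four pages):
+
+        >>> writer.addLink(0, 3, [0, 0, 100, 30], fit='/Fit')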
+ """
+
+ pageLink = self.getObject(self._pages)['/Kids'][pagenum]
+ pageDest = self.getObject(self._pages)['/Kids'][pagedest] #TODO: switch for external link
+ pageRef = self.getObject(pageLink)
+
+ if border is not None:
+ borderArr = [NameObject(n) for n in border[:3]]
+ if len(border) == 4:
+ dashPattern = ArrayObject([NameObject(n) for n in border[3]])
+ borderArr.append(dashPattern)
+ else:
+ borderArr = [NumberObject(0)] * 3
+
+ if isString(rect):
+ rect = NameObject(rect)
+ elif isinstance(rect, RectangleObject):
+ pass
+ else:
+ rect = RectangleObject(rect)
+
+ zoomArgs = []
+ for a in args:
+ if a is not None:
+ zoomArgs.append(NumberObject(a))
+ else:
+ zoomArgs.append(NullObject())
+ dest = Destination(NameObject("/LinkName"), pageDest, NameObject(fit), *zoomArgs) #TODO: create a better name for the link
+ destArray = dest.getDestArray()
+
+ lnk = DictionaryObject()
+ lnk.update({
+ NameObject('/Type'): NameObject('/Annot'),
+ NameObject('/Subtype'): NameObject('/Link'),
+ NameObject('/P'): pageLink,
+ NameObject('/Rect'): rect,
+ NameObject('/Border'): ArrayObject(borderArr),
+ NameObject('/Dest'): destArray
+ })
+ lnkRef = self._addObject(lnk)
+
+ if "/Annots" in pageRef:
+ pageRef['/Annots'].append(lnkRef)
+ else:
+ pageRef[NameObject('/Annots')] = ArrayObject([lnkRef])
+
+ _valid_layouts = ['/NoLayout', '/SinglePage', '/OneColumn', '/TwoColumnLeft', '/TwoColumnRight', '/TwoPageLeft', '/TwoPageRight']
+
+ def getPageLayout(self):
+ """
+ Get the page layout.
+ See :meth:`setPageLayout()` for a description of valid layouts.
+
+ :return: Page layout currently being used.
+ :rtype: str, None if not specified
+ """
+ try:
+ return self._root_object['/PageLayout']
+ except KeyError:
+ return None
+
+ def setPageLayout(self, layout):
+ """
+ Set the page layout
+
+ :param str layout: The page layout to be used
+
+ Valid layouts are:
+ /NoLayout Layout explicitly not specified
+ /SinglePage Show one page at a time
+ /OneColumn Show one column at a time
+ /TwoColumnLeft Show pages in two columns, odd-numbered pages on the left
+ /TwoColumnRight Show pages in two columns, odd-numbered pages on the right
+ /TwoPageLeft Show two pages at a time, odd-numbered pages on the left
+ /TwoPageRight Show two pages at a time, odd-numbered pages on the right
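+
+        Example (a minimal sketch):
+
+        >>> writer.setPageLayout('/TwoColumnLeft')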
+ """
+ if not isinstance(layout, NameObject):
+ if layout not in self._valid_layouts:
+ warnings.warn("Layout should be one of: {}".format(', '.join(self._valid_layouts)))
+ layout = NameObject(layout)
+ self._root_object.update({NameObject('/PageLayout'): layout})
+
+ pageLayout = property(getPageLayout, setPageLayout)
+ """Read and write property accessing the :meth:`getPageLayout()`
+ and :meth:`setPageLayout()` methods."""
+
+ _valid_modes = ['/UseNone', '/UseOutlines', '/UseThumbs', '/FullScreen', '/UseOC', '/UseAttachments']
+
+ def getPageMode(self):
+ """
+ Get the page mode.
+ See :meth:`setPageMode()` for a description
+ of valid modes.
+
+ :return: Page mode currently being used.
+ :rtype: str, None if not specified
+ """
+ try:
+ return self._root_object['/PageMode']
+ except KeyError:
+ return None
+
+ def setPageMode(self, mode):
+ """
+ Set the page mode.
+
+ :param str mode: The page mode to use.
+
+ Valid modes are:
+ /UseNone Do not show outlines or thumbnails panels
+ /UseOutlines Show outlines (aka bookmarks) panel
+ /UseThumbs Show page thumbnails panel
+ /FullScreen Fullscreen view
+ /UseOC Show Optional Content Group (OCG) panel
+ /UseAttachments Show attachments panel
+ """
+ if not isinstance(mode, NameObject):
+ if mode not in self._valid_modes:
+ warnings.warn("Mode should be one of: {}".format(', '.join(self._valid_modes)))
+ mode = NameObject(mode)
+ self._root_object.update({NameObject('/PageMode'): mode})
+
+ pageMode = property(getPageMode, setPageMode)
+ """Read and write property accessing the :meth:`getPageMode()`
+ and :meth:`setPageMode()` methods."""
+
+
+class PdfFileReader(object):
+ """
+ Initializes a PdfFileReader object. This operation can take some time, as
+ the PDF stream's cross-reference tables are read into memory.
+
+ :param stream: A File object or an object that supports the standard read
+ and seek methods similar to a File object. Could also be a
+ string representing a path to a PDF file.
+    :param bool strict: Determines whether the user should be warned of all
+ problems and also causes some correctable problems to be fatal.
+ Defaults to ``True``.
+ :param warndest: Destination for logging warnings (defaults to
+ ``sys.stderr``).
+ :param bool overwriteWarnings: Determines whether to override Python's
+ ``warnings.py`` module with a custom implementation (defaults to
+ ``True``).
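+
+    Example (a minimal sketch; ``document.pdf`` is a placeholder filename):
+
+    >>> reader = PdfFileReader("document.pdf", strict=False)
+    >>> first_page = reader.getPage(0)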
+ """
+ def __init__(self, stream, strict=True, warndest = None, overwriteWarnings = True):
+ if overwriteWarnings:
+ # have to dynamically override the default showwarning since there are no
+ # public methods that specify the 'file' parameter
+ def _showwarning(message, category, filename, lineno, file=warndest, line=None):
+ if file is None:
+ file = sys.stderr
+ try:
+ file.write(formatWarning(message, category, filename, lineno, line))
+ except IOError:
+ pass
+ warnings.showwarning = _showwarning
+ self.strict = strict
+ self.flattenedPages = None
+ self.resolvedObjects = {}
+ self.xrefIndex = 0
+ self._pageId2Num = None # map page IndirectRef number to Page Number
+ if hasattr(stream, 'mode') and 'b' not in stream.mode:
+ warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
+ if isString(stream):
+ fileobj = open(stream, 'rb')
+ stream = BytesIO(b_(fileobj.read()))
+ fileobj.close()
+ self.read(stream)
+ self.stream = stream
+
+ self._override_encryption = False
+
+ def getDocumentInfo(self):
+ """
+ Retrieves the PDF file's document information dictionary, if it exists.
+ Note that some PDF files use metadata streams instead of docinfo
+ dictionaries, and these metadata streams will not be accessed by this
+ function.
+
+ :return: the document information of this PDF file
+ :rtype: :class:`DocumentInformation` or ``None`` if none exists.
+ """
+ if "/Info" not in self.trailer:
+ return None
+ obj = self.trailer['/Info']
+ retval = DocumentInformation()
+ retval.update(obj)
+ return retval
+
+ documentInfo = property(lambda self: self.getDocumentInfo(), None, None)
+ """Read-only property that accesses the :meth:`getDocumentInfo()` function."""
+
+ def getXmpMetadata(self):
+ """
+ Retrieves XMP (Extensible Metadata Platform) data from the PDF document
+ root.
+
+ :return: a :class:`XmpInformation`
+ instance that can be used to access XMP metadata from the document.
+ :rtype: :class:`XmpInformation` or
+ ``None`` if no metadata was found on the document root.
+ """
+ try:
+ self._override_encryption = True
+ return self.trailer["/Root"].getXmpMetadata()
+ finally:
+ self._override_encryption = False
+
+ xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None)
+ """
+ Read-only property that accesses the
+ :meth:`getXmpMetadata()` function.
+ """
+
+ def getNumPages(self):
+ """
+ Calculates the number of pages in this PDF file.
+
+ :return: number of pages
+ :rtype: int
+ :raises PdfReadError: if file is encrypted and restrictions prevent
+ this action.
+ """
+
+ # Flattened pages will not work on an Encrypted PDF;
+ # the PDF file's page count is used in this case. Otherwise,
+ # the original method (flattened page count) is used.
+ if self.isEncrypted:
+ try:
+ self._override_encryption = True
+ self.decrypt('')
+ return self.trailer["/Root"]["/Pages"]["/Count"]
+            except Exception:
+ raise utils.PdfReadError("File has not been decrypted")
+ finally:
+ self._override_encryption = False
+ else:
+ if self.flattenedPages == None:
+ self._flatten()
+ return len(self.flattenedPages)
+
+ numPages = property(lambda self: self.getNumPages(), None, None)
+ """
+ Read-only property that accesses the
+ :meth:`getNumPages()` function.
+ """
+
+ def getPage(self, pageNumber):
+ """
+ Retrieves a page by number from this PDF file.
+
+ :param int pageNumber: The page number to retrieve
+ (pages begin at zero)
+ :return: a :class:`PageObject` instance.
+ :rtype: :class:`PageObject`
+ """
+ ## ensure that we're not trying to access an encrypted PDF
+ #assert not self.trailer.has_key("/Encrypt")
+ if self.flattenedPages == None:
+ self._flatten()
+ return self.flattenedPages[pageNumber]
+
+ namedDestinations = property(lambda self:
+ self.getNamedDestinations(), None, None)
+ """
+ Read-only property that accesses the
+ :meth:`getNamedDestinations()` function.
+ """
+
+ # A select group of relevant field attributes. For the complete list,
+ # see section 8.6.2 of the PDF 1.7 reference.
+
+ def getFields(self, tree = None, retval = None, fileobj = None):
+ """
+ Extracts field data if this PDF contains interactive form fields.
+ The *tree* and *retval* parameters are for recursive use.
+
+ :param fileobj: A file object (usually a text file) to write
+ a report to on all interactive form fields found.
+ :return: A dictionary where each key is a field name, and each
+ value is a :class:`Field` object. By
+ default, the mapping name is used for keys.
+ :rtype: dict, or ``None`` if form data could not be located.
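+
+        Example (a minimal sketch; assumes the document contains an
+        interactive form):
+
+        >>> fields = reader.getFields()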
+ """
+ fieldAttributes = {"/FT" : "Field Type", "/Parent" : "Parent",
+ "/T" : "Field Name", "/TU" : "Alternate Field Name",
+ "/TM" : "Mapping Name", "/Ff" : "Field Flags",
+ "/V" : "Value", "/DV" : "Default Value"}
+ if retval == None:
+ retval = {}
+ catalog = self.trailer["/Root"]
+ # get the AcroForm tree
+ if "/AcroForm" in catalog:
+ tree = catalog["/AcroForm"]
+ else:
+ return None
+ if tree == None:
+ return retval
+
+ self._checkKids(tree, retval, fileobj)
+ for attr in fieldAttributes:
+ if attr in tree:
+ # Tree is a field
+ self._buildField(tree, retval, fileobj, fieldAttributes)
+ break
+
+ if "/Fields" in tree:
+ fields = tree["/Fields"]
+ for f in fields:
+ field = f.getObject()
+ self._buildField(field, retval, fileobj, fieldAttributes)
+
+ return retval
+
+ def _buildField(self, field, retval, fileobj, fieldAttributes):
+ self._checkKids(field, retval, fileobj)
+ try:
+ key = field["/TM"]
+ except KeyError:
+ try:
+ key = field["/T"]
+ except KeyError:
+ # Ignore no-name field for now
+ return
+ if fileobj:
+ self._writeField(fileobj, field, fieldAttributes)
+ fileobj.write("\n")
+ retval[key] = Field(field)
+
+ def _checkKids(self, tree, retval, fileobj):
+ if "/Kids" in tree:
+ # recurse down the tree
+ for kid in tree["/Kids"]:
+ self.getFields(kid.getObject(), retval, fileobj)
+
+ def _writeField(self, fileobj, field, fieldAttributes):
+ order = ["/TM", "/T", "/FT", "/Parent", "/TU", "/Ff", "/V", "/DV"]
+ for attr in order:
+ attrName = fieldAttributes[attr]
+ try:
+ if attr == "/FT":
+ # Make the field type value more clear
+ types = {"/Btn":"Button", "/Tx":"Text", "/Ch": "Choice",
+ "/Sig":"Signature"}
+ if field[attr] in types:
+ fileobj.write(attrName + ": " + types[field[attr]] + "\n")
+ elif attr == "/Parent":
+ # Let's just write the name of the parent
+ try:
+ name = field["/Parent"]["/TM"]
+ except KeyError:
+ name = field["/Parent"]["/T"]
+ fileobj.write(attrName + ": " + name + "\n")
+ else:
+ fileobj.write(attrName + ": " + str(field[attr]) + "\n")
+ except KeyError:
+ # Field attribute is N/A or unknown, so don't write anything
+ pass
+
+ def getFormTextFields(self):
+        """
+        Retrieves form fields from the document that contain textual data
+        (text inputs, dropdowns).
+        """
+ # Retrieve document form fields
+ formfields = self.getFields()
+ return dict(
+ (formfields[field]['/T'], formfields[field].get('/V')) for field in formfields \
+ if formfields[field].get('/FT') == '/Tx'
+ )
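+
+    # Usage sketch (assumes the document actually contains an AcroForm;
+    # "reader" is a hypothetical PdfFileReader):
+    #
+    #     fields = reader.getFields()        # name -> Field object, or None
+    #     text = reader.getFormTextFields()  # name -> value, /Tx fields only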
+
+ def getNamedDestinations(self, tree=None, retval=None):
+ """
+ Retrieves the named destinations present in the document.
+
+ :return: a dictionary which maps names to
+           :class:`Destination` objects.
+ :rtype: dict
+ """
+ if retval == None:
+ retval = {}
+ catalog = self.trailer["/Root"]
+
+ # get the name tree
+ if "/Dests" in catalog:
+ tree = catalog["/Dests"]
+ elif "/Names" in catalog:
+ names = catalog['/Names']
+ if "/Dests" in names:
+ tree = names['/Dests']
+
+ if tree == None:
+ return retval
+
+ if "/Kids" in tree:
+ # recurse down the tree
+ for kid in tree["/Kids"]:
+ self.getNamedDestinations(kid.getObject(), retval)
+
+ if "/Names" in tree:
+ names = tree["/Names"]
+ for i in range(0, len(names), 2):
+ key = names[i].getObject()
+ val = names[i+1].getObject()
+ if isinstance(val, DictionaryObject) and '/D' in val:
+ val = val['/D']
+ dest = self._buildDestination(key, val)
+ if dest != None:
+ retval[key] = dest
+
+ return retval
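+
+    # Usage sketch (hypothetical "reader"): each value is a Destination that
+    # can be resolved to a page index via getDestinationPageNumber() below.
+    #
+    #     for name, dest in reader.getNamedDestinations().items():
+    #         print(name, reader.getDestinationPageNumber(dest))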
+
+ outlines = property(lambda self: self.getOutlines(), None, None)
+ """
+ Read-only property that accesses the
+ :meth:`getOutlines()` function.
+ """
+
+ def getOutlines(self, node=None, outlines=None):
+ """
+ Retrieves the document outline present in the document.
+
+        :return: a nested list of :class:`Destination` objects.
+ """
+ if outlines == None:
+ outlines = []
+ catalog = self.trailer["/Root"]
+
+ # get the outline dictionary and named destinations
+ if "/Outlines" in catalog:
+ try:
+ lines = catalog["/Outlines"]
+ except utils.PdfReadError:
+ # this occurs if the /Outlines object reference is incorrect
+ # for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf
+ # so continue to load the file without the Bookmarks
+ return outlines
+
+ if "/First" in lines:
+ node = lines["/First"]
+ self._namedDests = self.getNamedDestinations()
+
+ if node == None:
+ return outlines
+
+ # see if there are any more outlines
+ while True:
+ outline = self._buildOutline(node)
+ if outline:
+ outlines.append(outline)
+
+ # check for sub-outlines
+ if "/First" in node:
+ subOutlines = []
+ self.getOutlines(node["/First"], subOutlines)
+ if subOutlines:
+ outlines.append(subOutlines)
+
+ if "/Next" not in node:
+ break
+ node = node["/Next"]
+
+ return outlines
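+
+    # Sketch of walking the nested result (hypothetical "reader"): plain
+    # entries are Destination objects; a nested list holds sub-outlines.
+    #
+    #     def walk(outlines, depth=0):
+    #         for item in outlines:
+    #             if isinstance(item, list):
+    #                 walk(item, depth + 1)
+    #             else:
+    #                 print("  " * depth + item.title)
+    #
+    #     walk(reader.getOutlines())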
+
+ def _getPageNumberByIndirect(self, indirectRef):
+ """Generate _pageId2Num"""
+ if self._pageId2Num is None:
+ id2num = {}
+ for i, x in enumerate(self.pages):
+ id2num[x.indirectRef.idnum] = i
+ self._pageId2Num = id2num
+
+ if isinstance(indirectRef, int):
+ idnum = indirectRef
+ else:
+ idnum = indirectRef.idnum
+
+ ret = self._pageId2Num.get(idnum, -1)
+ return ret
+
+ def getPageNumber(self, page):
+ """
+ Retrieve page number of a given PageObject
+
+ :param PageObject page: The page to get page number. Should be
+ an instance of :class:`PageObject`
+ :return: the page number or -1 if page not found
+ :rtype: int
+ """
+ indirectRef = page.indirectRef
+ ret = self._getPageNumberByIndirect(indirectRef)
+ return ret
+
+ def getDestinationPageNumber(self, destination):
+ """
+ Retrieve page number of a given Destination object
+
+ :param Destination destination: The destination to get page number.
+ Should be an instance of
+ :class:`Destination`
+ :return: the page number or -1 if page not found
+ :rtype: int
+ """
+ indirectRef = destination.page
+ ret = self._getPageNumberByIndirect(indirectRef)
+ return ret
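+
+    # Sketch (hypothetical "reader"): a page round-trips to its own index.
+    #
+    #     page = reader.getPage(0)
+    #     assert reader.getPageNumber(page) == 0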
+
+ def _buildDestination(self, title, array):
+ page, typ = array[0:2]
+ array = array[2:]
+ return Destination(title, page, typ, *array)
+
+ def _buildOutline(self, node):
+ dest, title, outline = None, None, None
+
+ if "/A" in node and "/Title" in node:
+ # Action, section 8.5 (only type GoTo supported)
+ title = node["/Title"]
+ action = node["/A"]
+ if action["/S"] == "/GoTo":
+ dest = action["/D"]
+ elif "/Dest" in node and "/Title" in node:
+ # Destination, section 8.2.1
+ title = node["/Title"]
+ dest = node["/Dest"]
+
+ # if destination found, then create outline
+ if dest:
+ if isinstance(dest, ArrayObject):
+ outline = self._buildDestination(title, dest)
+ elif isString(dest) and dest in self._namedDests:
+ outline = self._namedDests[dest]
+ outline[NameObject("/Title")] = title
+ else:
+ raise utils.PdfReadError("Unexpected destination %r" % dest)
+ return outline
+
+ pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage),
+ None, None)
+ """
+ Read-only property that emulates a list based upon the
+ :meth:`getNumPages()` and
+ :meth:`getPage()` methods.
+ """
+
+ def getPageLayout(self):
+ """
+ Get the page layout.
+ See :meth:`setPageLayout()`
+ for a description of valid layouts.
+
+ :return: Page layout currently being used.
+ :rtype: ``str``, ``None`` if not specified
+ """
+ try:
+ return self.trailer['/Root']['/PageLayout']
+ except KeyError:
+ return None
+
+ pageLayout = property(getPageLayout)
+ """Read-only property accessing the
+ :meth:`getPageLayout()` method."""
+
+ def getPageMode(self):
+ """
+ Get the page mode.
+ See :meth:`setPageMode()`
+ for a description of valid modes.
+
+ :return: Page mode currently being used.
+ :rtype: ``str``, ``None`` if not specified
+ """
+ try:
+ return self.trailer['/Root']['/PageMode']
+ except KeyError:
+ return None
+
+ pageMode = property(getPageMode)
+ """Read-only property accessing the
+ :meth:`getPageMode()` method."""
+
+ def _flatten(self, pages=None, inherit=None, indirectRef=None):
+ inheritablePageAttributes = (
+ NameObject("/Resources"), NameObject("/MediaBox"),
+ NameObject("/CropBox"), NameObject("/Rotate")
+ )
+ if inherit == None:
+ inherit = dict()
+ if pages == None:
+ self.flattenedPages = []
+ catalog = self.trailer["/Root"].getObject()
+ pages = catalog["/Pages"].getObject()
+
+ t = "/Pages"
+ if "/Type" in pages:
+ t = pages["/Type"]
+
+ if t == "/Pages":
+ for attr in inheritablePageAttributes:
+ if attr in pages:
+ inherit[attr] = pages[attr]
+ for page in pages["/Kids"]:
+ addt = {}
+ if isinstance(page, IndirectObject):
+ addt["indirectRef"] = page
+ self._flatten(page.getObject(), inherit, **addt)
+ elif t == "/Page":
+ for attr, value in list(inherit.items()):
+                # if the page has its own value, it does not inherit the
+ # parent's value:
+ if attr not in pages:
+ pages[attr] = value
+ pageObj = PageObject(self, indirectRef)
+ pageObj.update(pages)
+ self.flattenedPages.append(pageObj)
+
+ def _getObjectFromStream(self, indirectReference):
+ # indirect reference to object in object stream
+ # read the entire object stream into memory
+ debug = False
+ stmnum, idx = self.xref_objStm[indirectReference.idnum]
+ if debug: print(("Here1: %s %s"%(stmnum, idx)))
+ objStm = IndirectObject(stmnum, 0, self).getObject()
+ if debug: print(("Here2: objStm=%s.. stmnum=%s data=%s"%(objStm, stmnum, objStm.getData())))
+ # This is an xref to a stream, so its type better be a stream
+ assert objStm['/Type'] == '/ObjStm'
+ # /N is the number of indirect objects in the stream
+ assert idx < objStm['/N']
+ streamData = BytesIO(b_(objStm.getData()))
+ for i in range(objStm['/N']):
+ readNonWhitespace(streamData)
+ streamData.seek(-1, 1)
+ objnum = NumberObject.readFromStream(streamData)
+ readNonWhitespace(streamData)
+ streamData.seek(-1, 1)
+ offset = NumberObject.readFromStream(streamData)
+ readNonWhitespace(streamData)
+ streamData.seek(-1, 1)
+ if objnum != indirectReference.idnum:
+ # We're only interested in one object
+ continue
+ if self.strict and idx != i:
+ raise utils.PdfReadError("Object is in wrong index.")
+ streamData.seek(objStm['/First']+offset, 0)
+ if debug:
+ pos = streamData.tell()
+ streamData.seek(0, 0)
+ lines = streamData.readlines()
+ for i in range(0, len(lines)):
+ print((lines[i]))
+ streamData.seek(pos, 0)
+ try:
+ obj = readObject(streamData, self)
+ except utils.PdfStreamError as e:
+ # Stream object cannot be read. Normally, a critical error, but
+ # Adobe Reader doesn't complain, so continue (in strict mode?)
+ e = sys.exc_info()[1]
+ warnings.warn("Invalid stream (index %d) within object %d %d: %s" % \
+ (i, indirectReference.idnum, indirectReference.generation, e), utils.PdfReadWarning)
+
+ if self.strict:
+ raise utils.PdfReadError("Can't read object stream: %s"%e)
+ # Replace with null. Hopefully it's nothing important.
+ obj = NullObject()
+ return obj
+
+ if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.")
+ return NullObject()
+
+ def getObject(self, indirectReference):
+ debug = False
+ if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation))
+ retval = self.cacheGetIndirectObject(indirectReference.generation,
+ indirectReference.idnum)
+ if retval != None:
+ return retval
+ if indirectReference.generation == 0 and \
+ indirectReference.idnum in self.xref_objStm:
+ retval = self._getObjectFromStream(indirectReference)
+ elif indirectReference.generation in self.xref and \
+ indirectReference.idnum in self.xref[indirectReference.generation]:
+ start = self.xref[indirectReference.generation][indirectReference.idnum]
+ if debug: print((" Uncompressed Object", indirectReference.idnum, indirectReference.generation, ":", start))
+ self.stream.seek(start, 0)
+ idnum, generation = self.readObjectHeader(self.stream)
+ if idnum != indirectReference.idnum and self.xrefIndex:
+ # Xref table probably had bad indexes due to not being zero-indexed
+ if self.strict:
+ raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d); xref table not zero-indexed." \
+ % (indirectReference.idnum, indirectReference.generation, idnum, generation))
+ else: pass # xref table is corrected in non-strict mode
+ elif idnum != indirectReference.idnum and self.strict:
+ # some other problem
+ raise utils.PdfReadError("Expected object ID (%d %d) does not match actual (%d %d)." \
+ % (indirectReference.idnum, indirectReference.generation, idnum, generation))
+ if self.strict:
+ assert generation == indirectReference.generation
+ retval = readObject(self.stream, self)
+
+ # override encryption is used for the /Encrypt dictionary
+ if not self._override_encryption and self.isEncrypted:
+ # if we don't have the encryption key:
+ if not hasattr(self, '_decryption_key'):
+ raise utils.PdfReadError("file has not been decrypted")
+ # otherwise, decrypt here...
+ import struct
+ pack1 = struct.pack(">read", stream)
+ # start at the end:
+ stream.seek(-1, 2)
+ if not stream.tell():
+ raise utils.PdfReadError('Cannot read an empty file')
+ last1K = stream.tell() - 1024 + 1 # offset of last 1024 bytes of stream
+ line = b_('')
+ while line[:5] != b_("%%EOF"):
+ if stream.tell() < last1K:
+ raise utils.PdfReadError("EOF marker not found")
+ line = self.readNextEndLine(stream)
+ if debug: print(" line:",line)
+
+ # find startxref entry - the location of the xref table
+ line = self.readNextEndLine(stream)
+ try:
+ startxref = int(line)
+ except ValueError:
+ # 'startxref' may be on the same line as the location
+ if not line.startswith(b_("startxref")):
+ raise utils.PdfReadError("startxref not found")
+ startxref = int(line[9:].strip())
+ warnings.warn("startxref on same line as offset")
+ else:
+ line = self.readNextEndLine(stream)
+ if line[:9] != b_("startxref"):
+ raise utils.PdfReadError("startxref not found")
+
+ # read all cross reference tables and their trailers
+ self.xref = {}
+ self.xref_objStm = {}
+ self.trailer = DictionaryObject()
+ while True:
+ # load the xref table
+ stream.seek(startxref, 0)
+ x = stream.read(1)
+ if x == b_("x"):
+ # standard cross-reference table
+ ref = stream.read(4)
+ if ref[:3] != b_("ref"):
+ raise utils.PdfReadError("xref table read error")
+ readNonWhitespace(stream)
+ stream.seek(-1, 1)
+                    firsttime = True  # check if this is the first time looking at the xref table
+ while True:
+ num = readObject(stream, self)
+ if firsttime and num != 0:
+ self.xrefIndex = num
+ if self.strict:
+ warnings.warn("Xref table not zero-indexed. ID numbers for objects will be corrected.", utils.PdfReadWarning)
+ #if table not zero indexed, could be due to error from when PDF was created
+ #which will lead to mismatched indices later on, only warned and corrected if self.strict=True
+ firsttime = False
+ readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ size = readObject(stream, self)
+ readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ cnt = 0
+ while cnt < size:
+ line = stream.read(20)
+
+ # It's very clear in section 3.4.3 of the PDF spec
+ # that all cross-reference table lines are a fixed
+ # 20 bytes (as of PDF 1.7). However, some files have
+ # 21-byte entries (or more) due to the use of \r\n
+ # (CRLF) EOL's. Detect that case, and adjust the line
+ # until it does not begin with a \r (CR) or \n (LF).
+ while line[0] in b_("\x0D\x0A"):
+ stream.seek(-20 + 1, 1)
+ line = stream.read(20)
+
+ # On the other hand, some malformed PDF files
+                        # use a single character EOL without a preceding
+ # space. Detect that case, and seek the stream
+ # back one character. (0-9 means we've bled into
+ # the next xref entry, t means we've bled into the
+ # text "trailer"):
+ if line[-1] in b_("0123456789t"):
+ stream.seek(-1, 1)
+
+ offset, generation = line[:16].split(b_(" "))
+ offset, generation = int(offset), int(generation)
+ if generation not in self.xref:
+ self.xref[generation] = {}
+ if num in self.xref[generation]:
+ # It really seems like we should allow the last
+ # xref table in the file to override previous
+ # ones. Since we read the file backwards, assume
+ # any existing key is already set correctly.
+ pass
+ else:
+ self.xref[generation][num] = offset
+ cnt += 1
+ num += 1
+ readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ trailertag = stream.read(7)
+ if trailertag != b_("trailer"):
+ # more xrefs!
+ stream.seek(-7, 1)
+ else:
+ break
+ readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ newTrailer = readObject(stream, self)
+ for key, value in list(newTrailer.items()):
+ if key not in self.trailer:
+ self.trailer[key] = value
+ if "/Prev" in newTrailer:
+ startxref = newTrailer["/Prev"]
+ else:
+ break
+ elif x.isdigit():
+ # PDF 1.5+ Cross-Reference Stream
+ stream.seek(-1, 1)
+ idnum, generation = self.readObjectHeader(stream)
+ xrefstream = readObject(stream, self)
+ assert xrefstream["/Type"] == "/XRef"
+ self.cacheIndirectObject(generation, idnum, xrefstream)
+ streamData = BytesIO(b_(xrefstream.getData()))
+ # Index pairs specify the subsections in the dictionary. If
+                # none, create one subsection that spans everything.
+ idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
+ if debug: print(("read idx_pairs=%s"%list(self._pairs(idx_pairs))))
+ entrySizes = xrefstream.get("/W")
+ assert len(entrySizes) >= 3
+ if self.strict and len(entrySizes) > 3:
+ raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes)
+
+ def getEntry(i):
+ # Reads the correct number of bytes for each entry. See the
+ # discussion of the W parameter in PDF spec table 17.
+ if entrySizes[i] > 0:
+ d = streamData.read(entrySizes[i])
+ return convertToInt(d, entrySizes[i])
+
+ # PDF Spec Table 17: A value of zero for an element in the
+ # W array indicates...the default value shall be used
+ if i == 0: return 1 # First value defaults to 1
+ else: return 0
+
+ def used_before(num, generation):
+ # We move backwards through the xrefs, don't replace any.
+ return num in self.xref.get(generation, []) or \
+ num in self.xref_objStm
+
+ # Iterate through each subsection
+ last_end = 0
+ for start, size in self._pairs(idx_pairs):
+ # The subsections must increase
+ assert start >= last_end
+ last_end = start + size
+ for num in range(start, start+size):
+ # The first entry is the type
+ xref_type = getEntry(0)
+ # The rest of the elements depend on the xref_type
+ if xref_type == 0:
+ # linked list of free objects
+ next_free_object = getEntry(1)
+ next_generation = getEntry(2)
+ elif xref_type == 1:
+ # objects that are in use but are not compressed
+ byte_offset = getEntry(1)
+ generation = getEntry(2)
+ if generation not in self.xref:
+ self.xref[generation] = {}
+ if not used_before(num, generation):
+ self.xref[generation][num] = byte_offset
+ if debug: print(("XREF Uncompressed: %s %s"%(
+ num, generation)))
+ elif xref_type == 2:
+ # compressed objects
+ objstr_num = getEntry(1)
+ obstr_idx = getEntry(2)
+ generation = 0 # PDF spec table 18, generation is 0
+ if not used_before(num, generation):
+ if debug: print(("XREF Compressed: %s %s %s"%(
+ num, objstr_num, obstr_idx)))
+ self.xref_objStm[num] = (objstr_num, obstr_idx)
+ elif self.strict:
+ raise utils.PdfReadError("Unknown xref type: %s"%
+ xref_type)
+
+ trailerKeys = "/Root", "/Encrypt", "/Info", "/ID"
+ for key in trailerKeys:
+ if key in xrefstream and key not in self.trailer:
+ self.trailer[NameObject(key)] = xrefstream.raw_get(key)
+ if "/Prev" in xrefstream:
+ startxref = xrefstream["/Prev"]
+ else:
+ break
+ else:
+ # bad xref character at startxref. Let's see if we can find
+ # the xref table nearby, as we've observed this error with an
+ # off-by-one before.
+ stream.seek(-11, 1)
+ tmp = stream.read(20)
+ xref_loc = tmp.find(b_("xref"))
+ if xref_loc != -1:
+ startxref -= (10 - xref_loc)
+ continue
+ # No explicit xref table, try finding a cross-reference stream.
+ stream.seek(startxref, 0)
+ found = False
+ for look in range(5):
+ if stream.read(1).isdigit():
+ # This is not a standard PDF, consider adding a warning
+ startxref += look
+ found = True
+ break
+ if found:
+ continue
+ # no xref table found at specified location
+ raise utils.PdfReadError("Could not find xref table at specified location")
+ #if not zero-indexed, verify that the table is correct; change it if necessary
+ if self.xrefIndex and not self.strict:
+ loc = stream.tell()
+ for gen in self.xref:
+ if gen == 65535: continue
+ for id in self.xref[gen]:
+ stream.seek(self.xref[gen][id], 0)
+ try:
+ pid, pgen = self.readObjectHeader(stream)
+ except ValueError:
+ break
+ if pid == id - self.xrefIndex:
+ self._zeroXref(gen)
+ break
+ #if not, then either it's just plain wrong, or the non-zero-index is actually correct
+ stream.seek(loc, 0) #return to where it was
+
+ def _zeroXref(self, generation):
+ self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) )
+
+ def _pairs(self, array):
+ i = 0
+ while True:
+ yield array[i], array[i+1]
+ i += 2
+ if (i+1) >= len(array):
+ break
+
+ def readNextEndLine(self, stream):
+ debug = False
+ if debug: print(">>readNextEndLine")
+ line = b_("")
+ while True:
+ # Prevent infinite loops in malformed PDFs
+ if stream.tell() == 0:
+ raise utils.PdfReadError("Could not read malformed PDF file")
+ x = stream.read(1)
+ if debug: print((" x:", x, "%x"%ord(x)))
+ if stream.tell() < 2:
+ raise utils.PdfReadError("EOL marker not found")
+ stream.seek(-2, 1)
+ if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR
+ crlf = False
+ while x == b_('\n') or x == b_('\r'):
+ if debug:
+ if ord(x) == 0x0D: print(" x is CR 0D")
+ elif ord(x) == 0x0A: print(" x is LF 0A")
+ x = stream.read(1)
+ if x == b_('\n') or x == b_('\r'): # account for CR+LF
+ stream.seek(-1, 1)
+ crlf = True
+ if stream.tell() < 2:
+ raise utils.PdfReadError("EOL marker not found")
+ stream.seek(-2, 1)
+ stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1
+ break
+ else:
+ if debug: print(" x is neither")
+ line = x + line
+ if debug: print((" RNEL line:", line))
+ if debug: print("leaving RNEL")
+ return line
+
+ def decrypt(self, password):
+ """
+ When using an encrypted / secured PDF file with the PDF Standard
+ encryption handler, this function will allow the file to be decrypted.
+ It checks the given password against the document's user password and
+ owner password, and then stores the resulting decryption key if either
+ password is correct.
+
+ It does not matter which password was matched. Both passwords provide
+ the correct decryption key that will allow the document to be used with
+ this library.
+
+ :param str password: The password to match.
+ :return: ``0`` if the password failed, ``1`` if the password matched the user
+ password, and ``2`` if the password matched the owner password.
+ :rtype: int
+ :raises NotImplementedError: if document uses an unsupported encryption
+ method.
+ """
+
+ self._override_encryption = True
+ try:
+ return self._decrypt(password)
+ finally:
+ self._override_encryption = False
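+
+    # Typical decryption flow (password and "reader" are hypothetical):
+    #
+    #     if reader.isEncrypted:
+    #         matched = reader.decrypt("secret")
+    #         # 0 = wrong password, 1 = user password, 2 = owner password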
+
+ def _decrypt(self, password):
+ encrypt = self.trailer['/Encrypt'].getObject()
+ if encrypt['/Filter'] != '/Standard':
+ raise NotImplementedError("only Standard PDF encryption handler is available")
+ if not (encrypt['/V'] in (1, 2)):
+ raise NotImplementedError("only algorithm code 1 and 2 are supported. This PDF uses code %s" % encrypt['/V'])
+ user_password, key = self._authenticateUserPassword(password)
+ if user_password:
+ self._decryption_key = key
+ return 1
+ else:
+ rev = encrypt['/R'].getObject()
+ if rev == 2:
+ keylen = 5
+ else:
+ keylen = encrypt['/Length'].getObject() // 8
+ key = _alg33_1(password, rev, keylen)
+ real_O = encrypt["/O"].getObject()
+ if rev == 2:
+ userpass = utils.RC4_encrypt(key, real_O)
+ else:
+ val = real_O
+ for i in range(19, -1, -1):
+ new_key = b_('')
+ for l in range(len(key)):
+ new_key += b_(chr(utils.ord_(key[l]) ^ i))
+ val = utils.RC4_encrypt(new_key, val)
+ userpass = val
+ owner_password, key = self._authenticateUserPassword(userpass)
+ if owner_password:
+ self._decryption_key = key
+ return 2
+ return 0
+
+ def _authenticateUserPassword(self, password):
+ encrypt = self.trailer['/Encrypt'].getObject()
+ rev = encrypt['/R'].getObject()
+ owner_entry = encrypt['/O'].getObject()
+ p_entry = encrypt['/P'].getObject()
+ id_entry = self.trailer['/ID'].getObject()
+ id1_entry = id_entry[0].getObject()
+ real_U = encrypt['/U'].getObject().original_bytes
+ if rev == 2:
+ U, key = _alg34(password, owner_entry, p_entry, id1_entry)
+ elif rev >= 3:
+ U, key = _alg35(password, rev,
+ encrypt["/Length"].getObject() // 8, owner_entry,
+ p_entry, id1_entry,
+ encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject())
+ U, real_U = U[:16], real_U[:16]
+ return U == real_U, key
+
+ def getIsEncrypted(self):
+ return "/Encrypt" in self.trailer
+
+ isEncrypted = property(lambda self: self.getIsEncrypted(), None, None)
+ """
+ Read-only boolean property showing whether this PDF file is encrypted.
+ Note that this property, if true, will remain true even after the
+ :meth:`decrypt()` method is called.
+ """
+
+
+def getRectangle(self, name, defaults):
+ retval = self.get(name)
+ if isinstance(retval, RectangleObject):
+ return retval
+ if retval == None:
+ for d in defaults:
+ retval = self.get(d)
+ if retval != None:
+ break
+ if isinstance(retval, IndirectObject):
+ retval = self.pdf.getObject(retval)
+ retval = RectangleObject(retval)
+ setRectangle(self, name, retval)
+ return retval
+
+
+def setRectangle(self, name, value):
+ if not isinstance(name, NameObject):
+ name = NameObject(name)
+ self[name] = value
+
+
+def deleteRectangle(self, name):
+ del self[name]
+
+
+def createRectangleAccessor(name, fallback):
+ return \
+ property(
+ lambda self: getRectangle(self, name, fallback),
+ lambda self, value: setRectangle(self, name, value),
+ lambda self: deleteRectangle(self, name)
+ )
+
+
+class PageObject(DictionaryObject):
+ """
+ This class represents a single page within a PDF file. Typically this
+ object will be created by accessing the
+ :meth:`getPage()` method of the
+ :class:`PdfFileReader` class, but it is
+ also possible to create an empty page with the
+ :meth:`createBlankPage()` static method.
+
+ :param pdf: PDF file the page belongs to.
+ :param indirectRef: Stores the original indirect reference to
+ this object in its source PDF
+ """
+ def __init__(self, pdf=None, indirectRef=None):
+ DictionaryObject.__init__(self)
+ self.pdf = pdf
+ self.indirectRef = indirectRef
+
+ def createBlankPage(pdf=None, width=None, height=None):
+ """
+ Returns a new blank page.
+ If ``width`` or ``height`` is ``None``, try to get the page size
+ from the last page of *pdf*.
+
+ :param pdf: PDF file the page belongs to
+ :param float width: The width of the new page expressed in default user
+ space units.
+ :param float height: The height of the new page expressed in default user
+ space units.
+        :return: the new blank page.
+ :rtype: :class:`PageObject`
+ :raises PageSizeNotDefinedError: if ``pdf`` is ``None`` or contains
+ no page
+ """
+ page = PageObject(pdf)
+
+ # Creates a new page (cf PDF Reference 7.7.3.3)
+ page.__setitem__(NameObject('/Type'), NameObject('/Page'))
+ page.__setitem__(NameObject('/Parent'), NullObject())
+ page.__setitem__(NameObject('/Resources'), DictionaryObject())
+ if width is None or height is None:
+ if pdf is not None and pdf.getNumPages() > 0:
+ lastpage = pdf.getPage(pdf.getNumPages() - 1)
+ width = lastpage.mediaBox.getWidth()
+ height = lastpage.mediaBox.getHeight()
+ else:
+ raise utils.PageSizeNotDefinedError()
+ page.__setitem__(NameObject('/MediaBox'),
+ RectangleObject([0, 0, width, height]))
+
+ return page
+ createBlankPage = staticmethod(createBlankPage)
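+
+    # Usage sketch (the size is illustrative; user space units are 1/72 inch,
+    # so 612 x 792 is US Letter):
+    #
+    #     blank = PageObject.createBlankPage(width=612, height=792)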
+
+ def rotateClockwise(self, angle):
+ """
+ Rotates a page clockwise by increments of 90 degrees.
+
+ :param int angle: Angle to rotate the page. Must be an increment
+ of 90 deg.
+ """
+ assert angle % 90 == 0
+ self._rotate(angle)
+ return self
+
+ def rotateCounterClockwise(self, angle):
+ """
+ Rotates a page counter-clockwise by increments of 90 degrees.
+
+ :param int angle: Angle to rotate the page. Must be an increment
+ of 90 deg.
+ """
+ assert angle % 90 == 0
+ self._rotate(-angle)
+ return self
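+
+    # Sketch: both rotate methods return self, so calls can be chained.
+    #
+    #     page.rotateClockwise(90)
+    #     page.rotateCounterClockwise(90)  # back to the original orientation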
+
+ def _rotate(self, angle):
+ rotateObj = self.get("/Rotate", 0)
+ currentAngle = rotateObj if isinstance(rotateObj, int) else rotateObj.getObject()
+ self[NameObject("/Rotate")] = NumberObject(currentAngle + angle)
+
+ def _mergeResources(res1, res2, resource):
+ newRes = DictionaryObject()
+ newRes.update(res1.get(resource, DictionaryObject()).getObject())
+ page2Res = res2.get(resource, DictionaryObject()).getObject()
+ renameRes = {}
+ for key in list(page2Res.keys()):
+ if key in newRes and newRes.raw_get(key) != page2Res.raw_get(key):
+ newname = NameObject(key + str(uuid.uuid4()))
+ renameRes[key] = newname
+ newRes[newname] = page2Res[key]
+ elif key not in newRes:
+ newRes[key] = page2Res.raw_get(key)
+ return newRes, renameRes
+ _mergeResources = staticmethod(_mergeResources)
+
+ def _contentStreamRename(stream, rename, pdf):
+ if not rename:
+ return stream
+ stream = ContentStream(stream, pdf)
+ for operands, operator in stream.operations:
+ for i in range(len(operands)):
+ op = operands[i]
+ if isinstance(op, NameObject):
+ operands[i] = rename.get(op,op)
+ return stream
+ _contentStreamRename = staticmethod(_contentStreamRename)
+
+ def _pushPopGS(contents, pdf):
+ # adds a graphics state "push" and "pop" to the beginning and end
+ # of a content stream. This isolates it from changes such as
+        # transformation matrices.
+ stream = ContentStream(contents, pdf)
+ stream.operations.insert(0, [[], "q"])
+ stream.operations.append([[], "Q"])
+ return stream
+ _pushPopGS = staticmethod(_pushPopGS)
+
+ def _addTransformationMatrix(contents, pdf, ctm):
+ # adds transformation matrix at the beginning of the given
+ # contents stream.
+ a, b, c, d, e, f = ctm
+ contents = ContentStream(contents, pdf)
+ contents.operations.insert(0, [[FloatObject(a), FloatObject(b),
+ FloatObject(c), FloatObject(d), FloatObject(e),
+ FloatObject(f)], " cm"])
+ return contents
+ _addTransformationMatrix = staticmethod(_addTransformationMatrix)
+
+ def getContents(self):
+ """
+ Accesses the page contents.
+
+ :return: the ``/Contents`` object, or ``None`` if it doesn't exist.
+ ``/Contents`` is optional, as described in PDF Reference 7.7.3.3
+ """
+ if "/Contents" in self:
+ return self["/Contents"].getObject()
+ else:
+ return None
+
+ def mergePage(self, page2):
+ """
+ Merges the content streams of two pages into one. Resource references
+ (i.e. fonts) are maintained from both pages. The mediabox/cropbox/etc
+ of this page are not altered. The parameter page's content stream will
+ be added to the end of this page's content stream, meaning that it will
+ be drawn after, or "on top" of this page.
+
+ :param PageObject page2: The page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ """
+ self._mergePage(page2)
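+
+    # Typical stamping/watermarking sketch (file name is hypothetical):
+    #
+    #     stamp = PdfFileReader(open("stamp.pdf", "rb")).getPage(0)
+    #     page.mergePage(stamp)  # stamp is drawn on top of this page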
+
+ def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False):
+ # First we work on merging the resource dictionaries. This allows us
+ # to find out what symbols in the content streams we might need to
+ # rename.
+
+ newResources = DictionaryObject()
+ rename = {}
+ originalResources = self["/Resources"].getObject()
+ page2Resources = page2["/Resources"].getObject()
+ newAnnots = ArrayObject()
+
+ for page in (self, page2):
+ if "/Annots" in page:
+ annots = page["/Annots"]
+ if isinstance(annots, ArrayObject):
+ for ref in annots:
+ newAnnots.append(ref)
+
+ for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading", "/Properties":
+ new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
+ if new:
+ newResources[NameObject(res)] = new
+ rename.update(newrename)
+
+ # Combine /ProcSet sets.
+ newResources[NameObject("/ProcSet")] = ArrayObject(
+ frozenset(originalResources.get("/ProcSet", ArrayObject()).getObject()).union(
+ frozenset(page2Resources.get("/ProcSet", ArrayObject()).getObject())
+ )
+ )
+
+ newContentArray = ArrayObject()
+
+ originalContent = self.getContents()
+ if originalContent is not None:
+ newContentArray.append(PageObject._pushPopGS(
+ originalContent, self.pdf))
+
+ page2Content = page2.getContents()
+ if page2Content is not None:
+ if page2transformation is not None:
+ page2Content = page2transformation(page2Content)
+ page2Content = PageObject._contentStreamRename(
+ page2Content, rename, self.pdf)
+ page2Content = PageObject._pushPopGS(page2Content, self.pdf)
+ newContentArray.append(page2Content)
+
+ # if expanding the page to fit a new page, calculate the new media box size
+ if expand:
+ corners1 = [self.mediaBox.getLowerLeft_x().as_numeric(), self.mediaBox.getLowerLeft_y().as_numeric(),
+ self.mediaBox.getUpperRight_x().as_numeric(), self.mediaBox.getUpperRight_y().as_numeric()]
+ corners2 = [page2.mediaBox.getLowerLeft_x().as_numeric(), page2.mediaBox.getLowerLeft_y().as_numeric(),
+ page2.mediaBox.getUpperLeft_x().as_numeric(), page2.mediaBox.getUpperLeft_y().as_numeric(),
+ page2.mediaBox.getUpperRight_x().as_numeric(), page2.mediaBox.getUpperRight_y().as_numeric(),
+ page2.mediaBox.getLowerRight_x().as_numeric(), page2.mediaBox.getLowerRight_y().as_numeric()]
+ if ctm is not None:
+ ctm = [float(x) for x in ctm]
+ new_x = [ctm[0]*corners2[i] + ctm[2]*corners2[i+1] + ctm[4] for i in range(0, 8, 2)]
+ new_y = [ctm[1]*corners2[i] + ctm[3]*corners2[i+1] + ctm[5] for i in range(0, 8, 2)]
+ else:
+ new_x = corners2[0:8:2]
+ new_y = corners2[1:8:2]
+ lowerleft = [min(new_x), min(new_y)]
+ upperright = [max(new_x), max(new_y)]
+ lowerleft = [min(corners1[0], lowerleft[0]), min(corners1[1], lowerleft[1])]
+ upperright = [max(corners1[2], upperright[0]), max(corners1[3], upperright[1])]
+
+ self.mediaBox.setLowerLeft(lowerleft)
+ self.mediaBox.setUpperRight(upperright)
+
+ self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf)
+ self[NameObject('/Resources')] = newResources
+ self[NameObject('/Annots')] = newAnnots
+
+ def mergeTransformedPage(self, page2, ctm, expand=False):
+ """
+ This is similar to mergePage, but a transformation matrix is
+ applied to the merged stream.
+
+ :param PageObject page2: The page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param tuple ctm: a 6-element tuple containing the operands of the
+ transformation matrix
+ :param bool expand: Whether the page should be expanded to fit the dimensions
+ of the page to be merged.
+ """
+ self._mergePage(page2, lambda page2Content:
+ PageObject._addTransformationMatrix(page2Content, page2.pdf, ctm), ctm, expand)
+
+ def mergeScaledPage(self, page2, scale, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is scaled
+        by applying a transformation matrix.
+
+ :param PageObject page2: The page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float scale: The scaling factor
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+ # CTM to scale : [ sx 0 0 sy 0 0 ]
+ return self.mergeTransformedPage(page2, [scale, 0,
+ 0, scale,
+ 0, 0], expand)
+
+ def mergeRotatedPage(self, page2, rotation, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is rotated
+        by applying a transformation matrix.
+
+ :param PageObject page2: the page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float rotation: The angle of the rotation, in degrees
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+ rotation = math.radians(rotation)
+ return self.mergeTransformedPage(page2,
+ [math.cos(rotation), math.sin(rotation),
+ -math.sin(rotation), math.cos(rotation),
+ 0, 0], expand)
+
+ def mergeTranslatedPage(self, page2, tx, ty, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is translated
+        by applying a transformation matrix.
+
+ :param PageObject page2: the page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float tx: The translation on X axis
+ :param float ty: The translation on Y axis
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+ return self.mergeTransformedPage(page2, [1, 0,
+ 0, 1,
+ tx, ty], expand)
+
+ def mergeRotatedTranslatedPage(self, page2, rotation, tx, ty, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is rotated
+        and translated by applying a transformation matrix.
+
+ :param PageObject page2: the page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float tx: The translation on X axis
+ :param float ty: The translation on Y axis
+ :param float rotation: The angle of the rotation, in degrees
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+
+ translation = [[1, 0, 0],
+ [0, 1, 0],
+ [-tx, -ty, 1]]
+ rotation = math.radians(rotation)
+ rotating = [[math.cos(rotation), math.sin(rotation), 0],
+ [-math.sin(rotation), math.cos(rotation), 0],
+ [0, 0, 1]]
+ rtranslation = [[1, 0, 0],
+ [0, 1, 0],
+ [tx, ty, 1]]
+ ctm = utils.matrixMultiply(translation, rotating)
+ ctm = utils.matrixMultiply(ctm, rtranslation)
+
+ return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
+ ctm[1][0], ctm[1][1],
+ ctm[2][0], ctm[2][1]], expand)
+
+ def mergeRotatedScaledPage(self, page2, rotation, scale, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is rotated
+        and scaled by applying a transformation matrix.
+
+ :param PageObject page2: the page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float rotation: The angle of the rotation, in degrees
+ :param float scale: The scaling factor
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+ rotation = math.radians(rotation)
+ rotating = [[math.cos(rotation), math.sin(rotation), 0],
+ [-math.sin(rotation), math.cos(rotation), 0],
+ [0, 0, 1]]
+ scaling = [[scale, 0, 0],
+ [0, scale, 0],
+ [0, 0, 1]]
+ ctm = utils.matrixMultiply(rotating, scaling)
+
+ return self.mergeTransformedPage(page2,
+ [ctm[0][0], ctm[0][1],
+ ctm[1][0], ctm[1][1],
+ ctm[2][0], ctm[2][1]], expand)
+
+ def mergeScaledTranslatedPage(self, page2, scale, tx, ty, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is translated
+        and scaled by applying a transformation matrix.
+
+ :param PageObject page2: the page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float scale: The scaling factor
+ :param float tx: The translation on X axis
+ :param float ty: The translation on Y axis
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+
+ translation = [[1, 0, 0],
+ [0, 1, 0],
+ [tx, ty, 1]]
+ scaling = [[scale, 0, 0],
+ [0, scale, 0],
+ [0, 0, 1]]
+ ctm = utils.matrixMultiply(scaling, translation)
+
+ return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
+ ctm[1][0], ctm[1][1],
+ ctm[2][0], ctm[2][1]], expand)
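+
+    # Sketch: draw page2 at half size, offset 20 units from the lower-left
+    # corner (values are illustrative):
+    #
+    #     page.mergeScaledTranslatedPage(page2, 0.5, 20, 20)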
+
+ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expand=False):
+ """
+ This is similar to mergePage, but the stream to be merged is translated,
+        rotated and scaled by applying a transformation matrix.
+
+ :param PageObject page2: the page to be merged into this one. Should be
+ an instance of :class:`PageObject`.
+ :param float tx: The translation on X axis
+ :param float ty: The translation on Y axis
+ :param float rotation: The angle of the rotation, in degrees
+ :param float scale: The scaling factor
+ :param bool expand: Whether the page should be expanded to fit the
+ dimensions of the page to be merged.
+ """
+ translation = [[1, 0, 0],
+ [0, 1, 0],
+ [tx, ty, 1]]
+ rotation = math.radians(rotation)
+ rotating = [[math.cos(rotation), math.sin(rotation), 0],
+ [-math.sin(rotation), math.cos(rotation), 0],
+ [0, 0, 1]]
+ scaling = [[scale, 0, 0],
+ [0, scale, 0],
+ [0, 0, 1]]
+ ctm = utils.matrixMultiply(rotating, scaling)
+ ctm = utils.matrixMultiply(ctm, translation)
+
+ return self.mergeTransformedPage(page2, [ctm[0][0], ctm[0][1],
+ ctm[1][0], ctm[1][1],
+ ctm[2][0], ctm[2][1]], expand)
+
+ def addTransformation(self, ctm):
+ """
+ Applies a transformation matrix to the page.
+
+ :param tuple ctm: A 6-element tuple containing the operands of the
+ transformation matrix.
+ """
+ originalContent = self.getContents()
+ if originalContent is not None:
+ newContent = PageObject._addTransformationMatrix(
+ originalContent, self.pdf, ctm)
+ newContent = PageObject._pushPopGS(newContent, self.pdf)
+ self[NameObject('/Contents')] = newContent
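+
+    # Sketch: with the identity matrix plus offsets, the content is simply
+    # translated (same [a b c d e f] convention as the merge* helpers):
+    #
+    #     page.addTransformation([1, 0, 0, 1, 50, 100])  # 50 right, 100 up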
+
+ def scale(self, sx, sy):
+ """
+        Scales a page by the given factors by applying a transformation
+ matrix to its content and updating the page size.
+
+ :param float sx: The scaling factor on horizontal axis.
+ :param float sy: The scaling factor on vertical axis.
+ """
+ self.addTransformation([sx, 0,
+ 0, sy,
+ 0, 0])
+ self.mediaBox = RectangleObject([
+ float(self.mediaBox.getLowerLeft_x()) * sx,
+ float(self.mediaBox.getLowerLeft_y()) * sy,
+ float(self.mediaBox.getUpperRight_x()) * sx,
+ float(self.mediaBox.getUpperRight_y()) * sy])
+ if "/VP" in self:
+ viewport = self["/VP"]
+ if isinstance(viewport, ArrayObject):
+ bbox = viewport[0]["/BBox"]
+ else:
+ bbox = viewport["/BBox"]
+ scaled_bbox = RectangleObject([
+ float(bbox[0]) * sx,
+ float(bbox[1]) * sy,
+ float(bbox[2]) * sx,
+ float(bbox[3]) * sy])
+ if isinstance(viewport, ArrayObject):
+ self[NameObject("/VP")][NumberObject(0)][NameObject("/BBox")] = scaled_bbox
+ else:
+ self[NameObject("/VP")][NameObject("/BBox")] = scaled_bbox
+
+ def scaleBy(self, factor):
+ """
+        Scales a page by the given factor by applying a transformation
+ matrix to its content and updating the page size.
+
+ :param float factor: The scaling factor (for both X and Y axis).
+ """
+ self.scale(factor, factor)
+
+ def scaleTo(self, width, height):
+ """
+        Scales a page to the specified dimensions by applying a
+        transformation matrix to its content and updating the page size.
+
+        :param float width: The new width.
+        :param float height: The new height.
+ """
+ sx = width / float(self.mediaBox.getUpperRight_x() -
+ self.mediaBox.getLowerLeft_x ())
+ sy = height / float(self.mediaBox.getUpperRight_y() -
+ self.mediaBox.getLowerLeft_y ())
+ self.scale(sx, sy)
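+
+    # Sketch (illustrative target size): force a page to US Letter.
+    #
+    #     page.scaleTo(612, 792)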
+
+ def compressContentStreams(self):
+ """
+ Compresses the size of this page by joining all content streams and
+ applying a FlateDecode filter.
+
+ However, it is possible that this function will perform no action if
+ content stream compression becomes "automatic" for some reason.
+ """
+ content = self.getContents()
+ if content is not None:
+ if not isinstance(content, ContentStream):
+ content = ContentStream(content, self.pdf)
+ self[NameObject("/Contents")] = content.flateEncode()
+
+ def extractText(self):
+ """
+ Locate all text drawing commands, in the order they are provided in the
+ content stream, and extract the text. This works well for some PDF
+ files, but poorly for others, depending on the generator used. This will
+ be refined in the future. Do not rely on the order of text coming out of
+ this function, as it will change if this function is made more
+ sophisticated.
+
+ :return: a unicode string object.
+ """
+ text = u_("")
+ content = self["/Contents"].getObject()
+ if not isinstance(content, ContentStream):
+ content = ContentStream(content, self.pdf)
+ # Note: we check all strings are TextStringObjects. ByteStringObjects
+ # are strings where the byte->string encoding was unknown, so adding
+ # them to the text here would be gibberish.
+ for operands, operator in content.operations:
+ if operator == b_("Tj"):
+ _text = operands[0]
+ if isinstance(_text, TextStringObject):
+ text += _text
+ text += "\n"
+ elif operator == b_("T*"):
+ text += "\n"
+ elif operator == b_("'"):
+ text += "\n"
+ _text = operands[0]
+ if isinstance(_text, TextStringObject):
+ text += operands[0]
+ elif operator == b_('"'):
+ _text = operands[2]
+ if isinstance(_text, TextStringObject):
+ text += "\n"
+ text += _text
+ elif operator == b_("TJ"):
+ for i in operands[0]:
+ if isinstance(i, TextStringObject):
+ text += i
+ text += "\n"
+ return text
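+
+    # Usage sketch (hypothetical "reader"); output quality depends on the
+    # tool that produced the PDF:
+    #
+    #     text = reader.getPage(0).extractText()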
+
+ mediaBox = createRectangleAccessor("/MediaBox", ())
+ """
+ A :class:`RectangleObject`, expressed in default user space units,
+ defining the boundaries of the physical medium on which the page is
+ intended to be displayed or printed.
+ """
+
+ cropBox = createRectangleAccessor("/CropBox", ("/MediaBox",))
+ """
+ A :class:`RectangleObject`, expressed in default user space units,
+ defining the visible region of default user space. When the page is
+ displayed or printed, its contents are to be clipped (cropped) to this
+ rectangle and then imposed on the output medium in some
+ implementation-defined manner. Default value: same as :attr:`mediaBox`.
+ """
+
+ bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", "/MediaBox"))
+ """
+ A :class:`RectangleObject`, expressed in default user space units,
+ defining the region to which the contents of the page should be clipped
+    when output in a production environment.
+ """
+
+ trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", "/MediaBox"))
+ """
+ A :class:`RectangleObject`, expressed in default user space units,
+ defining the intended dimensions of the finished page after trimming.
+ """
+
+ artBox = createRectangleAccessor("/ArtBox", ("/CropBox", "/MediaBox"))
+ """
+ A :class:`RectangleObject`, expressed in default user space units,
+ defining the extent of the page's meaningful content as intended by the
+ page's creator.
+ """
+
+
+class ContentStream(DecodedStreamObject):
+ def __init__(self, stream, pdf):
+ self.pdf = pdf
+ self.operations = []
+ # stream may be a StreamObject or an ArrayObject containing
+ # multiple StreamObjects to be cat'd together.
+ stream = stream.getObject()
+ if isinstance(stream, ArrayObject):
+ data = b_("")
+ for s in stream:
+ data += b_(s.getObject().getData())
+ stream = BytesIO(b_(data))
+ else:
+ stream = BytesIO(b_(stream.getData()))
+ self.__parseContentStream(stream)
+
+ def __parseContentStream(self, stream):
+ # file("f:\\tmp.txt", "w").write(stream.read())
+ stream.seek(0, 0)
+ operands = []
+ while True:
+ peek = readNonWhitespace(stream)
+ if peek == b_('') or ord_(peek) == 0:
+ break
+ stream.seek(-1, 1)
+ if peek.isalpha() or peek == b_("'") or peek == b_('"'):
+ operator = utils.readUntilRegex(stream,
+ NameObject.delimiterPattern, True)
+ if operator == b_("BI"):
+ # begin inline image - a completely different parsing
+ # mechanism is required, of course... thanks buddy...
+ assert operands == []
+ ii = self._readInlineImage(stream)
+ self.operations.append((ii, b_("INLINE IMAGE")))
+ else:
+ self.operations.append((operands, operator))
+ operands = []
+ elif peek == b_('%'):
+ # If we encounter a comment in the content stream, we have to
+ # handle it here. Typically, readObject will handle
+ # encountering a comment -- but readObject assumes that
+ # following the comment must be the object we're trying to
+ # read. In this case, it could be an operator instead.
+ while peek not in (b_('\r'), b_('\n')):
+ peek = stream.read(1)
+ else:
+ operands.append(readObject(stream, None))
+
+ def _readInlineImage(self, stream):
+ # begin reading just after the "BI" - begin image
+ # first read the dictionary of settings.
+ settings = DictionaryObject()
+ while True:
+ tok = readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ if tok == b_("I"):
+ # "ID" - begin of image data
+ break
+ key = readObject(stream, self.pdf)
+ tok = readNonWhitespace(stream)
+ stream.seek(-1, 1)
+ value = readObject(stream, self.pdf)
+ settings[key] = value
+ # left at beginning of ID
+ tmp = stream.read(3)
+ assert tmp[:2] == b_("ID")
+ data = b_("")
+ while True:
+ # Read the inline image, while checking for EI (End Image) operator.
+ tok = stream.read(1)
+ if tok == b_("E"):
+ # Check for End Image
+ tok2 = stream.read(1)
+ if tok2 == b_("I"):
+ # Data can contain EI, so check for the Q operator.
+ tok3 = stream.read(1)
+ info = tok + tok2
+ # We need to find whitespace between EI and Q.
+ has_q_whitespace = False
+ while tok3 in utils.WHITESPACES:
+ has_q_whitespace = True
+ info += tok3
+ tok3 = stream.read(1)
+ if tok3 == b_("Q") and has_q_whitespace:
+ stream.seek(-1, 1)
+ break
+ else:
+ stream.seek(-1,1)
+ data += info
+ else:
+ stream.seek(-1, 1)
+ data += tok
+ else:
+ data += tok
+ return {"settings": settings, "data": data}
+
+ def _getData(self):
+ newdata = BytesIO()
+ for operands, operator in self.operations:
+ if operator == b_("INLINE IMAGE"):
+ newdata.write(b_("BI"))
+ dicttext = BytesIO()
+ operands["settings"].writeToStream(dicttext, None)
+ newdata.write(dicttext.getvalue()[2:-2])
+ newdata.write(b_("ID "))
+ newdata.write(operands["data"])
+ newdata.write(b_("EI"))
+ else:
+ for op in operands:
+ op.writeToStream(newdata, None)
+ newdata.write(b_(" "))
+ newdata.write(b_(operator))
+ newdata.write(b_("\n"))
+ return newdata.getvalue()
+
+ def _setData(self, value):
+ self.__parseContentStream(BytesIO(b_(value)))
+
+ _data = property(_getData, _setData)
+
+
+class DocumentInformation(DictionaryObject):
+ """
+ A class representing the basic document metadata provided in a PDF File.
+ This class is accessible through
+ :meth:`getDocumentInfo()`
+
+ All text properties of the document metadata have
+    *two* properties, e.g. ``author`` and ``author_raw``. The non-raw property will
+ always return a ``TextStringObject``, making it ideal for a case where
+ the metadata is being displayed. The raw property can sometimes return
+ a ``ByteStringObject``, if PyPDF2 was unable to decode the string's
+ text encoding; this requires additional safety in the caller and
+ therefore is not as commonly accessed.
+ """
+
+ def __init__(self):
+ DictionaryObject.__init__(self)
+
+ def getText(self, key):
+ retval = self.get(key, None)
+ if isinstance(retval, TextStringObject):
+ return retval
+ return None
+
+ title = property(lambda self: self.getText("/Title"))
+ """Read-only property accessing the document's **title**.
+ Returns a unicode string (``TextStringObject``) or ``None``
+ if the title is not specified."""
+ title_raw = property(lambda self: self.get("/Title"))
+ """The "raw" version of title; can return a ``ByteStringObject``."""
+
+ author = property(lambda self: self.getText("/Author"))
+ """Read-only property accessing the document's **author**.
+ Returns a unicode string (``TextStringObject``) or ``None``
+ if the author is not specified."""
+ author_raw = property(lambda self: self.get("/Author"))
+ """The "raw" version of author; can return a ``ByteStringObject``."""
+
+ subject = property(lambda self: self.getText("/Subject"))
+ """Read-only property accessing the document's **subject**.
+ Returns a unicode string (``TextStringObject``) or ``None``
+ if the subject is not specified."""
+ subject_raw = property(lambda self: self.get("/Subject"))
+ """The "raw" version of subject; can return a ``ByteStringObject``."""
+
+ creator = property(lambda self: self.getText("/Creator"))
+ """Read-only property accessing the document's **creator**. If the
+ document was converted to PDF from another format, this is the name of the
+ application (e.g. OpenOffice) that created the original document from
+ which it was converted. Returns a unicode string (``TextStringObject``)
+ or ``None`` if the creator is not specified."""
+ creator_raw = property(lambda self: self.get("/Creator"))
+ """The "raw" version of creator; can return a ``ByteStringObject``."""
+
+ producer = property(lambda self: self.getText("/Producer"))
+ """Read-only property accessing the document's **producer**.
+ If the document was converted to PDF from another format, this is
+ the name of the application (for example, OSX Quartz) that converted
+ it to PDF. Returns a unicode string (``TextStringObject``)
+ or ``None`` if the producer is not specified."""
+ producer_raw = property(lambda self: self.get("/Producer"))
+ """The "raw" version of producer; can return a ``ByteStringObject``."""
+
+
+def convertToInt(d, size):
+ if size > 8:
+ raise utils.PdfReadError("invalid size in convertToInt")
+ d = b_("\x00\x00\x00\x00\x00\x00\x00\x00") + b_(d)
+ d = d[-8:]
+ return struct.unpack(">q", d)[0]
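+
+# Worked example (illustrative): an xref stream with /W [1 2 1] hands 2-byte
+# offsets to convertToInt, which zero-pads to 8 bytes and unpacks big-endian:
+#
+#     convertToInt(b_("\x01\x00"), 2)  # == 256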
+
+# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
+_encryption_padding = b_('\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
+ b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
+ b_('\xa9\xfe\x64\x53\x69\x7a')
+
+
+# Implementation of algorithm 3.2 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
+def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
+ # 1. Pad or truncate the password string to exactly 32 bytes. If the
+ # password string is more than 32 bytes long, use only its first 32 bytes;
+ # if it is less than 32 bytes long, pad it by appending the required number
+ # of additional bytes from the beginning of the padding string
+ # (_encryption_padding).
+ password = b_((str_(password) + str_(_encryption_padding))[:32])
+ # 2. Initialize the MD5 hash function and pass the result of step 1 as
+ # input to this function.
+ import struct
+ m = md5(password)
+ # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
+ # function.
+ m.update(owner_entry.original_bytes)
+ # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
+ # these bytes to the MD5 hash function, low-order byte first.
+    p_entry = struct.pack('<i', p_entry)
+    m.update(p_entry)
+    # 5. Pass the first element of the file's file identifier array (the value
+    # of the ID entry in the document's trailer dictionary) to the MD5 hash
+    # function.
+    m.update(id1_entry.original_bytes)
+    # 6. (Revision 3 or greater) If document metadata is not being encrypted,
+    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
+    if rev >= 3 and not metadata_encrypt:
+ m.update(b_("\xff\xff\xff\xff"))
+ # 7. Finish the hash.
+ md5_hash = m.digest()
+ # 8. (Revision 3 or greater) Do the following 50 times: Take the output
+ # from the previous MD5 hash and pass the first n bytes of the output as
+ # input into a new MD5 hash, where n is the number of bytes of the
+ # encryption key as defined by the value of the encryption dictionary's
+ # /Length entry.
+ if rev >= 3:
+ for i in range(50):
+ md5_hash = md5(md5_hash[:keylen]).digest()
+ # 9. Set the encryption key to the first n bytes of the output from the
+ # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
+ # greater, depends on the value of the encryption dictionary's /Length
+ # entry.
+ return md5_hash[:keylen]
+
+
+# Implementation of algorithm 3.3 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
+def _alg33(owner_pwd, user_pwd, rev, keylen):
+ # steps 1 - 4
+ key = _alg33_1(owner_pwd, rev, keylen)
+ # 5. Pad or truncate the user password string as described in step 1 of
+ # algorithm 3.2.
+ user_pwd = b_((user_pwd + str_(_encryption_padding))[:32])
+ # 6. Encrypt the result of step 5, using an RC4 encryption function with
+ # the encryption key obtained in step 4.
+ val = utils.RC4_encrypt(key, user_pwd)
+ # 7. (Revision 3 or greater) Do the following 19 times: Take the output
+ # from the previous invocation of the RC4 function and pass it as input to
+ # a new invocation of the function; use an encryption key generated by
+ # taking each byte of the encryption key obtained in step 4 and performing
+ # an XOR operation between that byte and the single-byte value of the
+ # iteration counter (from 1 to 19).
+ if rev >= 3:
+ for i in range(1, 20):
+ new_key = ''
+ for l in range(len(key)):
+ new_key += chr(ord_(key[l]) ^ i)
+ val = utils.RC4_encrypt(new_key, val)
+ # 8. Store the output from the final invocation of the RC4 as the value of
+ # the /O entry in the encryption dictionary.
+ return val
+
+
+# Steps 1-4 of algorithm 3.3
+def _alg33_1(password, rev, keylen):
+ # 1. Pad or truncate the owner password string as described in step 1 of
+ # algorithm 3.2. If there is no owner password, use the user password
+ # instead.
+ password = b_((password + str_(_encryption_padding))[:32])
+ # 2. Initialize the MD5 hash function and pass the result of step 1 as
+ # input to this function.
+ m = md5(password)
+ # 3. (Revision 3 or greater) Do the following 50 times: Take the output
+ # from the previous MD5 hash and pass it as input into a new MD5 hash.
+ md5_hash = m.digest()
+ if rev >= 3:
+ for i in range(50):
+ md5_hash = md5(md5_hash).digest()
+ # 4. Create an RC4 encryption key using the first n bytes of the output
+ # from the final MD5 hash, where n is always 5 for revision 2 but, for
+ # revision 3 or greater, depends on the value of the encryption
+ # dictionary's /Length entry.
+ key = md5_hash[:keylen]
+ return key
+
+
+# Implementation of algorithm 3.4 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
+def _alg34(password, owner_entry, p_entry, id1_entry):
+ # 1. Create an encryption key based on the user password string, as
+ # described in algorithm 3.2.
+ key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
+ # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
+ # using an RC4 encryption function with the encryption key from the
+ # preceding step.
+ U = utils.RC4_encrypt(key, _encryption_padding)
+ # 3. Store the result of step 2 as the value of the /U entry in the
+ # encryption dictionary.
+ return U, key
+
+
+# Implementation of algorithm 3.5 of the PDF standard security handler,
+# section 3.5.2 of the PDF 1.6 reference.
+def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
+ # 1. Create an encryption key based on the user password string, as
+ # described in Algorithm 3.2.
+ key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
+ # 2. Initialize the MD5 hash function and pass the 32-byte padding string
+ # shown in step 1 of Algorithm 3.2 as input to this function.
+ m = md5()
+ m.update(_encryption_padding)
+ # 3. Pass the first element of the file's file identifier array (the value
+ # of the ID entry in the document's trailer dictionary; see Table 3.13 on
+ # page 73) to the hash function and finish the hash. (See implementation
+ # note 25 in Appendix H.)
+ m.update(id1_entry.original_bytes)
+ md5_hash = m.digest()
+ # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
+ # function with the encryption key from step 1.
+ val = utils.RC4_encrypt(key, md5_hash)
+ # 5. Do the following 19 times: Take the output from the previous
+ # invocation of the RC4 function and pass it as input to a new invocation
+ # of the function; use an encryption key generated by taking each byte of
+ # the original encryption key (obtained in step 2) and performing an XOR
+ # operation between that byte and the single-byte value of the iteration
+ # counter (from 1 to 19).
+ for i in range(1, 20):
+ new_key = b_('')
+ for l in range(len(key)):
+ new_key += b_(chr(ord_(key[l]) ^ i))
+ val = utils.RC4_encrypt(new_key, val)
+ # 6. Append 16 bytes of arbitrary padding to the output from the final
+ # invocation of the RC4 function and store the 32-byte result as the value
+ # of the U entry in the encryption dictionary.
+    # (implementor's note: the spec does not define what "arbitrary padding"
+    # is supposed to mean, so null bytes are used here. This seems to match a
+    # few other people's implementations.)
+ return val + (b_('\x00') * 16), key
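+
+
+# How these helpers fit together (a sketch, not part of the library's public
+# API): a writer encrypting a document typically computes
+#
+#   O = _alg33(owner_pwd, user_pwd, rev, keylen)             # the /O entry
+#   U, key = _alg34(user_pwd, O, P, id1)                     # /U entry, rev 2
+#   U, key = _alg35(user_pwd, rev, keylen, O, P, id1, False) # /U entry, rev 3+
+#
+# stores O and U in the encryption dictionary, and then uses "key" to
+# RC4-encrypt the document's strings and streams.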
diff --git a/PdfFileTransformer/PyPDF2/utils.py b/PdfFileTransformer/PyPDF2/utils.py
new file mode 100644
index 0000000..2120c70
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/utils.py
@@ -0,0 +1,309 @@
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Utility functions for PDF library.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+
+import sys
+
+try:
+ import __builtin__ as builtins
+except ImportError: # Py3
+ import builtins
+
+
+xrange_fn = getattr(builtins, "xrange", range)
+_basestring = getattr(builtins, "basestring", str)
+
+bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
+string_type = getattr(builtins, "unicode", str)
+int_types = (int, long) if sys.version_info[0] < 3 else (int,)
+
+
+# Make basic type tests more consistent
+def isString(s):
+ """Test if arg is a string. Compatible with Python 2 and 3."""
+ return isinstance(s, _basestring)
+
+
+def isInt(n):
+ """Test if arg is an int. Compatible with Python 2 and 3."""
+ return isinstance(n, int_types)
+
+
+def isBytes(b):
+ """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
+ return isinstance(b, bytes_type)
+
+
+# Custom implementation of warnings.formatwarning
+def formatWarning(message, category, filename, lineno, line=None):
+ file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
+ return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
+
+
+def readUntilWhitespace(stream, maxchars=None):
+ """
+ Reads non-whitespace characters and returns them.
+ Stops upon encountering whitespace or when maxchars is reached.
+ """
+ txt = b_("")
+ while True:
+ tok = stream.read(1)
+ if tok.isspace() or not tok:
+ break
+ txt += tok
+ if len(txt) == maxchars:
+ break
+ return txt
+
+
+def readNonWhitespace(stream):
+ """
+ Finds and reads the next non-whitespace character (ignores whitespace).
+ """
+ tok = WHITESPACES[0]
+ while tok in WHITESPACES:
+ tok = stream.read(1)
+ return tok
+
+
+def skipOverWhitespace(stream):
+    """
+    Similar to readNonWhitespace, but returns whether more than one
+    whitespace character was read.
+    """
+    tok = WHITESPACES[0]
+    cnt = 0
+    while tok in WHITESPACES:
+        tok = stream.read(1)
+        cnt += 1
+    return (cnt > 1)
+
+
+def skipOverComment(stream):
+ tok = stream.read(1)
+ stream.seek(-1, 1)
+ if tok == b_('%'):
+ while tok not in (b_('\n'), b_('\r')):
+ tok = stream.read(1)
+
+
+def readUntilRegex(stream, regex, ignore_eof=False):
+ """
+ Reads until the regular expression pattern matched (ignore the match)
+ Raise PdfStreamError on premature end-of-file.
+ :param bool ignore_eof: If true, ignore end-of-line and return immediately
+ """
+ name = b_('')
+ while True:
+ tok = stream.read(16)
+        if not tok:
+            # the stream has ended prematurely
+            if ignore_eof:
+                return name
+            else:
+                raise PdfStreamError("Stream has ended unexpectedly")
+ m = regex.search(tok)
+ if m is not None:
+ name += tok[:m.start()]
+ stream.seek(m.start()-len(tok), 1)
+ break
+ name += tok
+ return name
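+
+
+# A usage sketch (the delimiter pattern below is illustrative, not part of
+# this module): read bytes up to the next PDF delimiter or whitespace:
+#
+#   import re
+#   delimiters = re.compile(b_(r"[\s\(\)<>\[\]{}/%]"))
+#   token = readUntilRegex(stream, delimiters, ignore_eof=True)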
+
+
+class ConvertFunctionsToVirtualList(object):
+ def __init__(self, lengthFunction, getFunction):
+ self.lengthFunction = lengthFunction
+ self.getFunction = getFunction
+
+ def __len__(self):
+ return self.lengthFunction()
+
+ def __getitem__(self, index):
+ if isinstance(index, slice):
+ indices = xrange_fn(*index.indices(len(self)))
+ cls = type(self)
+ return cls(indices.__len__, lambda idx: self[indices[idx]])
+ if not isInt(index):
+ raise TypeError("sequence indices must be integers")
+ len_self = len(self)
+ if index < 0:
+ # support negative indexes
+ index = len_self + index
+ if index < 0 or index >= len_self:
+ raise IndexError("sequence index out of range")
+ return self.getFunction(index)
+
+
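+# Note: RC4 is a symmetric stream cipher, so this routine also decrypts:
+# RC4_encrypt(key, RC4_encrypt(key, data)) == data for any key and data.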
+def RC4_encrypt(key, plaintext):
+ S = [i for i in range(256)]
+ j = 0
+ for i in range(256):
+ j = (j + S[i] + ord_(key[i % len(key)])) % 256
+ S[i], S[j] = S[j], S[i]
+ i, j = 0, 0
+ retval = []
+ for x in range(len(plaintext)):
+ i = (i + 1) % 256
+ j = (j + S[i]) % 256
+ S[i], S[j] = S[j], S[i]
+ t = S[(S[i] + S[j]) % 256]
+ retval.append(b_(chr(ord_(plaintext[x]) ^ t)))
+ return b_("").join(retval)
+
+
+def matrixMultiply(a, b):
+    return [[sum([float(i) * float(j) for i, j in zip(row, col)])
+             for col in zip(*b)]
+            for row in a]
+
+
+def markLocation(stream):
+    """Creates a text file showing the current location in context."""
+    # Mainly for debugging; the stream is binary, so write bytes.
+    RADIUS = 5000
+    stream.seek(-RADIUS, 1)
+    outputDoc = open('PyPDF2_pdfLocation.txt', 'wb')
+    outputDoc.write(stream.read(RADIUS))
+    outputDoc.write(b_('HERE'))
+    outputDoc.write(stream.read(RADIUS))
+    outputDoc.close()
+    stream.seek(-RADIUS, 1)
+
+
+class PyPdfError(Exception):
+ pass
+
+
+class PdfReadError(PyPdfError):
+ pass
+
+
+class PageSizeNotDefinedError(PyPdfError):
+ pass
+
+
+class PdfReadWarning(UserWarning):
+ pass
+
+
+class PdfStreamError(PdfReadError):
+ pass
+
+
+if sys.version_info[0] < 3:
+ def b_(s):
+ return s
+else:
+ B_CACHE = {}
+
+ def b_(s):
+ bc = B_CACHE
+ if s in bc:
+ return bc[s]
+ if type(s) == bytes:
+ return s
+ else:
+ r = s.encode('latin-1')
+ if len(s) < 2:
+ bc[s] = r
+ return r
+
+
+def u_(s):
+ if sys.version_info[0] < 3:
+ return unicode(s, 'unicode_escape')
+ else:
+ return s
+
+
+def str_(b):
+ if sys.version_info[0] < 3:
+ return b
+ else:
+ if type(b) == bytes:
+ return b.decode('latin-1')
+ else:
+ return b
+
+
+def ord_(b):
+ if sys.version_info[0] < 3 or type(b) == str:
+ return ord(b)
+ else:
+ return b
+
+
+def chr_(c):
+ if sys.version_info[0] < 3:
+ return c
+ else:
+ return chr(c)
+
+
+def barray(b):
+ if sys.version_info[0] < 3:
+ return b
+ else:
+ return bytearray(b)
+
+
+def hexencode(b):
+ if sys.version_info[0] < 3:
+ return b.encode('hex')
+ else:
+ import codecs
+ coder = codecs.getencoder('hex_codec')
+ return coder(b)[0]
+
+
+def hexStr(num):
+ return hex(num).replace('L', '')
+
+
+WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
+
+
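+# The function below is the Paeth predictor from the PNG specification
+# (filter type 4); PDF streams with /Predictor values 10-15 use the PNG
+# predictor filters. It returns whichever of left, up, or up_left is closest
+# to left + up - up_left; for example, paethPredictor(100, 20, 10) computes
+# p = 110 and returns 100, the left neighbour.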
+def paethPredictor(left, up, up_left):
+ p = left + up - up_left
+ dist_left = abs(p - left)
+ dist_up = abs(p - up)
+ dist_up_left = abs(p - up_left)
+
+ if dist_left <= dist_up and dist_left <= dist_up_left:
+ return left
+ elif dist_up <= dist_up_left:
+ return up
+ else:
+ return up_left
diff --git a/PdfFileTransformer/PyPDF2/xmp.py b/PdfFileTransformer/PyPDF2/xmp.py
new file mode 100644
index 0000000..7ba62f0
--- /dev/null
+++ b/PdfFileTransformer/PyPDF2/xmp.py
@@ -0,0 +1,358 @@
+import re
+import datetime
+import decimal
+from .generic import PdfObject
+from xml.dom import getDOMImplementation
+from xml.dom.minidom import parseString
+from .utils import u_
+
+RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
+XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
+PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
+XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
+
+# What is the PDFX namespace, you might ask? I might ask that too. It's
+# a completely undocumented namespace used to place "custom metadata"
+# properties, which are arbitrary metadata properties with no semantic or
+# documented meaning. Elements in the namespace are key/value-style storage,
+# where the element name is the key and the content is the value. The keys
+# are transformed into valid XML identifiers by substituting an invalid
+# identifier character with \u2182 followed by the unicode hex ID of the
+# original character. A key like "my car" is therefore "my\u21820020car".
+#
+# \u2182, in case you're wondering, is the unicode character
+# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
+# escaping characters.
+#
+# Intentional users of the pdfx namespace should be shot on sight. A
+# custom data schema and sensical XML elements could be used instead, as is
+# suggested by Adobe's own documentation on XMP (under "Extensibility of
+# Schemas").
+#
+# Information presented here on the /pdfx/ schema is a result of limited
+# reverse engineering, and does not constitute a full specification.
+PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
+
+iso8601 = re.compile("""
+        (?P<year>[0-9]{4})
+        (-
+            (?P<month>[0-9]{2})
+            (-
+                (?P<day>[0-9]+)
+                (T
+                    (?P<hour>[0-9]{2}):
+                    (?P<minute>[0-9]{2})
+                    (:(?P<second>[0-9]{2}(.[0-9]+)?))?
+                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
+                )?
+            )?
+        )?
+        """, re.VERBOSE)
+
+
+class XmpInformation(PdfObject):
+ """
+ An object that represents Adobe XMP metadata.
+ Usually accessed by :meth:`getXmpMetadata()`
+ """
+
+ def __init__(self, stream):
+ self.stream = stream
+ docRoot = parseString(self.stream.getData())
+ self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
+ self.cache = {}
+
+ def writeToStream(self, stream, encryption_key):
+ self.stream.writeToStream(stream, encryption_key)
+
+ def getElement(self, aboutUri, namespace, name):
+ for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+ if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+ attr = desc.getAttributeNodeNS(namespace, name)
+ if attr != None:
+ yield attr
+ for element in desc.getElementsByTagNameNS(namespace, name):
+ yield element
+
+ def getNodesInNamespace(self, aboutUri, namespace):
+ for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+ if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+ for i in range(desc.attributes.length):
+ attr = desc.attributes.item(i)
+ if attr.namespaceURI == namespace:
+ yield attr
+ for child in desc.childNodes:
+ if child.namespaceURI == namespace:
+ yield child
+
+ def _getText(self, element):
+ text = ""
+ for child in element.childNodes:
+ if child.nodeType == child.TEXT_NODE:
+ text += child.data
+ return text
+
+ def _converter_string(value):
+ return value
+
+ def _converter_date(value):
+ m = iso8601.match(value)
+ year = int(m.group("year"))
+ month = int(m.group("month") or "1")
+ day = int(m.group("day") or "1")
+ hour = int(m.group("hour") or "0")
+ minute = int(m.group("minute") or "0")
+        second = decimal.Decimal(m.group("second") or "0")
+        seconds = second.to_integral(decimal.ROUND_FLOOR)
+        # the fractional part of the seconds field holds microseconds
+        microseconds = int((second - seconds) * 1000000)
+        tzd = m.group("tzd") or "Z"
+        dt = datetime.datetime(year, month, day, hour, minute, int(seconds),
+                               microseconds)
+ if tzd != "Z":
+ tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
+ tzd_hours *= -1
+ if tzd_hours < 0:
+ tzd_minutes *= -1
+ dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
+ return dt
+ _test_converter_date = staticmethod(_converter_date)
+
+ def _getter_bag(namespace, name, converter):
+ def get(self):
+ cached = self.cache.get(namespace, {}).get(name)
+ if cached:
+ return cached
+ retval = []
+ for element in self.getElement("", namespace, name):
+ bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
+ if len(bags):
+ for bag in bags:
+ for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+ value = self._getText(item)
+ value = converter(value)
+ retval.append(value)
+ ns_cache = self.cache.setdefault(namespace, {})
+ ns_cache[name] = retval
+ return retval
+ return get
+
+ def _getter_seq(namespace, name, converter):
+ def get(self):
+ cached = self.cache.get(namespace, {}).get(name)
+ if cached:
+ return cached
+ retval = []
+ for element in self.getElement("", namespace, name):
+ seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
+ if len(seqs):
+ for seq in seqs:
+ for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+ value = self._getText(item)
+ value = converter(value)
+ retval.append(value)
+ else:
+ value = converter(self._getText(element))
+ retval.append(value)
+ ns_cache = self.cache.setdefault(namespace, {})
+ ns_cache[name] = retval
+ return retval
+ return get
+
+ def _getter_langalt(namespace, name, converter):
+ def get(self):
+ cached = self.cache.get(namespace, {}).get(name)
+ if cached:
+ return cached
+ retval = {}
+ for element in self.getElement("", namespace, name):
+ alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
+ if len(alts):
+ for alt in alts:
+ for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+ value = self._getText(item)
+ value = converter(value)
+ retval[item.getAttribute("xml:lang")] = value
+ else:
+ retval["x-default"] = converter(self._getText(element))
+ ns_cache = self.cache.setdefault(namespace, {})
+ ns_cache[name] = retval
+ return retval
+ return get
+
+ def _getter_single(namespace, name, converter):
+ def get(self):
+ cached = self.cache.get(namespace, {}).get(name)
+ if cached:
+ return cached
+ value = None
+ for element in self.getElement("", namespace, name):
+ if element.nodeType == element.ATTRIBUTE_NODE:
+ value = element.nodeValue
+ else:
+ value = self._getText(element)
+ break
+ if value != None:
+ value = converter(value)
+ ns_cache = self.cache.setdefault(namespace, {})
+ ns_cache[name] = value
+ return value
+ return get
+
+ dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
+ """
+ Contributors to the resource (other than the authors). An unsorted
+ array of names.
+ """
+
+ dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
+ """
+ Text describing the extent or scope of the resource.
+ """
+
+ dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
+ """
+ A sorted array of names of the authors of the resource, listed in order
+ of precedence.
+ """
+
+ dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
+ """
+    A sorted array of dates (datetime.datetime instances) of significance to
+ the resource. The dates and times are in UTC.
+ """
+
+ dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
+ """
+ A language-keyed dictionary of textual descriptions of the content of the
+ resource.
+ """
+
+ dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
+ """
+ The mime-type of the resource.
+ """
+
+ dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
+ """
+ Unique identifier of the resource.
+ """
+
+ dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
+ """
+ An unordered array specifying the languages used in the resource.
+ """
+
+ dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
+ """
+ An unordered array of publisher names.
+ """
+
+ dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
+ """
+ An unordered array of text descriptions of relationships to other
+ documents.
+ """
+
+ dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
+ """
+ A language-keyed dictionary of textual descriptions of the rights the
+ user has to this resource.
+ """
+
+ dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
+ """
+ Unique identifier of the work from which this resource was derived.
+ """
+
+ dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
+ """
+    An unordered array of descriptive phrases or keywords that specify the
+ topic of the content of the resource.
+ """
+
+ dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
+ """
+ A language-keyed dictionary of the title of the resource.
+ """
+
+ dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
+ """
+ An unordered array of textual descriptions of the document type.
+ """
+
+ pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
+ """
+ An unformatted text string representing document keywords.
+ """
+
+ pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
+ """
+ The PDF file version, for example 1.0, 1.3.
+ """
+
+ pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
+ """
+ The name of the tool that created the PDF document.
+ """
+
+ xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
+ """
+ The date and time the resource was originally created. The date and
+ time are returned as a UTC datetime.datetime object.
+ """
+
+ xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
+ """
+ The date and time the resource was last modified. The date and time
+ are returned as a UTC datetime.datetime object.
+ """
+
+ xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
+ """
+ The date and time that any metadata for this resource was last
+ changed. The date and time are returned as a UTC datetime.datetime
+ object.
+ """
+
+ xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
+ """
+ The name of the first known tool used to create the resource.
+ """
+
+ xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
+ """
+ The common identifier for all versions and renditions of this resource.
+ """
+
+ xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
+ """
+ An identifier for a specific incarnation of a document, updated each
+ time a file is saved.
+ """
+
+ def custom_properties(self):
+ if not hasattr(self, "_custom_properties"):
+ self._custom_properties = {}
+ for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
+ key = node.localName
+ while True:
+ # see documentation about PDFX_NAMESPACE earlier in file
+ idx = key.find(u_("\u2182"))
+ if idx == -1:
+ break
+ key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
+ if node.nodeType == node.ATTRIBUTE_NODE:
+ value = node.nodeValue
+ else:
+ value = self._getText(node)
+ self._custom_properties[key] = value
+ return self._custom_properties
+
+ custom_properties = property(custom_properties)
+ """
+ Retrieves custom metadata properties defined in the undocumented pdfx
+ metadata schema.
+
+ :return: a dictionary of key/value items for custom metadata properties.
+ :rtype: dict
+ """
diff --git a/PdfFileTransformer/__init__.py b/PdfFileTransformer/__init__.py
new file mode 100644
index 0000000..f6d0a4d
--- /dev/null
+++ b/PdfFileTransformer/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+
+from .PyPDF2 import PdfFileReader, PdfFileWriter
+from .pdf import Pdf
\ No newline at end of file
diff --git a/PdfFileTransformer/pdf.py b/PdfFileTransformer/pdf.py
new file mode 100644
index 0000000..c93fb61
--- /dev/null
+++ b/PdfFileTransformer/pdf.py
@@ -0,0 +1,352 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import re
+import tempfile
+from .PyPDF2 import PdfFileWriter, PdfFileReader
+
+
+class Pdf:
+
+ def __init__(self, filename):
+ self.filename = filename
+ self.buffer = bytearray()
+ self.objects = [] # [(7,0,b"data"), (8,0,b"data2"), ..]
+ self.trailer = {} # {Root: (7, 0), Info: (5, 0)}
+        self.translation_table = {}  # {(6, 0): 7, (5, 0): 8, ...}
+ self.original_xref_offset = 0
+ self.original_first_obj_offset = 0
+ self.file_offset = 0
+
+ self.clean_and_read_pdf()
+ self.check_pdf_header()
+ self.parse_xref_offset()
+ self.parse_xref_table()
+ self.parse_objects()
+ self.parse_trailer()
+
+ def clean_and_read_pdf(self):
+ f_input = open(self.filename, "rb")
+ pdf_header = f_input.read(8)
+ f_input.seek(0)
+ filename_output = tempfile.mktemp()
+ logging.info("Use " + filename_output + " for normalisation output")
+        f_output = open(filename_output, "wb")
+ writer = PdfFileWriter()
+ reader = PdfFileReader(f_input)
+ info = reader.getDocumentInfo()
+ if info.producer is not None:
+ writer.addMetadata({u'/Producer': info.producer})
+ else:
+ writer.addMetadata({u'/Producer': u'TruePolyglot'})
+ if info.creator is not None:
+ writer.addMetadata({u'/Creator': info.creator})
+ else:
+ writer.addMetadata({u'/Creator': u'TruePolyglot'})
+ writer.appendPagesFromReader(reader)
+ writer.setHeader(pdf_header)
+        writer.write(f_output)
+        f_input.close()
+        f_output.close()
+ f_norm = open(filename_output, "rb")
+ self.buffer = bytearray(f_norm.read())
+ self.size = len(self.buffer)
+ f_norm.close()
+
+ def check_pdf_header(self):
+ if self.buffer[0:5] == b"%PDF-":
+ pdf_version = self.buffer[5:8].decode("utf-8")
+ logging.info("PDF Header found: " + pdf_version)
+ else:
+ raise Exception("PDF Header not found")
+
+ def parse_xref_offset(self):
+ r = re.compile(b'startxref\n([0-9]+)')
+ m = r.search(self.buffer)
+ if m is None:
+ raise Exception('Unable to find xref offset')
+ self.original_xref_offset = int(m.group(1))
+ logging.info("Xref offset found at: " + hex(self.original_xref_offset))
+
+ def parse_xref_table(self):
+ xref_table = []
+ r = re.compile(b'xref\n([0-9]+) ([0-9]+)')
+ offset = self.original_xref_offset
+ s = r.search(self.buffer[offset:offset + 32])
+ nb_xtable_object = int(s.group(2))
+ logging.info("Nb objects in Xref table: " + str(nb_xtable_object))
+ xref_header_size = s.end()
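+        # Each classic xref entry is a fixed-width line such as
+        # "0000000015 00000 n": a 10-digit byte offset, a 5-digit generation
+        # number, and an f/n (free/in-use) flag.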
+ r = re.compile(b'([0-9]+) ([0-9]+) ([f|n])')
+ x = 0
+ for i in range(nb_xtable_object):
+ s = r.search(
+ self.buffer[self.original_xref_offset + xref_header_size + x:])
+ if s is not None:
+ x = x + s.end()
+ xref_table.append((int(s.group(1)),
+ int(s.group(2)),
+ s.group(3)))
+ logging.debug("Xref table:")
+ for i in xref_table:
+ logging.debug(str(i[0]) + " " +
+ str(i[1]) + " " +
+ i[2].decode("utf-8"))
+
+ def parse_objects(self):
+ r_begin = re.compile(b'([0-9]+) ([0-9]+) obj\n')
+ r_end = re.compile(b'\nendobj\n')
+
+ offset_buffer = 0
+ obj = ()
+ while offset_buffer < self.size:
+ m_begin = r_begin.match(
+ self.buffer[offset_buffer:offset_buffer + 32])
+ obj_nb_index = 0
+ obj_nb_offset = 0
+ obj_offset_start = 0
+ obj_offset_end = 0
+ if m_begin is not None:
+ if self.original_first_obj_offset == 0:
+ self.original_first_obj_offset = (offset_buffer +
+ m_begin.start())
+ obj_nb_index = int(m_begin.group(1))
+ obj_nb_offset = int(m_begin.group(2))
+ obj_data_start = m_begin.end()
+ obj_offset_start = offset_buffer + m_begin.start()
+ while offset_buffer < self.size:
+ m_end = r_end.match(
+ self.buffer[offset_buffer:offset_buffer + 8])
+ if m_end is not None:
+ obj_offset_end = offset_buffer + m_end.end() - 2
+ break
+ else:
+ offset_buffer = offset_buffer + 1
+ else:
+ offset_buffer = offset_buffer + 1
+
+ if (obj_offset_start != 0 and
+ obj_offset_end != 0):
+ a = obj_offset_start + obj_data_start
+ b = obj_offset_end - 6
+ obj = (obj_nb_index, obj_nb_offset,
+ self.buffer[a:b])
+ logging.debug("Objects: (" + str(obj_nb_index) +
+ ", " + str(obj_nb_offset) +
+ ", " + hex(obj_offset_start) +
+ ", " + hex(obj_offset_end))
+ self.objects.append(obj)
+
+ def parse_trailer(self):
+ r_begin = re.compile(b'trailer\n')
+ s_begin = r_begin.search(self.buffer[self.original_xref_offset:])
+ start = self.original_xref_offset + s_begin.start()
+ logging.info("Trailer found at:" + hex(start))
+
+ r_root = re.compile(b'/Root ([0-9]+) ([0-9]+) R')
+ s_root = r_root.search(self.buffer[self.original_xref_offset:])
+ if s_root is None:
+ raise Exception('Root not found')
+ else:
+ self.trailer["Root"] = (int(s_root.group(1)), int(s_root.group(2)))
+
+ r_info = re.compile(b'/Info ([0-9]+) ([0-9]+) R')
+ s_info = r_info.search(self.buffer[self.original_xref_offset:])
+ if s_info is not None:
+ self.trailer["Info"] = (int(s_info.group(1)), int(s_info.group(2)))
+
+ def get_file_header(self):
+ return self.buffer[:self.original_first_obj_offset]
+
+ def get_xref_table(self):
+ offset_xref = 0
+ buf = (b'xref\n' +
+ str(offset_xref).encode('utf-8') + b' ' +
+ str(len(self.objects) + 1).encode('utf-8') + b'\n' +
+ str(0).zfill(10).encode('utf-8') + b' ' +
+ str(65535).zfill(5).encode('utf-8') + b' f \n')
+
+ for i in range(len(self.objects)):
+ obj_start = self.get_object_offset(i)
+ logging.info("Obj %d at %d" % (self.objects[i][0], obj_start))
+ buf = (buf +
+ (str(obj_start).zfill(10)).encode('utf-8') + b' ' +
+ str(0).zfill(5).encode('utf-8') + b' ' +
+ b'n' + b' \n')
+ return buf
+
+ def get_trailer(self):
+ trailer_data = (b"trailer\n<<\n/Size " +
+ str(len(self.objects) + 1).encode("utf-8") +
+ b"\n/Root " +
+ str(self.trailer["Root"][0]).encode("utf-8") +
+ b" " +
+ str(self.trailer["Root"][1]).encode("utf-8") +
+ b" R\n")
+ if "Info" in self.trailer:
+ trailer_data = (trailer_data +
+ b"/Info " +
+ str(self.trailer["Info"][0]).encode("utf-8") +
+ b" " +
+ str(self.trailer["Info"][1]).encode("utf-8") +
+ b" R\n")
+ trailer_data = trailer_data + b">>"
+ return trailer_data
+
+ def get_xref_offset(self):
+ return self.get_end_of_last_object() + 1
+
+ def get_eof(self):
+ s = (b'startxref\n' +
+ str(self.get_xref_offset()).encode("utf-8") +
+ b'\n%%EOF\n')
+ return s
+
+ def build_object(self, obj):
+ buf = (str(obj[0]).encode("utf-8") +
+ b' ' +
+ str(obj[1]).encode("utf-8") +
+ b' obj\n' +
+ obj[2] +
+ b'\nendobj')
+ return buf
+
+ def get_build_buffer(self):
+ b_buffer = bytearray()
+ b_buffer = b_buffer + self.get_file_header()
+ for obj in self.objects:
+ b_buffer = b_buffer + self.build_object(obj) + b'\n'
+ b_buffer = b_buffer + self.get_xref_table()
+ b_buffer = b_buffer + self.get_trailer() + b'\n'
+ b_buffer = b_buffer + self.get_eof()
+ return b_buffer
+
+ def get_obj(self, nb):
+ for obj in self.objects:
+ if obj[0] == nb:
+ return obj
+
+ def get_end_of_last_object(self):
+ offset = self.get_last_object_offset()
+ offset = offset + len(self.build_object(self.objects[-1]))
+ return offset
+
+ def generate_stream_obj_data(self, data):
+ buf = (b'<<\n/Filter /FlateDecode\n/Length ' +
+ str(len(data)).encode("utf-8") +
+ b'\n>>\nstream\n' +
+ data +
+ b'\nendstream')
+ return buf
+
+ def insert_new_obj_stream_at(self, position, stream_data):
+ '''
+ Return offset of stream data
+ '''
+ logging.info("Insert obj at %d" % position)
+ obj_nb = position
+ obj_off = 0
+ data = self.generate_stream_obj_data(stream_data)
+ obj = (obj_nb, obj_off, data)
+
+ obj_data = self.build_object(obj)
+ full_obj_size = len(obj_data)
+ logging.info("New object full size is: " + str(full_obj_size))
+
+ obj = (obj_nb, obj_off, data)
+ self.objects.insert(position, obj)
+
+ self.reorder_objects()
+ self.fix_trailer_ref()
+
+ def get_first_stream_offset(self):
+ offset = self.file_offset + len(self.get_file_header())
+ r = re.compile(b'stream\n')
+ m = r.search(self.objects[0][2])
+ offset = offset + len(b"1 0 obj\n") + m.end()
+ return offset
+
+ def get_last_stream_offset(self):
+ offset = self.file_offset + self.get_last_object_offset()
+ r = re.compile(b'stream\n')
+ m = r.search(self.build_object(self.objects[-1]))
+ return offset + m.end()
+
+ def get_object_offset(self, index):
+ offset = self.file_offset + len(self.get_file_header())
+ for obj in self.objects[:index]:
+ offset = offset + len(self.build_object(obj)) + 1
+ return offset
+
+ def get_last_object_offset(self):
+ offset = self.get_object_offset(len(self.objects) - 1)
+ return offset
+
+ def insert_new_obj_stream_at_start(self, data):
+ return self.insert_new_obj_stream_at(0, data)
+
+ def insert_new_obj_stream_at_end(self, data):
+ return self.insert_new_obj_stream_at(len(self.objects) + 1,
+ data)
+
+ def generate_translation_table(self):
+ for i in range(len(self.objects)):
+ self.translation_table[(self.objects[i][0],
+ self.objects[i][1])] = i + 1
+ logging.info(self.translation_table)
+
+ def replace_ref(self, ibuffer):
+ '''
+        Example:
+ in: AZERTY 6 0 R -- BGT 88 0 R HYT
+ out: AZERTY 77 0 R -- BGT 9 0 R HYT
+ '''
+ index = 0
+ obuffer = bytearray()
+ while True:
+ r = re.compile(b'([0-9]+) ([0-9]+) R')
+ s = r.search(ibuffer[index:])
+ if s is None:
+ obuffer = obuffer + ibuffer[index:]
+ break
+ o_old = int(s.group(1))
+ p_old = int(s.group(2))
+ o_new = self.translation_table[(o_old, p_old)]
+ p_new = p_old
+
+ newref = (str(o_new).encode("utf-8") +
+ b" " +
+ str(p_new).encode("utf-8") +
+ b" R")
+
+ nbuffer = ibuffer[index:index + s.start()] + newref
+ obuffer = obuffer + nbuffer
+ index = index + s.end()
+ return obuffer
+
+ def reorder_objects(self):
+ self.generate_translation_table()
+ offset_obj = len(self.get_file_header())
+ for i in range(len(self.objects)):
+ buf = self.objects[i][2]
+ new_buf = self.replace_ref(buf)
+ obj_nb = self.objects[i][0]
+ new_obj_nb = self.translation_table[(obj_nb, 0)]
+ new_obj_start = offset_obj
+ size_obj = len(self.build_object((new_obj_nb,
+ 0,
+ new_buf)))
+ new_obj_end = new_obj_start + size_obj
+
+ offset_obj = new_obj_end + 1
+ obj = (new_obj_nb,
+ 0,
+ new_buf)
+ self.objects[i] = obj
+
+ def fix_trailer_ref(self):
+ new_obj_nb = self.translation_table[self.trailer["Root"]]
+ self.trailer["Root"] = (new_obj_nb, 0)
+
+ if "Info" in self.trailer:
+ new_obj_nb = self.translation_table[self.trailer["Info"]]
+ self.trailer["Info"] = (new_obj_nb, 0)
diff --git a/PolyglotFile/__init__.py b/PolyglotFile/__init__.py
new file mode 100644
index 0000000..4261a1a
--- /dev/null
+++ b/PolyglotFile/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+from .polyglotpdfzip import PolyglotPdfZip
+from .polyglotzippdf import PolyglotZipPdf
+from .polyglotszippdf import PolyglotSZipPdf
+
diff --git a/PolyglotFile/polyglotpdfzip.py b/PolyglotFile/polyglotpdfzip.py
new file mode 100644
index 0000000..81c3f06
--- /dev/null
+++ b/PolyglotFile/polyglotpdfzip.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+import logging
+
+'''
+ |-------------------------------| -
+ |--------- PDF Header ----------K1 | J1
+ |-------------------------------| -
+ |----- PDF OBJ 1 = ZIP Data ----K2 |
+ |-------------------------------| -
+ |---- Original PDF Objects -----K3 | J2
+ |-------------------------------| -
+ |--- Last OBJ = End Zip Data ---K4 |
+ |-------------------------------| |
+ |---------- Xref Table ---------| |
+ |-------------------------------K5 |
+ |----------- Trailer -----------| |
+ |-------------------------------| |
+'''
+
+
+class PolyglotPdfZip():
+ from PdfFileTransformer import Pdf
+ from ZipFileTransformer import Zip
+
+    def __init__(self, Pdf, Zip):
+        self.buffer = bytearray()
+        self.pdf = Pdf
+        self.zip = Zip
+
+ def generate(self):
+ k2_stream = self.zip.buffer[:self.zip.end_of_data]
+ size_k2_stream = len(k2_stream)
+ self.pdf.insert_new_obj_stream_at_start(k2_stream)
+ offset_k2_stream = self.pdf.get_first_stream_offset()
+
+ k4_stream = self.zip.buffer[self.zip.central_dir_file_header:]
+ size_k4_stream = len(k4_stream)
+ self.pdf.insert_new_obj_stream_at_end(k4_stream)
+ offset_k4_stream = self.pdf.get_last_stream_offset()
+
+ pdf_buffer = self.pdf.get_build_buffer()
+
+ j1 = pdf_buffer[0:offset_k2_stream]
+ j2 = pdf_buffer[offset_k2_stream + size_k2_stream:offset_k4_stream]
+ self.zip.add_data_to_file(j1, j2, True)
+
+ k5 = pdf_buffer[offset_k4_stream + size_k4_stream:]
+ self.buffer = self.zip.buffer + k5
+
+ def write(self, filename):
+ fd = open(filename, "wb")
+ fd.write(self.buffer)
+ fd.close()
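+
+
+# A usage sketch (hypothetical file names) of the intended flow:
+#
+#   from PdfFileTransformer import Pdf
+#   from ZipFileTransformer import Zip
+#   p = PolyglotPdfZip(Pdf("doc.pdf"), Zip("archive.zip"))
+#   p.generate()
+#   p.write("polyglot.pdf")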
diff --git a/PolyglotFile/polyglotszippdf.py b/PolyglotFile/polyglotszippdf.py
new file mode 100644
index 0000000..0796946
--- /dev/null
+++ b/PolyglotFile/polyglotszippdf.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+from .polyglotpdfzip import PolyglotPdfZip
+import logging
+import tempfile
+from ZipFileTransformer import ZipFile
+from ZipFileTransformer import Zip
+from PdfFileTransformer import Pdf
+
+'''
+ |-----------------------------------| -
+ |--------- ZIP Data[0] = -----------| |
+ |- PDF Header + PDF Obj[0] Header --| |
+ |-----------------------------------| | K2
+ |------- PDF Obj[0] stream = ------| |
+ |--------- ZIP Data LF [1:] --------| |
+ |-----------------------------------| -
+ |------ Original PDF Objects -------| |
+ |-----------------------------------| |
+ |------------ Xref Table -----------| |
+ |-----------------------------------| | J2
+ |------------- Trailer -------------| |
+ |-----------------------------------| -
+ |---------- End Zip Data -----------|
+ |-----------------------------------|
+'''
+
+
+class PolyglotSZipPdf(PolyglotPdfZip):
+
+ def __init__(self, Pdf, Zip):
+ super().__init__(Pdf, Zip)
+
+ def get_rebuild_zip_first_part_size(self):
+
+ zo_path = tempfile.mkstemp()[1]
+ logging.info("use tmp file zip: " + zo_path)
+ zo = ZipFile(zo_path, 'a')
+ zi = ZipFile(self.zip.filename, 'r')
+ for zipinfo in zi.infolist():
+ zo.writestr(zipinfo, zi.read(zipinfo))
+ zi.close()
+ zo.close()
+
+ rebuild_zip = Zip(zo_path)
+
+ p = rebuild_zip.end_of_data
+ k2_stream = rebuild_zip.buffer[:p]
+
+ size_k2_stream = len(k2_stream)
+
+ return size_k2_stream
+
+ def get_pdf_header(self):
+ return self.pdf.get_file_header()
+
+ def generate_zip_with_pdf_part(self, filename, pdf_data):
+
+ zo = ZipFile(filename, 'a')
+ zi = ZipFile(self.zip.filename, 'r')
+ zo.writestr(' ', pdf_data, 0)
+ for zipinfo in zi.infolist():
+ zo.writestr(zipinfo, zi.read(zipinfo))
+ zi.close()
+ zo.close()
+
+ def get_rebuild_pdf(self, zo_path, offset):
+ '''
+ Generate polyglot with final zip.
+ '''
+ new_zip = Zip(zo_path)
+ new_pdf = Pdf(self.pdf.filename)
+
+ p1 = new_zip.end_of_first_local_file_header
+ p2 = new_zip.end_of_data
+ k2_stream = new_zip.buffer[p1:p2]
+
+ size_k2_stream = len(k2_stream)
+ new_pdf.insert_new_obj_stream_at_start(k2_stream)
+ k2_stream_offset = new_pdf.get_first_stream_offset()
+
+ new_pdf.file_offset = offset
+ pdf_buffer = new_pdf.get_build_buffer()
+ j2 = pdf_buffer[k2_stream_offset + size_k2_stream:]
+ new_zip.add_data_to_file(b'', j2, True)
+
+ return new_zip.buffer
+
+    def get_pdf_offset(self, zipfile):
+
+        with open(zipfile, "rb") as f:
+            data = f.read()
+        return data.find(b"%PDF")
+
+ def generate(self):
+
+ zip_stream_size = self.get_rebuild_zip_first_part_size()
+ pdf_header = self.get_pdf_header()
+ pdf_header = (pdf_header +
+ b'1 0 obj\n<<\n/Filter /FlateDecode\n/Length ' +
+ str(zip_stream_size).encode("utf-8") +
+ b'\n>>\nstream\n')
+
+ filename = tempfile.mkstemp()[1]
+ logging.info("use tmp file for new zip: " + filename)
+ self.generate_zip_with_pdf_part(filename, pdf_header)
+
+ pdf_offset = self.get_pdf_offset(filename)
+
+ self.buffer = self.get_rebuild_pdf(filename, pdf_offset)
diff --git a/PolyglotFile/polyglotzippdf.py b/PolyglotFile/polyglotzippdf.py
new file mode 100644
index 0000000..2493663
--- /dev/null
+++ b/PolyglotFile/polyglotzippdf.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+from .polyglotpdfzip import PolyglotPdfZip
+
+
+'''
+ |-------------------------------| -
+ |--------- PDF Header ----------K1 | J1
+ |-------------------------------| -
+ |----- PDF OBJ 1 = ZIP Data ----K2 |
+ |-------------------------------| -
+ |---- Original PDF Objects -----K3 |
+ |-------------------------------| |
+ |---------- Xref Table ---------| |
+ |-------------------------------K4 | J2
+ |----------- Trailer -----------| |
+ |-------------------------------| -
+ |-------- End Zip Data ---------| |
+ |-------------------------------| |
+'''
+
+
+class PolyglotZipPdf(PolyglotPdfZip):
+
+ def generate(self):
+ k2_stream = self.zip.buffer[:self.zip.end_of_data]
+ size_k2_stream = len(k2_stream)
+ self.pdf.insert_new_obj_stream_at_start(k2_stream)
+ offset_k2_stream = self.pdf.get_first_stream_offset()
+
+ pdf_buffer = self.pdf.get_build_buffer()
+
+ j1 = pdf_buffer[0:offset_k2_stream]
+ j2 = pdf_buffer[offset_k2_stream + size_k2_stream:]
+
+ self.zip.add_data_to_file(j1, j2, True)
+ self.buffer = self.zip.buffer
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..97224cf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# TruePolyglot
+
+See the website at https://truepolyglot.hackade.org
diff --git a/ZipFileTransformer/__init__.py b/ZipFileTransformer/__init__.py
new file mode 100644
index 0000000..0b53e27
--- /dev/null
+++ b/ZipFileTransformer/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+
+from .zip import Zip
+from .zipfile import *
diff --git a/ZipFileTransformer/zip.py b/ZipFileTransformer/zip.py
new file mode 100644
index 0000000..91ff4c5
--- /dev/null
+++ b/ZipFileTransformer/zip.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import re
+
+
+class Zip:
+
+ def __init__(self, filename):
+ self.filename = filename
+ self.buffer = bytearray()
+ self.size = 0
+ self.end_central_dir = 0
+ self.first_local_file_header = 0
+ self.offset_local_file = []
+ self.offset_central_directory = []
+ self.end_of_data = 0
+ self.end_of_first_local_file_header = 0
+
+ self.read()
+ self.check_header()
+ self.call_all_parsers()
+ self.check_central_directory()
+ self.parse_central_directories()
+ self.parse_local_file_headers()
+
+ def call_all_parsers(self):
+ self.parse_offset_end_central_dir()
+ self.parse_nb_of_disk()
+ self.parse_start_disk()
+ self.parse_nb_of_central_dir()
+ self.parse_nb_total_of_central_dir()
+ self.parse_size_central_dir()
+ self.parse_central_dir_file_header()
+ self.parse_comment_length()
+
+ def read(self):
+ with open(self.filename, 'rb') as fd:
+ self.buffer = bytearray(fd.read())
+ self.size = len(self.buffer)
+ logging.info("read " + str(self.size) + " bytes from Zip file")
+
+ def check_header(self):
+ if self.buffer[0:4] != b"PK\x03\x04":
+ raise Exception("Zip header not found")
+
+ def parse_offset_end_central_dir(self):
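+        # The end-of-central-directory signature is b"PK\x05\x06"; scanning
+        # the reversed buffer for the reversed pattern b"\x06\x05KP" finds its
+        # last occurrence, i.e. the EOCD record closest to the end of the file.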
+ r = re.compile(b'\x06\x05KP')
+ s = r.search(self.buffer[::-1])
+ if s is None:
+ raise Exception("Unable to find end of central directory")
+ self.end_central_dir = self.size - s.end()
+ logging.info("Offset end of central directory: " +
+ hex(self.end_central_dir))
+
+ def parse_nb_of_disk(self):
+ self.nb_of_disk = int.from_bytes(
+ self.buffer[self.end_central_dir + 4:self.end_central_dir + 6],
+ "little")
+ logging.debug("Nb of disk: " + str(self.nb_of_disk))
+
+ def parse_start_disk(self):
+ self.start_disk = int.from_bytes(
+ self.buffer[self.end_central_dir + 6:self.end_central_dir + 8],
+ "little")
+ logging.debug("Start disk: " + str(self.start_disk))
+
+ def parse_nb_of_central_dir(self):
+ self.nb_of_central_dir = int.from_bytes(
+ self.buffer[self.end_central_dir + 8:self.end_central_dir + 10],
+ "little")
+ logging.info("Nb of central directory record: " +
+ str(self.nb_of_central_dir))
+
+ def parse_nb_total_of_central_dir(self):
+ self.nb_total_of_central_dir = int.from_bytes(
+ self.buffer[self.end_central_dir + 10:self.end_central_dir + 12],
+ "little")
+ logging.info("Nb of total central directory record: " +
+ str(self.nb_total_of_central_dir))
+
+    def parse_size_central_dir(self):
+        # "size of central directory" is a 4-byte field at offset 12 of the
+        # end-of-central-directory record
+        self.size_central_dir = int.from_bytes(
+            self.buffer[self.end_central_dir + 12:self.end_central_dir + 16],
+            "little")
+        logging.info("Size of central directory: " +
+                     str(self.size_central_dir))
+
+ def parse_central_dir_file_header(self):
+ self.central_dir_file_header = int.from_bytes(
+ self.buffer[self.end_central_dir + 16:self.end_central_dir + 20],
+ "little")
+ logging.info("Central directory file header: " +
+ hex(self.central_dir_file_header))
+
+ def parse_comment_length(self):
+ self.comment_length = int.from_bytes(
+ self.buffer[self.end_central_dir + 20:self.end_central_dir + 22],
+ "little")
+ logging.info("Comment length: " +
+ str(self.comment_length))
+
+ def check_central_directory(self):
+ offset = self.central_dir_file_header
+ if (self.buffer[offset:offset + 4] !=
+ b'PK\x01\x02'):
+ raise Exception("Unable to find central directory")
+ logging.info("Found central directory")
+
+ def parse_central_directories(self):
+ if (self.buffer[self.central_dir_file_header:
+ self.central_dir_file_header + 4] !=
+ b'PK\x01\x02'):
+ raise Exception("Unable to find first central directory")
+ logging.info("Found first central directory")
+
+ i = 0
+ size = 0
+ offset = self.central_dir_file_header
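+        # Fixed offsets within each 46-byte central directory file header:
+        # bytes 28-29 filename length, 30-31 extra field length, 32-33 file
+        # comment length, 42-45 relative offset of the local file header.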
+
+ while (self.buffer[size + offset:
+ size + offset + 4] ==
+ b'PK\x01\x02'):
+
+ logging.info("Parse central directory n°" + str(i))
+ logging.info("Offset: " + hex(offset + size))
+ self.offset_central_directory.append(offset + size)
+ filename_length = int.from_bytes(
+ self.buffer[size + offset + 28:size + offset + 30],
+ "little")
+ logging.info("filename length:" + str(filename_length))
+ extra_field_length = int.from_bytes(
+ self.buffer[size + offset + 30:size + offset + 32],
+ "little")
+ logging.info("extra field length:" + str(extra_field_length))
+ comment_length = int.from_bytes(
+ self.buffer[size + offset + 32:size + offset + 34],
+ "little")
+ logging.info("comment length:" + str(comment_length))
+ local_file_header = int.from_bytes(
+ self.buffer[size + offset + 42:size + offset + 46],
+ "little")
+ if i == 0:
+ self.first_local_file_header = local_file_header
+ logging.info("local file header:" + hex(local_file_header))
+
+ i = i + 1
+ size = (size + filename_length +
+ extra_field_length + comment_length + 46)
+
+ logging.debug("parse header at:" + hex(offset + size))
+
+ def parse_local_file_headers(self):
+ size = 0
+ offset = self.first_local_file_header
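+        # Fixed offsets within each 30-byte local file header: bytes 18-21
+        # compressed size, 26-27 filename length, 28-29 extra field length.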
+ for i in range(self.nb_of_central_dir):
+ logging.info("Parse local file n°" + str(i))
+            compressed_data_length = int.from_bytes(
+                self.buffer[size + offset + 18:size + offset + 22],
+                "little")
+            logging.info("compressed data length:" +
+                         str(compressed_data_length))
+ filename_length = int.from_bytes(
+ self.buffer[size + offset + 26:size + offset + 28],
+ "little")
+ logging.info("filename length:" + str(filename_length))
+ extra_field_length = int.from_bytes(
+ self.buffer[size + offset + 28:size + offset + 30],
+ "little")
+ logging.info("extra field length:" + str(extra_field_length))
+            local_file_size = (compressed_data_length +
+                               filename_length + extra_field_length + 30)
+ logging.info("local file length:" + hex(local_file_size))
+ size = size + local_file_size
+ logging.debug("parse header at:" + hex(offset + size))
+ self.offset_local_file.append(offset + size)
+ self.end_of_data = offset + size
+ if i == 0:
+ self.end_of_first_local_file_header = self.end_of_data
+
+    def add_data_to_file(self, data_before_local, data_after_local,
+                         write_buffer=False):
+        logging.info("Add data before local length: " +
+                     str(len(data_before_local)))
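+        # Patching strategy: each central directory entry stores the absolute
+        # offset of its local file header, so every entry is shifted by the
+        # size of the data inserted before the local file section; the EOCD
+        # record then gets its central-directory offset shifted by both
+        # inserted chunks.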
+ new_buffer = self.buffer
+ for i in self.offset_central_directory:
+ logging.info("parse central directory at: " + hex(i))
+ local_file_header = int.from_bytes(
+ self.buffer[i + 42:i + 46],
+ "little")
+ logging.info("old local file header: " + hex(local_file_header))
+ local_file_header = local_file_header + len(data_before_local)
+ logging.info("new local file header: " + hex(local_file_header))
+ bytes_local_file_header = local_file_header.to_bytes(4, "little")
+ logging.info("change value at:" + hex(i + 42))
+ new_buffer[i + 42:i + 46] = bytes_local_file_header
+
+ logging.info("old central directory header: " +
+ hex(self.central_dir_file_header))
+ new_central_dir_file_header = (self.central_dir_file_header +
+ len(data_after_local) +
+ len(data_before_local))
+ logging.info("new central directory header: " +
+ hex(new_central_dir_file_header))
+ bytes_offset = new_central_dir_file_header.to_bytes(4, "little")
+ new_buffer[self.end_central_dir + 16:
+ self.end_central_dir + 20] = bytes_offset
+ self.buffer = new_buffer
+
+ if write_buffer:
+ new_buffer = (data_before_local +
+ new_buffer[:self.end_of_data] +
+ data_after_local +
+ new_buffer[self.central_dir_file_header:])
+ self.buffer = new_buffer
+
+ def get_local_file_data(self):
+ return self.buffer[:self.end_of_data]
+
+ def get_data_after_central_directory(self):
+ return self.buffer[self.central_dir_file_header:]
+
+ def get_first_part_length(self):
+ return len(self.get_local_file_data())
+
+ def get_second_part_length(self):
+ return len(self.get_data_after_central_directory())
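+
+
+# A minimal usage sketch (hypothetical file name): wrap extra data around the
+# archive's local file section while keeping the zip readable:
+#
+#   z = Zip("archive.zip")
+#   z.add_data_to_file(b"prefix bytes", b"suffix bytes", True)
+#   with open("wrapped.zip", "wb") as fd:
+#       fd.write(z.buffer)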
diff --git a/ZipFileTransformer/zipfile.py b/ZipFileTransformer/zipfile.py
new file mode 100644
index 0000000..2757ce9
--- /dev/null
+++ b/ZipFileTransformer/zipfile.py
@@ -0,0 +1,2133 @@
+"""
+Read and write ZIP files.
+
+XXX references to utf-8 need further investigation.
+"""
+import io
+import os
+import importlib.util
+import sys
+import time
+import stat
+import shutil
+import struct
+import binascii
+import threading
+
+try:
+ import zlib # We may need its compression method
+ crc32 = zlib.crc32
+except ImportError:
+ zlib = None
+ crc32 = binascii.crc32
+
+try:
+ import bz2 # We may need its compression method
+except ImportError:
+ bz2 = None
+
+try:
+ import lzma # We may need its compression method
+except ImportError:
+ lzma = None
+
+__all__ = ["BadZipFile", "BadZipfile", "error",
+ "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
+ "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
+
+class BadZipFile(Exception):
+ pass
+
+
+class LargeZipFile(Exception):
+ """
+ Raised when writing a zipfile, the zipfile requires ZIP64 extensions
+ and those extensions are disabled.
+ """
+
+error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
+
+
+ZIP64_LIMIT = (1 << 31) - 1
+ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
+ZIP_MAX_COMMENT = (1 << 16) - 1
+
+# constants for Zip file compression methods
+ZIP_STORED = 0
+ZIP_DEFLATED = 8
+ZIP_BZIP2 = 12
+ZIP_LZMA = 14
+# Other ZIP compression methods not supported
+
+DEFAULT_VERSION = 20
+ZIP64_VERSION = 45
+BZIP2_VERSION = 46
+LZMA_VERSION = 63
+# we recognize (but not necessarily support) all features up to that version
+MAX_EXTRACT_VERSION = 63
+
+# Below are some formats and associated data for reading/writing headers using
+# the struct module. The names and structures of headers/records are those used
+# in the PKWARE description of the ZIP file format:
+# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+# (URL valid as of January 2008)
+
+# The "end of central directory" structure, magic number, size, and indices
+# (section V.I in the format document)
+structEndArchive = b"<4s4H2LH"
+stringEndArchive = b"PK\005\006"
+sizeEndCentDir = struct.calcsize(structEndArchive)
+
+_ECD_SIGNATURE = 0
+_ECD_DISK_NUMBER = 1
+_ECD_DISK_START = 2
+_ECD_ENTRIES_THIS_DISK = 3
+_ECD_ENTRIES_TOTAL = 4
+_ECD_SIZE = 5
+_ECD_OFFSET = 6
+_ECD_COMMENT_SIZE = 7
+# These last two indices are not part of the structure as defined in the
+# spec, but they are used internally by this module as a convenience
+_ECD_COMMENT = 8
+_ECD_LOCATION = 9
+
+# The "central directory" structure, magic number, size, and indices
+# of entries in the structure (section V.F in the format document)
+structCentralDir = "<4s4B4HL2L5H2L"
+stringCentralDir = b"PK\001\002"
+sizeCentralDir = struct.calcsize(structCentralDir)
+
+# indexes of entries in the central directory structure
+_CD_SIGNATURE = 0
+_CD_CREATE_VERSION = 1
+_CD_CREATE_SYSTEM = 2
+_CD_EXTRACT_VERSION = 3
+_CD_EXTRACT_SYSTEM = 4
+_CD_FLAG_BITS = 5
+_CD_COMPRESS_TYPE = 6
+_CD_TIME = 7
+_CD_DATE = 8
+_CD_CRC = 9
+_CD_COMPRESSED_SIZE = 10
+_CD_UNCOMPRESSED_SIZE = 11
+_CD_FILENAME_LENGTH = 12
+_CD_EXTRA_FIELD_LENGTH = 13
+_CD_COMMENT_LENGTH = 14
+_CD_DISK_NUMBER_START = 15
+_CD_INTERNAL_FILE_ATTRIBUTES = 16
+_CD_EXTERNAL_FILE_ATTRIBUTES = 17
+_CD_LOCAL_HEADER_OFFSET = 18
+
+# The "local file header" structure, magic number, size, and indices
+# (section V.A in the format document)
+structFileHeader = "<4s2B4HL2L2H"
+stringFileHeader = b"PK\003\004"
+sizeFileHeader = struct.calcsize(structFileHeader)
+
+_FH_SIGNATURE = 0
+_FH_EXTRACT_VERSION = 1
+_FH_EXTRACT_SYSTEM = 2
+_FH_GENERAL_PURPOSE_FLAG_BITS = 3
+_FH_COMPRESSION_METHOD = 4
+_FH_LAST_MOD_TIME = 5
+_FH_LAST_MOD_DATE = 6
+_FH_CRC = 7
+_FH_COMPRESSED_SIZE = 8
+_FH_UNCOMPRESSED_SIZE = 9
+_FH_FILENAME_LENGTH = 10
+_FH_EXTRA_FIELD_LENGTH = 11
+
+# The "Zip64 end of central directory locator" structure, magic number, and size
+structEndArchive64Locator = "<4sLQL"
+stringEndArchive64Locator = b"PK\x06\x07"
+sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
+
+# The "Zip64 end of central directory" record, magic number, size, and indices
+# (section V.G in the format document)
+structEndArchive64 = "<4sQ2H2L4Q"
+stringEndArchive64 = b"PK\x06\x06"
+sizeEndCentDir64 = struct.calcsize(structEndArchive64)
+
+_CD64_SIGNATURE = 0
+_CD64_DIRECTORY_RECSIZE = 1
+_CD64_CREATE_VERSION = 2
+_CD64_EXTRACT_VERSION = 3
+_CD64_DISK_NUMBER = 4
+_CD64_DISK_NUMBER_START = 5
+_CD64_NUMBER_ENTRIES_THIS_DISK = 6
+_CD64_NUMBER_ENTRIES_TOTAL = 7
+_CD64_DIRECTORY_SIZE = 8
+_CD64_OFFSET_START_CENTDIR = 9
+
+def _check_zipfile(fp):
+ try:
+ if _EndRecData(fp):
+ return True # file has correct magic number
+ except OSError:
+ pass
+ return False
+
+def is_zipfile(filename):
+ """Quickly see if a file is a ZIP file by checking the magic number.
+
+ The filename argument may be a file or file-like object too.
+ """
+ result = False
+ try:
+ if hasattr(filename, "read"):
+ result = _check_zipfile(fp=filename)
+ else:
+ with open(filename, "rb") as fp:
+ result = _check_zipfile(fp)
+ except OSError:
+ pass
+ return result
+
+def _EndRecData64(fpin, offset, endrec):
+ """
+ Read the ZIP64 end-of-archive records and use that to update endrec
+ """
+ try:
+ fpin.seek(offset - sizeEndCentDir64Locator, 2)
+ except OSError:
+ # If the seek fails, the file is not large enough to contain a ZIP64
+ # end-of-archive record, so just return the end record we were given.
+ return endrec
+
+ data = fpin.read(sizeEndCentDir64Locator)
+ if len(data) != sizeEndCentDir64Locator:
+ return endrec
+ sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
+ if sig != stringEndArchive64Locator:
+ return endrec
+
+ if diskno != 0 or disks != 1:
+ raise BadZipFile("zipfiles that span multiple disks are not supported")
+
+ # Assume no 'zip64 extensible data'
+ fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
+ data = fpin.read(sizeEndCentDir64)
+ if len(data) != sizeEndCentDir64:
+ return endrec
+ sig, sz, create_version, read_version, disk_num, disk_dir, \
+ dircount, dircount2, dirsize, diroffset = \
+ struct.unpack(structEndArchive64, data)
+ if sig != stringEndArchive64:
+ return endrec
+
+ # Update the original endrec using data from the ZIP64 record
+ endrec[_ECD_SIGNATURE] = sig
+ endrec[_ECD_DISK_NUMBER] = disk_num
+ endrec[_ECD_DISK_START] = disk_dir
+ endrec[_ECD_ENTRIES_THIS_DISK] = dircount
+ endrec[_ECD_ENTRIES_TOTAL] = dircount2
+ endrec[_ECD_SIZE] = dirsize
+ endrec[_ECD_OFFSET] = diroffset
+ return endrec
+
+
+def _EndRecData(fpin):
+ """Return data from the "End of Central Directory" record, or None.
+
+ The data is a list of the nine items in the ZIP "End of central dir"
+ record followed by a tenth item, the file seek offset of this record."""
+
+ # Determine file size
+ fpin.seek(0, 2)
+ filesize = fpin.tell()
+
+ # Check to see if this is ZIP file with no archive comment (the
+ # "end of central directory" structure should be the last item in the
+ # file if this is the case).
+ try:
+ fpin.seek(-sizeEndCentDir, 2)
+ except OSError:
+ return None
+ data = fpin.read()
+ if (len(data) == sizeEndCentDir and
+ data[0:4] == stringEndArchive and
+ data[-2:] == b"\000\000"):
+ # the signature is correct and there's no comment, unpack structure
+ endrec = struct.unpack(structEndArchive, data)
+ endrec=list(endrec)
+
+ # Append a blank comment and record start offset
+ endrec.append(b"")
+ endrec.append(filesize - sizeEndCentDir)
+
+ # Try to read the "Zip64 end of central directory" structure
+ return _EndRecData64(fpin, -sizeEndCentDir, endrec)
+
+ # Either this is not a ZIP file, or it is a ZIP file with an archive
+ # comment. Search the end of the file for the "end of central directory"
+ # record signature. The comment is the last item in the ZIP file and may be
+ # up to 64K long. It is assumed that the "end of central directory" magic
+ # number does not appear in the comment.
+ maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
+ fpin.seek(maxCommentStart, 0)
+ data = fpin.read()
+ start = data.rfind(stringEndArchive)
+ if start >= 0:
+ # found the magic number; attempt to unpack and interpret
+ recData = data[start:start+sizeEndCentDir]
+ if len(recData) != sizeEndCentDir:
+ # Zip file is corrupted.
+ return None
+ endrec = list(struct.unpack(structEndArchive, recData))
+ commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
+ comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
+ endrec.append(comment)
+ endrec.append(maxCommentStart + start)
+
+ # Try to read the "Zip64 end of central directory" structure
+ return _EndRecData64(fpin, maxCommentStart + start - filesize,
+ endrec)
+
+ # Unable to find a valid end of central directory structure
+ return None
+
+
+class ZipInfo (object):
+ """Class with attributes describing each file in the ZIP archive."""
+
+ __slots__ = (
+ 'orig_filename',
+ 'filename',
+ 'date_time',
+ 'compress_type',
+ '_compresslevel',
+ 'comment',
+ 'extra',
+ 'create_system',
+ 'create_version',
+ 'extract_version',
+ 'reserved',
+ 'flag_bits',
+ 'volume',
+ 'internal_attr',
+ 'external_attr',
+ 'header_offset',
+ 'CRC',
+ 'compress_size',
+ 'file_size',
+ '_raw_time',
+ )
+
+ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
+ self.orig_filename = filename # Original file name in archive
+
+ # Terminate the file name at the first null byte. Null bytes in file
+ # names are used as tricks by viruses in archives.
+ null_byte = filename.find(chr(0))
+ if null_byte >= 0:
+ filename = filename[0:null_byte]
+ # This is used to ensure paths in generated ZIP files always use
+ # forward slashes as the directory separator, as required by the
+ # ZIP format specification.
+ if os.sep != "/" and os.sep in filename:
+ filename = filename.replace(os.sep, "/")
+
+ self.filename = filename # Normalized file name
+ self.date_time = date_time # year, month, day, hour, min, sec
+
+ if date_time[0] < 1980:
+ raise ValueError('ZIP does not support timestamps before 1980')
+
+ # Standard values:
+ self.compress_type = ZIP_STORED # Type of compression for the file
+ self._compresslevel = None # Level for the compressor
+ self.comment = b"" # Comment for each file
+ self.extra = b"" # ZIP extra data
+ if sys.platform == 'win32':
+ self.create_system = 0 # System which created ZIP archive
+ else:
+ # Assume everything else is unix-y
+ self.create_system = 3 # System which created ZIP archive
+ self.create_version = DEFAULT_VERSION # Version which created ZIP archive
+ self.extract_version = DEFAULT_VERSION # Version needed to extract archive
+ self.reserved = 0 # Must be zero
+ self.flag_bits = 0 # ZIP flag bits
+ self.volume = 0 # Volume number of file header
+ self.internal_attr = 0 # Internal attributes
+ self.external_attr = 0 # External file attributes
+ # Other attributes are set by class ZipFile:
+ # header_offset Byte offset to the file header
+ # CRC CRC-32 of the uncompressed file
+ # compress_size Size of the compressed file
+ # file_size Size of the uncompressed file
+
+ def __repr__(self):
+ result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
+ if self.compress_type != ZIP_STORED:
+ result.append(' compress_type=%s' %
+ compressor_names.get(self.compress_type,
+ self.compress_type))
+ hi = self.external_attr >> 16
+ lo = self.external_attr & 0xFFFF
+ if hi:
+ result.append(' filemode=%r' % stat.filemode(hi))
+ if lo:
+ result.append(' external_attr=%#x' % lo)
+ isdir = self.is_dir()
+ if not isdir or self.file_size:
+ result.append(' file_size=%r' % self.file_size)
+ if ((not isdir or self.compress_size) and
+ (self.compress_type != ZIP_STORED or
+ self.file_size != self.compress_size)):
+ result.append(' compress_size=%r' % self.compress_size)
+ result.append('>')
+ return ''.join(result)
+
+ def FileHeader(self, zip64=None):
+ """Return the per-file header as a string."""
+ dt = self.date_time
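+        # Pack the timestamp in MS-DOS format: date is (year-1980)|month|day
+        # in 7/4/5 bits, time is hour|minute|seconds-halved in 5/6/5 bits.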
+ dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+ dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+ if self.flag_bits & 0x08:
+ # Set these to zero because we write them after the file data
+ CRC = compress_size = file_size = 0
+ else:
+ CRC = self.CRC
+ compress_size = self.compress_size
+ file_size = self.file_size
+
+ extra = self.extra
+
+ min_version = 0
+ if zip64 is None:
+ zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
+ if zip64:
+            fmt = '<HHQQ'
+            extra = extra + struct.pack(fmt,
+                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
+        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
+ if not zip64:
+ raise LargeZipFile("Filesize would require ZIP64 extensions")
+ # File is larger than what fits into a 4 byte integer,
+ # fall back to the ZIP64 extension
+ file_size = 0xffffffff
+ compress_size = 0xffffffff
+ min_version = ZIP64_VERSION
+
+ if self.compress_type == ZIP_BZIP2:
+ min_version = max(BZIP2_VERSION, min_version)
+ elif self.compress_type == ZIP_LZMA:
+ min_version = max(LZMA_VERSION, min_version)
+
+ self.extract_version = max(min_version, self.extract_version)
+ self.create_version = max(min_version, self.create_version)
+ filename, flag_bits = self._encodeFilenameFlags()
+ header = struct.pack(structFileHeader, stringFileHeader,
+ self.extract_version, self.reserved, flag_bits,
+ self.compress_type, dostime, dosdate, CRC,
+ compress_size, file_size,
+ len(filename), len(extra))
+ return header + filename + extra
+
+ def _encodeFilenameFlags(self):
+ try:
+ return self.filename.encode('ascii'), self.flag_bits
+ except UnicodeEncodeError:
+ return self.filename.encode('utf-8'), self.flag_bits | 0x800
+
+ def _decodeExtra(self):
+ # Try to decode the extra field.
+ extra = self.extra
+ unpack = struct.unpack
+ while len(extra) >= 4:
+            tp, ln = unpack('<HH', extra[:4])
+            if ln+4 > len(extra):
+ raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
+ if tp == 0x0001:
+ if ln >= 24:
+                    counts = unpack('<QQQ', extra[4:28])
+                elif ln == 16:
+                    counts = unpack('<QQ', extra[4:20])
+                elif ln == 8:
+                    counts = unpack('<Q', extra[4:12])
+                elif ln == 0:
+                    counts = ()
+                else:
+                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
+
+                idx = 0
+
+                # ZIP64 extension (large files and/or large archives)
+                if self.file_size in (0xffffffffffffffff, 0xffffffff):
+                    self.file_size = counts[idx]
+                    idx += 1
+
+                if self.compress_size == 0xFFFFFFFF:
+                    self.compress_size = counts[idx]
+                    idx += 1
+
+                if self.header_offset == 0xffffffff:
+                    old = self.header_offset
+                    self.header_offset = counts[idx]
+                    idx += 1
+
+            extra = extra[ln+4:]
+
+    @classmethod
+    def from_file(cls, filename, arcname=None):
+        """Construct an appropriate ZipInfo for a file on the filesystem.
+
+        filename should be the path to a file or directory on the filesystem.
+
+        arcname is the name which it will have within the archive (by default,
+        this will be the same as filename, but without a drive letter and with
+        leading path separators removed).
+        """
+        if isinstance(filename, os.PathLike):
+            filename = os.fspath(filename)
+        st = os.stat(filename)
+        isdir = stat.S_ISDIR(st.st_mode)
+        mtime = time.localtime(st.st_mtime)
+        date_time = mtime[0:6]
+        # Create ZipInfo instance to store file information
+        if arcname is None:
+            arcname = filename
+        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
+        while arcname[0] in (os.sep, os.altsep):
+            arcname = arcname[1:]
+        if isdir:
+            arcname += '/'
+        zinfo = cls(arcname, date_time)
+        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
+        if isdir:
+            zinfo.file_size = 0
+            zinfo.external_attr |= 0x10  # MS-DOS directory flag
+        else:
+            zinfo.file_size = st.st_size
+
+        return zinfo
+
+    def is_dir(self):
+        """Return True if this archive member is a directory."""
+        return self.filename[-1] == '/'
+
+
+_crctable = None
+def _gen_crc(crc):
+    for j in range(8):
+        if crc & 1:
+            crc = (crc >> 1) ^ 0xEDB88320
+        else:
+            crc >>= 1
+    return crc
+
+# ZIP supports a password-based form of encryption. Even though known
+# plaintext attacks have been found against it, it is still useful
+# to be able to get data out of such a file.
+#
+# Usage:
+# zd = _ZipDecrypter(mypwd)
+# plain_bytes = zd(cypher_bytes)
+
+def _ZipDecrypter(pwd):
+ key0 = 305419896
+ key1 = 591751049
+ key2 = 878082192
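+    # 305419896/591751049/878082192 are 0x12345678/0x23456789/0x34567890,
+    # the initial key values defined by the PKWARE APPNOTE.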
+
+ global _crctable
+ if _crctable is None:
+ _crctable = list(map(_gen_crc, range(256)))
+ crctable = _crctable
+
+ def crc32(ch, crc):
+ """Compute the CRC32 primitive on one byte."""
+ return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
+
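+    # Mix one plaintext byte into the three rolling 32-bit keys, following
+    # the PKWARE APPNOTE "traditional" encryption key schedule.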
+ def update_keys(c):
+ nonlocal key0, key1, key2
+ key0 = crc32(c, key0)
+ key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
+ key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
+ key2 = crc32(key1 >> 24, key2)
+
+ for p in pwd:
+ update_keys(p)
+
+ def decrypter(data):
+ """Decrypt a bytes object."""
+ result = bytearray()
+ append = result.append
+ for c in data:
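+            # XOR each ciphertext byte with a keystream byte derived from
+            # key2, then feed the decrypted byte back into the key state.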
+ k = key2 | 2
+ c ^= ((k * (k^1)) >> 8) & 0xFF
+ update_keys(c)
+ append(c)
+ return bytes(result)
+
+ return decrypter
+
+
+class LZMACompressor:
+
+ def __init__(self):
+ self._comp = None
+
+ def _init(self):
+ props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
+ self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
+ lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
+ ])
+        return struct.pack('<BBH', 9, 4, len(props)) + props
+
+    def compress(self, data):
+        if self._comp is None:
+            return self._init() + self._comp.compress(data)
+        return self._comp.compress(data)
+
+    def flush(self):
+        if self._comp is None:
+            return self._init() + self._comp.flush()
+        return self._comp.flush()
+
+
+class LZMADecompressor:
+
+    def __init__(self):
+        self._decomp = None
+        self._unconsumed = b''
+        self.eof = False
+
+    def decompress(self, data):
+        if self._decomp is None:
+            self._unconsumed += data
+            if len(self._unconsumed) <= 4:
+                return b''
+            psize, = struct.unpack('<H', self._unconsumed[2:4])
+            if len(self._unconsumed) <= 4 + psize:
+                return b''
+
+            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
+                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
+                                               self._unconsumed[4:4 + psize])
+            ])
+            data = self._unconsumed[4 + psize:]
+            del self._unconsumed
+
+        result = self._decomp.decompress(data)
+        self.eof = self._decomp.eof
+        return result
+
+
+compressor_names = {
+    0: 'store',
+    1: 'shrink',
+    2: 'reduce',
+    3: 'reduce',
+    4: 'reduce',
+    5: 'reduce',
+    6: 'implode',
+    7: 'tokenize',
+    8: 'deflate',
+    9: 'deflate64',
+    10: 'implode',
+    12: 'bzip2',
+    14: 'lzma',
+    18: 'terse',
+    19: 'lz77',
+    97: 'wavpack',
+    98: 'ppmd',
+}
+
+def _check_compression(compression):
+    if compression == ZIP_STORED:
+        pass
+    elif compression == ZIP_DEFLATED:
+        if not zlib:
+            raise RuntimeError(
+                "Compression requires the (missing) zlib module")
+    elif compression == ZIP_BZIP2:
+        if not bz2:
+            raise RuntimeError(
+                "Compression requires the (missing) bz2 module")
+    elif compression == ZIP_LZMA:
+        if not lzma:
+            raise RuntimeError(
+                "Compression requires the (missing) lzma module")
+    else:
+        raise NotImplementedError("That compression method is not supported")
+
+
+def _get_compressor(compress_type, compresslevel=None):
+    if compress_type == ZIP_DEFLATED:
+        if compresslevel is not None:
+            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
+        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
+    elif compress_type == ZIP_BZIP2:
+        if compresslevel is not None:
+            return bz2.BZ2Compressor(compresslevel)
+        return bz2.BZ2Compressor()
+    # compresslevel is ignored for ZIP_LZMA
+    elif compress_type == ZIP_LZMA:
+        return LZMACompressor()
+    else:
+        return None
+
+
+def _get_decompressor(compress_type):
+    if compress_type == ZIP_STORED:
+        return None
+    elif compress_type == ZIP_DEFLATED:
+        return zlib.decompressobj(-15)
+    elif compress_type == ZIP_BZIP2:
+        return bz2.BZ2Decompressor()
+    elif compress_type == ZIP_LZMA:
+        return LZMADecompressor()
+    else:
+        descr = compressor_names.get(compress_type)
+        if descr:
+            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
+        else:
+            raise NotImplementedError("compression type %d" % (compress_type,))
+
+
+class _SharedFile:
+    def __init__(self, file, pos, close, lock, writing):
+        self._file = file
+        self._pos = pos
+        self._close = close
+        self._lock = lock
+        self._writing = writing
+        self.seekable = file.seekable
+        self.tell = file.tell
+
+    def seek(self, offset, whence=0):
+        with self._lock:
+            if self._writing():
+                raise ValueError("Can't reposition in the ZIP file while "
+                        "there is an open writing handle on it. "
+                        "Close the writing handle before trying to read.")
+            self._file.seek(offset, whence)
+            self._pos = self._file.tell()
+            return self._pos
+
+    def read(self, n=-1):
+        with self._lock:
+            if self._writing():
+                raise ValueError("Can't read from the ZIP file while there "
+                        "is an open writing handle on it. "
+                        "Close the writing handle before trying to read.")
+            self._file.seek(self._pos)
+            data = self._file.read(n)
+            self._pos = self._file.tell()
+            return data
+
+    def close(self):
+        if self._file is not None:
+            fileobj = self._file
+            self._file = None
+            self._close(fileobj)
+
+# Provide the tell method for unseekable stream
+class _Tellable:
+    def __init__(self, fp):
+        self.fp = fp
+        self.offset = 0
+
+    def write(self, data):
+        n = self.fp.write(data)
+        self.offset += n
+        return n
+
+    def tell(self):
+        return self.offset
+
+    def flush(self):
+        self.fp.flush()
+
+    def close(self):
+        self.fp.close()
+
+
+class ZipExtFile(io.BufferedIOBase):
+    """File-like object for reading an archive member.
+       Is returned by ZipFile.open().
+    """
+
+    # Max size supported by decompressor.
+    MAX_N = 1 << 31 - 1
+
+    # Read from compressed files in 4k blocks.
+    MIN_READ_SIZE = 4096
+
+    # Chunk size to read during seek
+    MAX_SEEK_READ = 1 << 24
+
+    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
+                 close_fileobj=False):
+        self._fileobj = fileobj
+        self._decrypter = decrypter
+        self._close_fileobj = close_fileobj
+
+        self._compress_type = zipinfo.compress_type
+        self._compress_left = zipinfo.compress_size
+        self._left = zipinfo.file_size
+
+        self._decompressor = _get_decompressor(self._compress_type)
+
+        self._eof = False
+        self._readbuffer = b''
+        self._offset = 0
+
+        self.newlines = None
+
+        # Adjust read size for encrypted files since the first 12 bytes
+        # are for the encryption/password information.
+        if self._decrypter is not None:
+            self._compress_left -= 12
+
+        self.mode = mode
+        self.name = zipinfo.filename
+
+        if hasattr(zipinfo, 'CRC'):
+            self._expected_crc = zipinfo.CRC
+            self._running_crc = crc32(b'')
+        else:
+            self._expected_crc = None
+
+        self._seekable = False
+        try:
+            if fileobj.seekable():
+                self._orig_compress_start = fileobj.tell()
+                self._orig_compress_size = zipinfo.compress_size
+                self._orig_file_size = zipinfo.file_size
+                self._orig_start_crc = self._running_crc
+                self._seekable = True
+        except AttributeError:
+            pass
+
+    def __repr__(self):
+        result = ['<%s.%s' % (self.__class__.__module__,
+                              self.__class__.__qualname__)]
+        if not self.closed:
+            result.append(' name=%r mode=%r' % (self.name, self.mode))
+            if self._compress_type != ZIP_STORED:
+                result.append(' compress_type=%s' %
+                              compressor_names.get(self._compress_type,
+                                                   self._compress_type))
+        else:
+            result.append(' [closed]')
+        result.append('>')
+        return ''.join(result)
+
+ def readline(self, limit=-1):
+ """Read and return a line from the stream.
+
+ If limit is specified, at most limit bytes will be read.
+ """
+
+ if limit < 0:
+ # Shortcut common case - newline found in buffer.
+ i = self._readbuffer.find(b'\n', self._offset) + 1
+ if i > 0:
+ line = self._readbuffer[self._offset: i]
+ self._offset = i
+ return line
+
+ return io.BufferedIOBase.readline(self, limit)
+
+ def peek(self, n=1):
+ """Returns buffered bytes without advancing the position."""
+ if n > len(self._readbuffer) - self._offset:
+ chunk = self.read(n)
+ if len(chunk) > self._offset:
+ self._readbuffer = chunk + self._readbuffer[self._offset:]
+ self._offset = 0
+ else:
+ self._offset -= len(chunk)
+
+ # Return up to 512 bytes to reduce allocation overhead for tight loops.
+ return self._readbuffer[self._offset: self._offset + 512]
+
+ def readable(self):
+ return True
+
+ def read(self, n=-1):
+ """Read and return up to n bytes.
+        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
+ """
+ if n is None or n < 0:
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ while not self._eof:
+ buf += self._read1(self.MAX_N)
+ return buf
+
+ end = n + self._offset
+ if end < len(self._readbuffer):
+ buf = self._readbuffer[self._offset:end]
+ self._offset = end
+ return buf
+
+ n = end - len(self._readbuffer)
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ while n > 0 and not self._eof:
+ data = self._read1(n)
+ if n < len(data):
+ self._readbuffer = data
+ self._offset = n
+ buf += data[:n]
+ break
+ buf += data
+ n -= len(data)
+ return buf
+
+ def _update_crc(self, newdata):
+ # Update the CRC using the given data.
+ if self._expected_crc is None:
+ # No need to compute the CRC if we don't have a reference value
+ return
+ self._running_crc = crc32(newdata, self._running_crc)
+ # Check the CRC if we're at the end of the file
+ if self._eof and self._running_crc != self._expected_crc:
+ raise BadZipFile("Bad CRC-32 for file %r" % self.name)
+
+ def read1(self, n):
+ """Read up to n bytes with at most one read() system call."""
+
+ if n is None or n < 0:
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ while not self._eof:
+ data = self._read1(self.MAX_N)
+ if data:
+ buf += data
+ break
+ return buf
+
+ end = n + self._offset
+ if end < len(self._readbuffer):
+ buf = self._readbuffer[self._offset:end]
+ self._offset = end
+ return buf
+
+ n = end - len(self._readbuffer)
+ buf = self._readbuffer[self._offset:]
+ self._readbuffer = b''
+ self._offset = 0
+ if n > 0:
+ while not self._eof:
+ data = self._read1(n)
+ if n < len(data):
+ self._readbuffer = data
+ self._offset = n
+ buf += data[:n]
+ break
+ if data:
+ buf += data
+ break
+ return buf
+
+ def _read1(self, n):
+ # Read up to n compressed bytes with at most one read() system call,
+ # decrypt and decompress them.
+ if self._eof or n <= 0:
+ return b''
+
+ # Read from file.
+ if self._compress_type == ZIP_DEFLATED:
+ ## Handle unconsumed data.
+ data = self._decompressor.unconsumed_tail
+ if n > len(data):
+ data += self._read2(n - len(data))
+ else:
+ data = self._read2(n)
+
+ if self._compress_type == ZIP_STORED:
+ self._eof = self._compress_left <= 0
+ elif self._compress_type == ZIP_DEFLATED:
+ n = max(n, self.MIN_READ_SIZE)
+ data = self._decompressor.decompress(data, n)
+ self._eof = (self._decompressor.eof or
+ self._compress_left <= 0 and
+ not self._decompressor.unconsumed_tail)
+ if self._eof:
+ data += self._decompressor.flush()
+ else:
+ data = self._decompressor.decompress(data)
+ self._eof = self._decompressor.eof or self._compress_left <= 0
+
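+        # Never hand back more than the member's declared uncompressed size.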
+ data = data[:self._left]
+ self._left -= len(data)
+ if self._left <= 0:
+ self._eof = True
+ self._update_crc(data)
+ return data
+
+ def _read2(self, n):
+ if self._compress_left <= 0:
+ return b''
+
+ n = max(n, self.MIN_READ_SIZE)
+ n = min(n, self._compress_left)
+
+ data = self._fileobj.read(n)
+ self._compress_left -= len(data)
+ if not data:
+ raise EOFError
+
+ if self._decrypter is not None:
+ data = self._decrypter(data)
+ return data
+
+ def close(self):
+ try:
+ if self._close_fileobj:
+ self._fileobj.close()
+ finally:
+ super().close()
+
+ def seekable(self):
+ return self._seekable
+
+ def seek(self, offset, whence=0):
+ if not self._seekable:
+ raise io.UnsupportedOperation("underlying stream is not seekable")
+ curr_pos = self.tell()
+ if whence == 0: # Seek from start of file
+ new_pos = offset
+ elif whence == 1: # Seek from current position
+ new_pos = curr_pos + offset
+ elif whence == 2: # Seek from EOF
+ new_pos = self._orig_file_size + offset
+ else:
+ raise ValueError("whence must be os.SEEK_SET (0), "
+ "os.SEEK_CUR (1), or os.SEEK_END (2)")
+
+ if new_pos > self._orig_file_size:
+ new_pos = self._orig_file_size
+
+ if new_pos < 0:
+ new_pos = 0
+
+ read_offset = new_pos - curr_pos
+ buff_offset = read_offset + self._offset
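+        # buff_offset is the target position relative to the start of the
+        # internal read buffer; if it lands inside, only _offset has to move.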
+
+ if buff_offset >= 0 and buff_offset < len(self._readbuffer):
+ # Just move the _offset index if the new position is in the _readbuffer
+ self._offset = buff_offset
+ read_offset = 0
+ elif read_offset < 0:
+ # Position is before the current position. Reset the ZipExtFile
+ self._fileobj.seek(self._orig_compress_start)
+ self._running_crc = self._orig_start_crc
+ self._compress_left = self._orig_compress_size
+ self._left = self._orig_file_size
+ self._readbuffer = b''
+ self._offset = 0
+ self._decompressor = _get_decompressor(self._compress_type)
+ self._eof = False
+ read_offset = new_pos
+
+ while read_offset > 0:
+ read_len = min(self.MAX_SEEK_READ, read_offset)
+ self.read(read_len)
+ read_offset -= read_len
+
+ return self.tell()
+
+ def tell(self):
+ if not self._seekable:
+ raise io.UnsupportedOperation("underlying stream is not seekable")
+ filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
+ return filepos
+
+
+class _ZipWriteFile(io.BufferedIOBase):
+ def __init__(self, zf, zinfo, zip64):
+ self._zinfo = zinfo
+ self._zip64 = zip64
+ self._zipfile = zf
+ self._compressor = _get_compressor(zinfo.compress_type,
+ zinfo._compresslevel)
+ self._file_size = 0
+ self._compress_size = 0
+ self._crc = 0
+
+ @property
+ def _fileobj(self):
+ return self._zipfile.fp
+
+ def writable(self):
+ return True
+
+ def write(self, data):
+ if self.closed:
+ raise ValueError('I/O operation on closed file.')
+ nbytes = len(data)
+ self._file_size += nbytes
+ self._crc = crc32(data, self._crc)
+ if self._compressor:
+ data = self._compressor.compress(data)
+ self._compress_size += len(data)
+ self._fileobj.write(data)
+ return nbytes
+
+ def close(self):
+ if self.closed:
+ return
+ super().close()
+ # Flush any data from the compressor, and update header info
+ if self._compressor:
+ buf = self._compressor.flush()
+ self._compress_size += len(buf)
+ self._fileobj.write(buf)
+ self._zinfo.compress_size = self._compress_size
+ else:
+ self._zinfo.compress_size = self._file_size
+ self._zinfo.CRC = self._crc
+ self._zinfo.file_size = self._file_size
+
+ # Write updated header info
+ if self._zinfo.flag_bits & 0x08:
+ # Write CRC and file sizes after the file data
+            fmt = '<LLQQ' if self._zip64 else '<LLLL'
+            self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
+                self._zinfo.compress_size, self._zinfo.file_size))
+            self._zipfile.start_dir = self._fileobj.tell()
+        else:
+            if not self._zip64:
+                if self._file_size > ZIP64_LIMIT:
+                    raise RuntimeError('File size unexpectedly exceeded ZIP64 '
+                                       'limit')
+                if self._compress_size > ZIP64_LIMIT:
+                    raise RuntimeError('Compressed size unexpectedly exceeded '
+                                       'ZIP64 limit')
+ # Seek backwards and write file header (which will now include
+ # correct CRC and file sizes)
+
+ # Preserve current position in file
+ self._zipfile.start_dir = self._fileobj.tell()
+ self._fileobj.seek(self._zinfo.header_offset)
+ self._fileobj.write(self._zinfo.FileHeader(self._zip64))
+ self._fileobj.seek(self._zipfile.start_dir)
+
+ self._zipfile._writing = False
+
+ # Successfully written: Add file to our caches
+ self._zipfile.filelist.append(self._zinfo)
+ self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
+
+class ZipFile:
+ """ Class with methods to open, read, write, close, list zip files.
+
+ z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
+ compresslevel=None)
+
+ file: Either the path to the file, or a file-like object.
+ If it is a path, the file will be opened and closed by ZipFile.
+ mode: The mode can be either read 'r', write 'w', exclusive create 'x',
+ or append 'a'.
+ compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
+ ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
+ allowZip64: if True ZipFile will create files with ZIP64 extensions when
+ needed, otherwise it will raise an exception when this would
+ be necessary.
+ compresslevel: None (default for the given compression type) or an integer
+ specifying the level to pass to the compressor.
+ When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
+ When using ZIP_DEFLATED integers 0 through 9 are accepted.
+ When using ZIP_BZIP2 integers 1 through 9 are accepted.
+
+ """
+
+ fp = None # Set here since __del__ checks it
+ _windows_illegal_name_trans_table = None
+
+ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
+ compresslevel=None):
+ """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
+ or append 'a'."""
+ if mode not in ('r', 'w', 'x', 'a'):
+ raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
+
+ _check_compression(compression)
+
+ self._allowZip64 = allowZip64
+ self._didModify = False
+ self.debug = 0 # Level of printing: 0 through 3
+ self.NameToInfo = {} # Find file info given name
+ self.filelist = [] # List of ZipInfo instances for archive
+ self.compression = compression # Method of compression
+ self.compresslevel = compresslevel
+ self.mode = mode
+ self.pwd = None
+ self._comment = b''
+
+ # Check if we were passed a file-like object
+ if isinstance(file, os.PathLike):
+ file = os.fspath(file)
+ if isinstance(file, str):
+ # No, it's a filename
+ self._filePassed = 0
+ self.filename = file
+ modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
+ 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
+ filemode = modeDict[mode]
+ while True:
+ try:
+ self.fp = io.open(file, filemode)
+ except OSError:
+ if filemode in modeDict:
+ filemode = modeDict[filemode]
+ continue
+ raise
+ break
+ else:
+ self._filePassed = 1
+ self.fp = file
+ self.filename = getattr(file, 'name', None)
+ self._fileRefCnt = 1
+ self._lock = threading.RLock()
+ self._seekable = True
+ self._writing = False
+
+ try:
+ if mode == 'r':
+ self._RealGetContents()
+ elif mode in ('w', 'x'):
+ # set the modified flag so central directory gets written
+ # even if no files are added to the archive
+ self._didModify = True
+ try:
+ self.start_dir = self.fp.tell()
+ except (AttributeError, OSError):
+ self.fp = _Tellable(self.fp)
+ self.start_dir = 0
+ self._seekable = False
+ else:
+ # Some file-like objects can provide tell() but not seek()
+ try:
+ self.fp.seek(self.start_dir)
+ except (AttributeError, OSError):
+ self._seekable = False
+ elif mode == 'a':
+ try:
+ # See if file is a zip file
+ self._RealGetContents()
+ # seek to start of directory and overwrite
+ self.fp.seek(self.start_dir)
+ except BadZipFile:
+ # file is not a zip file, just append
+ self.fp.seek(0, 2)
+
+ # set the modified flag so central directory gets written
+ # even if no files are added to the archive
+ self._didModify = True
+ self.start_dir = self.fp.tell()
+ else:
+ raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
+ except:
+ fp = self.fp
+ self.fp = None
+ self._fpclose(fp)
+ raise
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, traceback):
+ self.close()
+
+ def __repr__(self):
+ result = ['<%s.%s' % (self.__class__.__module__,
+ self.__class__.__qualname__)]
+ if self.fp is not None:
+ if self._filePassed:
+ result.append(' file=%r' % self.fp)
+ elif self.filename is not None:
+ result.append(' filename=%r' % self.filename)
+ result.append(' mode=%r' % self.mode)
+ else:
+ result.append(' [closed]')
+ result.append('>')
+ return ''.join(result)
+
+ def _RealGetContents(self):
+ """Read in the table of contents for the ZIP file."""
+ fp = self.fp
+ try:
+ endrec = _EndRecData(fp)
+ except OSError:
+ raise BadZipFile("File is not a zip file")
+ if not endrec:
+ raise BadZipFile("File is not a zip file")
+ if self.debug > 1:
+ print(endrec)
+ size_cd = endrec[_ECD_SIZE] # bytes in central directory
+ offset_cd = endrec[_ECD_OFFSET] # offset of central directory
+ self._comment = endrec[_ECD_COMMENT] # archive comment
+
+ # "concat" is zero, unless zip was concatenated to another file
+ concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
+ if endrec[_ECD_SIGNATURE] == stringEndArchive64:
+ # If Zip64 extension structures are present, account for them
+ concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+
+ if self.debug > 2:
+ inferred = concat + offset_cd
+ print("given, inferred, offset", offset_cd, inferred, concat)
+ # self.start_dir: Position of start of central directory
+ self.start_dir = offset_cd + concat
+ fp.seek(self.start_dir, 0)
+ data = fp.read(size_cd)
+ fp = io.BytesIO(data)
+ total = 0
+ while total < size_cd:
+ centdir = fp.read(sizeCentralDir)
+ if len(centdir) != sizeCentralDir:
+ raise BadZipFile("Truncated central directory")
+ centdir = struct.unpack(structCentralDir, centdir)
+ if centdir[_CD_SIGNATURE] != stringCentralDir:
+ raise BadZipFile("Bad magic number for central directory")
+ if self.debug > 2:
+ print(centdir)
+ filename = fp.read(centdir[_CD_FILENAME_LENGTH])
+ flags = centdir[5]
+ if flags & 0x800:
+ # UTF-8 file names extension
+ filename = filename.decode('utf-8')
+ else:
+ # Historical ZIP filename encoding
+ filename = filename.decode('cp437')
+ # Create ZipInfo instance to store file information
+ x = ZipInfo(filename)
+ x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
+ x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
+ x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
+ (x.create_version, x.create_system, x.extract_version, x.reserved,
+ x.flag_bits, x.compress_type, t, d,
+ x.CRC, x.compress_size, x.file_size) = centdir[1:12]
+ if x.extract_version > MAX_EXTRACT_VERSION:
+ raise NotImplementedError("zip file version %.1f" %
+ (x.extract_version / 10))
+ x.volume, x.internal_attr, x.external_attr = centdir[15:18]
+ # Convert date/time code to (year, month, day, hour, min, sec)
+ x._raw_time = t
+ x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
+ t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
+
+ x._decodeExtra()
+ x.header_offset = x.header_offset + concat
+ self.filelist.append(x)
+ self.NameToInfo[x.filename] = x
+
+ # update total bytes read from central directory
+ total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
+ + centdir[_CD_EXTRA_FIELD_LENGTH]
+ + centdir[_CD_COMMENT_LENGTH])
+
+ if self.debug > 2:
+ print("total", total)
+
+
+ def namelist(self):
+ """Return a list of file names in the archive."""
+ return [data.filename for data in self.filelist]
+
+ def infolist(self):
+ """Return a list of class ZipInfo instances for files in the
+ archive."""
+ return self.filelist
+
+ def printdir(self, file=None):
+ """Print a table of contents for the zip file."""
+ print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
+ file=file)
+ for zinfo in self.filelist:
+ date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
+ print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
+ file=file)
+
+ def testzip(self):
+ """Read all the files and check the CRC."""
+ chunk_size = 2 ** 20
+ for zinfo in self.filelist:
+ try:
+ # Read by chunks, to avoid an OverflowError or a
+ # MemoryError with very large embedded files.
+ with self.open(zinfo.filename, "r") as f:
+ while f.read(chunk_size): # Check CRC-32
+ pass
+ except BadZipFile:
+ return zinfo.filename
+
+ def getinfo(self, name):
+ """Return the instance of ZipInfo given 'name'."""
+ info = self.NameToInfo.get(name)
+ if info is None:
+ raise KeyError(
+ 'There is no item named %r in the archive' % name)
+
+ return info
+
+ def setpassword(self, pwd):
+ """Set default password for encrypted files."""
+ if pwd and not isinstance(pwd, bytes):
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
+ if pwd:
+ self.pwd = pwd
+ else:
+ self.pwd = None
+
+ @property
+ def comment(self):
+ """The comment text associated with the ZIP file."""
+ return self._comment
+
+ @comment.setter
+ def comment(self, comment):
+ if not isinstance(comment, bytes):
+ raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
+ # check for valid comment length
+ if len(comment) > ZIP_MAX_COMMENT:
+ import warnings
+ warnings.warn('Archive comment is too long; truncating to %d bytes'
+ % ZIP_MAX_COMMENT, stacklevel=2)
+ comment = comment[:ZIP_MAX_COMMENT]
+ self._comment = comment
+ self._didModify = True
+
+ def read(self, name, pwd=None):
+ """Return file bytes (as a string) for name."""
+ with self.open(name, "r", pwd) as fp:
+ return fp.read()
+
+ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
+ """Return file-like object for 'name'.
+
+ name is a string for the file name within the ZIP file, or a ZipInfo
+ object.
+
+ mode should be 'r' to read a file already in the ZIP file, or 'w' to
+ write to a file newly added to the archive.
+
+ pwd is the password to decrypt files (only used for reading).
+
+ When writing, if the file size is not known in advance but may exceed
+ 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
+ files. If the size is known in advance, it is best to pass a ZipInfo
+ instance for name, with zinfo.file_size set.
+ """
+ if mode not in {"r", "w"}:
+ raise ValueError('open() requires mode "r" or "w"')
+ if pwd and not isinstance(pwd, bytes):
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
+ if pwd and (mode == "w"):
+ raise ValueError("pwd is only supported for reading files")
+ if not self.fp:
+ raise ValueError(
+ "Attempt to use ZIP archive that was already closed")
+
+ # Make sure we have an info object
+ if isinstance(name, ZipInfo):
+ # 'name' is already an info object
+ zinfo = name
+ elif mode == 'w':
+ zinfo = ZipInfo(name)
+ zinfo.compress_type = self.compression
+ zinfo._compresslevel = self.compresslevel
+ else:
+ # Get info object for name
+ zinfo = self.getinfo(name)
+
+ if mode == 'w':
+ return self._open_to_write(zinfo, force_zip64=force_zip64)
+
+ if self._writing:
+ raise ValueError("Can't read from the ZIP file while there "
+ "is an open writing handle on it. "
+ "Close the writing handle before trying to read.")
+
+ # Open for reading:
+ self._fileRefCnt += 1
+ zef_file = _SharedFile(self.fp, zinfo.header_offset,
+ self._fpclose, self._lock, lambda: self._writing)
+ try:
+ # Skip the file header:
+ fheader = zef_file.read(sizeFileHeader)
+ if len(fheader) != sizeFileHeader:
+ raise BadZipFile("Truncated file header")
+ fheader = struct.unpack(structFileHeader, fheader)
+ if fheader[_FH_SIGNATURE] != stringFileHeader:
+ raise BadZipFile("Bad magic number for file header")
+
+ fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
+ if fheader[_FH_EXTRA_FIELD_LENGTH]:
+ zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+
+ if zinfo.flag_bits & 0x20:
+ # Zip 2.7: compressed patched data
+ raise NotImplementedError("compressed patched data (flag bit 5)")
+
+ if zinfo.flag_bits & 0x40:
+ # strong encryption
+ raise NotImplementedError("strong encryption (flag bit 6)")
+
+ if zinfo.flag_bits & 0x800:
+ # UTF-8 filename
+ fname_str = fname.decode("utf-8")
+ else:
+ fname_str = fname.decode("cp437")
+
+ if fname_str != zinfo.orig_filename:
+ raise BadZipFile(
+ 'File name in directory %r and header %r differ.'
+ % (zinfo.orig_filename, fname))
+
+ # check for encrypted flag & handle password
+ is_encrypted = zinfo.flag_bits & 0x1
+ zd = None
+ if is_encrypted:
+ if not pwd:
+ pwd = self.pwd
+ if not pwd:
+ raise RuntimeError("File %r is encrypted, password "
+ "required for extraction" % name)
+
+ zd = _ZipDecrypter(pwd)
+ # The first 12 bytes in the cypher stream is an encryption header
+ # used to strengthen the algorithm. The first 11 bytes are
+ # completely random, while the 12th contains the MSB of the CRC,
+ # or the MSB of the file time depending on the header type
+ # and is used to check the correctness of the password.
+ header = zef_file.read(12)
+ h = zd(header[0:12])
+ if zinfo.flag_bits & 0x8:
+ # compare against the file type from extended local headers
+ check_byte = (zinfo._raw_time >> 8) & 0xff
+ else:
+ # compare against the CRC otherwise
+ check_byte = (zinfo.CRC >> 24) & 0xff
+ if h[11] != check_byte:
+ raise RuntimeError("Bad password for file %r" % name)
+
+ return ZipExtFile(zef_file, mode, zinfo, zd, True)
+ except:
+ zef_file.close()
+ raise
+
+ def _open_to_write(self, zinfo, force_zip64=False):
+ if force_zip64 and not self._allowZip64:
+ raise ValueError(
+ "force_zip64 is True, but allowZip64 was False when opening "
+ "the ZIP file."
+ )
+ if self._writing:
+ raise ValueError("Can't write to the ZIP file while there is "
+ "another write handle open on it. "
+ "Close the first handle before opening another.")
+
+ # Sizes and CRC are overwritten with correct data after processing the file
+ if not hasattr(zinfo, 'file_size'):
+ zinfo.file_size = 0
+ zinfo.compress_size = 0
+ zinfo.CRC = 0
+
+ zinfo.flag_bits = 0x00
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
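+        # An unseekable output cannot be patched afterwards, so flag bit 3
+        # requests that sizes/CRC be written in a trailing data descriptor.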
+ if not self._seekable:
+ zinfo.flag_bits |= 0x08
+
+ if not zinfo.external_attr:
+ zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
+
+        # Compressed size can be larger than uncompressed size; the 5%
+        # margin below leaves headroom for that growth before ZIP64 is needed
+ zip64 = self._allowZip64 and \
+ (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
+
+ if self._seekable:
+ self.fp.seek(self.start_dir)
+ zinfo.header_offset = self.fp.tell()
+
+ self._writecheck(zinfo)
+ self._didModify = True
+
+ self.fp.write(zinfo.FileHeader(zip64))
+
+ self._writing = True
+ return _ZipWriteFile(self, zinfo, zip64)
+
+ def extract(self, member, path=None, pwd=None):
+ """Extract a member from the archive to the current working directory,
+ using its full name. Its file information is extracted as accurately
+ as possible. `member' may be a filename or a ZipInfo object. You can
+ specify a different directory using `path'.
+ """
+ if path is None:
+ path = os.getcwd()
+ else:
+ path = os.fspath(path)
+
+ return self._extract_member(member, path, pwd)
+
+ def extractall(self, path=None, members=None, pwd=None):
+ """Extract all members from the archive to the current working
+ directory. `path' specifies a different directory to extract to.
+ `members' is optional and must be a subset of the list returned
+ by namelist().
+ """
+ if members is None:
+ members = self.namelist()
+
+ if path is None:
+ path = os.getcwd()
+ else:
+ path = os.fspath(path)
+
+ for zipinfo in members:
+ self._extract_member(zipinfo, path, pwd)
+
+ @classmethod
+ def _sanitize_windows_name(cls, arcname, pathsep):
+ """Replace bad characters and remove trailing dots from parts."""
+ table = cls._windows_illegal_name_trans_table
+ if not table:
+ illegal = ':<>|"?*'
+ table = str.maketrans(illegal, '_' * len(illegal))
+ cls._windows_illegal_name_trans_table = table
+ arcname = arcname.translate(table)
+ # remove trailing dots
+ arcname = (x.rstrip('.') for x in arcname.split(pathsep))
+ # rejoin, removing empty parts.
+ arcname = pathsep.join(x for x in arcname if x)
+ return arcname
+
+ def _extract_member(self, member, targetpath, pwd):
+ """Extract the ZipInfo object 'member' to a physical
+ file on the path targetpath.
+ """
+ if not isinstance(member, ZipInfo):
+ member = self.getinfo(member)
+
+ # build the destination pathname, replacing
+ # forward slashes to platform specific separators.
+ arcname = member.filename.replace('/', os.path.sep)
+
+ if os.path.altsep:
+ arcname = arcname.replace(os.path.altsep, os.path.sep)
+ # interpret absolute pathname as relative, remove drive letter or
+ # UNC path, redundant separators, "." and ".." components.
+ arcname = os.path.splitdrive(arcname)[1]
+ invalid_path_parts = ('', os.path.curdir, os.path.pardir)
+ arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
+ if x not in invalid_path_parts)
+ if os.path.sep == '\\':
+ # filter illegal characters on Windows
+ arcname = self._sanitize_windows_name(arcname, os.path.sep)
+
+ targetpath = os.path.join(targetpath, arcname)
+ targetpath = os.path.normpath(targetpath)
+
+ # Create all upper directories if necessary.
+ upperdirs = os.path.dirname(targetpath)
+ if upperdirs and not os.path.exists(upperdirs):
+ os.makedirs(upperdirs)
+
+ if member.is_dir():
+ if not os.path.isdir(targetpath):
+ os.mkdir(targetpath)
+ return targetpath
+
+ with self.open(member, pwd=pwd) as source, \
+ open(targetpath, "wb") as target:
+ shutil.copyfileobj(source, target)
+
+ return targetpath
+
+ def _writecheck(self, zinfo):
+ """Check for errors before writing a file to the archive."""
+ if zinfo.filename in self.NameToInfo:
+ import warnings
+ warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
+ if self.mode not in ('w', 'x', 'a'):
+ raise ValueError("write() requires mode 'w', 'x', or 'a'")
+ if not self.fp:
+ raise ValueError(
+ "Attempt to write ZIP archive that was already closed")
+ _check_compression(zinfo.compress_type)
+ if not self._allowZip64:
+ requires_zip64 = None
+ if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
+ requires_zip64 = "Files count"
+ elif zinfo.file_size > ZIP64_LIMIT:
+ requires_zip64 = "Filesize"
+ elif zinfo.header_offset > ZIP64_LIMIT:
+ requires_zip64 = "Zipfile size"
+ if requires_zip64:
+ raise LargeZipFile(requires_zip64 +
+ " would require ZIP64 extensions")
+
+ def write(self, filename, arcname=None,
+ compress_type=None, compresslevel=None):
+ """Put the bytes from filename into the archive under the name
+ arcname."""
+ if not self.fp:
+ raise ValueError(
+ "Attempt to write to ZIP archive that was already closed")
+ if self._writing:
+ raise ValueError(
+ "Can't write to ZIP archive while an open writing handle exists"
+ )
+
+ zinfo = ZipInfo.from_file(filename, arcname)
+
+ if zinfo.is_dir():
+ zinfo.compress_size = 0
+ zinfo.CRC = 0
+ else:
+ if compress_type is not None:
+ zinfo.compress_type = compress_type
+ else:
+ zinfo.compress_type = self.compression
+
+ if compresslevel is not None:
+ zinfo._compresslevel = compresslevel
+ else:
+ zinfo._compresslevel = self.compresslevel
+
+ if zinfo.is_dir():
+ with self._lock:
+ if self._seekable:
+ self.fp.seek(self.start_dir)
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
+ if zinfo.compress_type == ZIP_LZMA:
+ # Compressed data includes an end-of-stream (EOS) marker
+ zinfo.flag_bits |= 0x02
+
+ self._writecheck(zinfo)
+ self._didModify = True
+
+ self.filelist.append(zinfo)
+ self.NameToInfo[zinfo.filename] = zinfo
+ self.fp.write(zinfo.FileHeader(False))
+ self.start_dir = self.fp.tell()
+ else:
+ with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
+ shutil.copyfileobj(src, dest, 1024*8)
+
+ def writestr(self, zinfo_or_arcname, data,
+ compress_type=None, compresslevel=None):
+ """Write a file into the archive. The contents is 'data', which
+ may be either a 'str' or a 'bytes' instance; if it is a 'str',
+ it is encoded as UTF-8 first.
+ 'zinfo_or_arcname' is either a ZipInfo instance or
+ the name of the file in the archive."""
+ if isinstance(data, str):
+ data = data.encode("utf-8")
+ if not isinstance(zinfo_or_arcname, ZipInfo):
+ zinfo = ZipInfo(filename=zinfo_or_arcname,
+ date_time=time.localtime(time.time())[:6])
+ zinfo.compress_type = self.compression
+ zinfo._compresslevel = self.compresslevel
+ if zinfo.filename[-1] == '/':
+ zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
+ zinfo.external_attr |= 0x10 # MS-DOS directory flag
+ else:
+ zinfo.external_attr = 0o600 << 16 # ?rw-------
+ else:
+ zinfo = zinfo_or_arcname
+
+ if not self.fp:
+ raise ValueError(
+ "Attempt to write to ZIP archive that was already closed")
+ if self._writing:
+ raise ValueError(
+ "Can't write to ZIP archive while an open writing handle exists."
+ )
+
+ if compress_type is not None:
+ zinfo.compress_type = compress_type
+
+ if compresslevel is not None:
+ zinfo._compresslevel = compresslevel
+
+ zinfo.file_size = len(data) # Uncompressed size
+ with self._lock:
+ with self.open(zinfo, mode='w') as dest:
+ dest.write(data)
+
+ def __del__(self):
+ """Call the "close()" method in case the user forgot."""
+ self.close()
+
+ def close(self):
+ """Close the file, and for mode 'w', 'x' and 'a' write the ending
+ records."""
+ if self.fp is None:
+ return
+
+ if self._writing:
+ raise ValueError("Can't close the ZIP file while there is "
+ "an open writing handle on it. "
+ "Close the writing handle before closing the zip.")
+
+ try:
+ if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
+ with self._lock:
+ if self._seekable:
+ self.fp.seek(self.start_dir)
+ self._write_end_record()
+ finally:
+ fp = self.fp
+ self.fp = None
+ self._fpclose(fp)
+
+ def _write_end_record(self):
+ for zinfo in self.filelist: # write central directory
+ dt = zinfo.date_time
+ dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+ dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+ extra = []
+ if zinfo.file_size > ZIP64_LIMIT \
+ or zinfo.compress_size > ZIP64_LIMIT:
+ extra.append(zinfo.file_size)
+ extra.append(zinfo.compress_size)
+ file_size = 0xffffffff
+ compress_size = 0xffffffff
+ else:
+ file_size = zinfo.file_size
+ compress_size = zinfo.compress_size
+
+ if zinfo.header_offset > ZIP64_LIMIT:
+ extra.append(zinfo.header_offset)
+ header_offset = 0xffffffff
+ else:
+ header_offset = zinfo.header_offset
+
+ extra_data = zinfo.extra
+ min_version = 0
+ if extra:
+ # Append a ZIP64 field to the extra's
+                extra_data = struct.pack(
+                    '<HH' + 'Q'*len(extra),
+                    1, 8*len(extra), *extra) + extra_data
+
+                min_version = ZIP64_VERSION
+
+            if zinfo.compress_type == ZIP_BZIP2:
+                min_version = max(BZIP2_VERSION, min_version)
+            elif zinfo.compress_type == ZIP_LZMA:
+                min_version = max(LZMA_VERSION, min_version)
+
+            extract_version = max(min_version, zinfo.extract_version)
+            create_version = max(min_version, zinfo.create_version)
+            filename, flag_bits = zinfo._encodeFilenameFlags()
+            centdir = struct.pack(structCentralDir,
+                                  stringCentralDir, create_version,
+                                  zinfo.create_system, extract_version, zinfo.reserved,
+                                  flag_bits, zinfo.compress_type, dostime, dosdate,
+                                  zinfo.CRC, compress_size, file_size,
+                                  len(filename), len(extra_data), len(zinfo.comment),
+                                  0, zinfo.internal_attr, zinfo.external_attr,
+                                  header_offset)
+            self.fp.write(centdir)
+            self.fp.write(filename)
+            self.fp.write(extra_data)
+            self.fp.write(zinfo.comment)
+
+        pos2 = self.fp.tell()
+        # Write end-of-zip-archive record
+        centDirCount = len(self.filelist)
+        centDirSize = pos2 - self.start_dir
+        centDirOffset = self.start_dir
+        requires_zip64 = None
+        if centDirCount > ZIP_FILECOUNT_LIMIT:
+            requires_zip64 = "Files count"
+        elif centDirOffset > ZIP64_LIMIT:
+            requires_zip64 = "Central directory offset"
+        elif centDirSize > ZIP64_LIMIT:
+            requires_zip64 = "Central directory size"
+        if requires_zip64:
+            # Need to write the ZIP64 end-of-archive records
+            if not self._allowZip64:
+                raise LargeZipFile(requires_zip64 +
+                                   " would require ZIP64 extensions")
+            zip64endrec = struct.pack(
+                structEndArchive64, stringEndArchive64,
+                44, 45, 45, 0, 0, centDirCount, centDirCount,
+                centDirSize, centDirOffset)
+            self.fp.write(zip64endrec)
+
+            zip64locrec = struct.pack(
+                structEndArchive64Locator,
+                stringEndArchive64Locator, 0, pos2, 1)
+            self.fp.write(zip64locrec)
+            centDirCount = min(centDirCount, 0xFFFF)
+            centDirSize = min(centDirSize, 0xFFFFFFFF)
+            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
+
+        endrec = struct.pack(structEndArchive, stringEndArchive,
+                             0, 0, centDirCount, centDirCount,
+                             centDirSize, centDirOffset, len(self._comment))
+        self.fp.write(endrec)
+        self.fp.write(self._comment)
+        self.fp.flush()
+
+ def _fpclose(self, fp):
+ assert self._fileRefCnt > 0
+ self._fileRefCnt -= 1
+ if not self._fileRefCnt and not self._filePassed:
+ fp.close()
+
+
+class PyZipFile(ZipFile):
+ """Class to create ZIP archives with Python library files and packages."""
+
+ def __init__(self, file, mode="r", compression=ZIP_STORED,
+ allowZip64=True, optimize=-1):
+ ZipFile.__init__(self, file, mode=mode, compression=compression,
+ allowZip64=allowZip64)
+ self._optimize = optimize
+
+ def writepy(self, pathname, basename="", filterfunc=None):
+ """Add all files from "pathname" to the ZIP archive.
+
+ If pathname is a package directory, search the directory and
+ all package subdirectories recursively for all *.py and enter
+ the modules into the archive. If pathname is a plain
+ directory, listdir *.py and enter all modules. Else, pathname
+ must be a Python *.py file and the module will be put into the
+ archive. Added modules are always module.pyc.
+ This method will compile the module.py into module.pyc if
+ necessary.
+        If filterfunc(pathname) is given, it is called with every path that
+        is about to be added. When it returns a false value, the file or
+        directory is skipped.
+ """
+ pathname = os.fspath(pathname)
+ if filterfunc and not filterfunc(pathname):
+ if self.debug:
+ label = 'path' if os.path.isdir(pathname) else 'file'
+ print('%s %r skipped by filterfunc' % (label, pathname))
+ return
+ dir, name = os.path.split(pathname)
+ if os.path.isdir(pathname):
+ initname = os.path.join(pathname, "__init__.py")
+ if os.path.isfile(initname):
+ # This is a package directory, add it
+ if basename:
+ basename = "%s/%s" % (basename, name)
+ else:
+ basename = name
+ if self.debug:
+ print("Adding package in", pathname, "as", basename)
+ fname, arcname = self._get_codename(initname[0:-3], basename)
+ if self.debug:
+ print("Adding", arcname)
+ self.write(fname, arcname)
+ dirlist = sorted(os.listdir(pathname))
+ dirlist.remove("__init__.py")
+ # Add all *.py files and package subdirectories
+ for filename in dirlist:
+ path = os.path.join(pathname, filename)
+ root, ext = os.path.splitext(filename)
+ if os.path.isdir(path):
+ if os.path.isfile(os.path.join(path, "__init__.py")):
+ # This is a package directory, add it
+ self.writepy(path, basename,
+ filterfunc=filterfunc) # Recursive call
+ elif ext == ".py":
+ if filterfunc and not filterfunc(path):
+ if self.debug:
+ print('file %r skipped by filterfunc' % path)
+ continue
+ fname, arcname = self._get_codename(path[0:-3],
+ basename)
+ if self.debug:
+ print("Adding", arcname)
+ self.write(fname, arcname)
+ else:
+ # This is NOT a package directory, add its files at top level
+ if self.debug:
+ print("Adding files from directory", pathname)
+ for filename in sorted(os.listdir(pathname)):
+ path = os.path.join(pathname, filename)
+ root, ext = os.path.splitext(filename)
+ if ext == ".py":
+ if filterfunc and not filterfunc(path):
+ if self.debug:
+ print('file %r skipped by filterfunc' % path)
+ continue
+ fname, arcname = self._get_codename(path[0:-3],
+ basename)
+ if self.debug:
+ print("Adding", arcname)
+ self.write(fname, arcname)
+ else:
+ if pathname[-3:] != ".py":
+ raise RuntimeError(
+ 'Files added with writepy() must end with ".py"')
+ fname, arcname = self._get_codename(pathname[0:-3], basename)
+ if self.debug:
+ print("Adding file", arcname)
+ self.write(fname, arcname)
+
+ def _get_codename(self, pathname, basename):
+ """Return (filename, archivename) for the path.
+
+ Given a module name path, return the correct file path and
+ archive name, compiling if necessary. For example, given
+ /python/lib/string, return (/python/lib/string.pyc, string).
+ """
+ def _compile(file, optimize=-1):
+ import py_compile
+ if self.debug:
+ print("Compiling", file)
+ try:
+ py_compile.compile(file, doraise=True, optimize=optimize)
+ except py_compile.PyCompileError as err:
+ print(err.msg)
+ return False
+ return True
+
+ file_py = pathname + ".py"
+ file_pyc = pathname + ".pyc"
+ pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
+ pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
+ pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
+ if self._optimize == -1:
+ # legacy mode: use whatever file is present
+ if (os.path.isfile(file_pyc) and
+ os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
+ # Use .pyc file.
+ arcname = fname = file_pyc
+ elif (os.path.isfile(pycache_opt0) and
+ os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
+ # Use the __pycache__/*.pyc file, but write it to the legacy pyc
+ # file name in the archive.
+ fname = pycache_opt0
+ arcname = file_pyc
+ elif (os.path.isfile(pycache_opt1) and
+ os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
+ # Use the __pycache__/*.pyc file, but write it to the legacy pyc
+ # file name in the archive.
+ fname = pycache_opt1
+ arcname = file_pyc
+ elif (os.path.isfile(pycache_opt2) and
+ os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
+ # Use the __pycache__/*.pyc file, but write it to the legacy pyc
+ # file name in the archive.
+ fname = pycache_opt2
+ arcname = file_pyc
+ else:
+ # Compile py into PEP 3147 pyc file.
+ if _compile(file_py):
+ if sys.flags.optimize == 0:
+ fname = pycache_opt0
+ elif sys.flags.optimize == 1:
+ fname = pycache_opt1
+ else:
+ fname = pycache_opt2
+ arcname = file_pyc
+ else:
+ fname = arcname = file_py
+ else:
+ # new mode: use given optimization level
+ if self._optimize == 0:
+ fname = pycache_opt0
+ arcname = file_pyc
+ else:
+ arcname = file_pyc
+ if self._optimize == 1:
+ fname = pycache_opt1
+ elif self._optimize == 2:
+ fname = pycache_opt2
+ else:
+ msg = "invalid value for 'optimize': {!r}".format(self._optimize)
+ raise ValueError(msg)
+ if not (os.path.isfile(fname) and
+ os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
+ if not _compile(file_py, optimize=self._optimize):
+ fname = arcname = file_py
+ archivename = os.path.split(arcname)[1]
+ if basename:
+ archivename = "%s/%s" % (basename, archivename)
+ return (fname, archivename)
+
+
+def main(args=None):
+ import argparse
+
+ description = 'A simple command-line interface for zipfile module.'
+ parser = argparse.ArgumentParser(description=description)
+ group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('-l', '--list', metavar='<zipfile>',
+                       help='Show listing of a zipfile')
+    group.add_argument('-e', '--extract', nargs=2,
+                       metavar=('<zipfile>', '<output_dir>'),
+                       help='Extract zipfile into target dir')
+    group.add_argument('-c', '--create', nargs='+',
+                       metavar=('<name>', '<file>'),
+                       help='Create zipfile from sources')
+    group.add_argument('-t', '--test', metavar='<zipfile>',
+                       help='Test if a zipfile is valid')
+ args = parser.parse_args(args)
+
+ if args.test is not None:
+ src = args.test
+ with ZipFile(src, 'r') as zf:
+ badfile = zf.testzip()
+ if badfile:
+ print("The following enclosed file is corrupted: {!r}".format(badfile))
+ print("Done testing")
+
+ elif args.list is not None:
+ src = args.list
+ with ZipFile(src, 'r') as zf:
+ zf.printdir()
+
+ elif args.extract is not None:
+ src, curdir = args.extract
+ with ZipFile(src, 'r') as zf:
+ zf.extractall(curdir)
+
+ elif args.create is not None:
+ zip_name = args.create.pop(0)
+ files = args.create
+
+ def addToZip(zf, path, zippath):
+ if os.path.isfile(path):
+ zf.write(path, zippath, ZIP_DEFLATED)
+ elif os.path.isdir(path):
+ if zippath:
+ zf.write(path, zippath)
+ for nm in sorted(os.listdir(path)):
+ addToZip(zf,
+ os.path.join(path, nm), os.path.join(zippath, nm))
+ # else: ignore
+
+ with ZipFile(zip_name, 'w') as zf:
+ for path in files:
+ zippath = os.path.basename(path)
+ if not zippath:
+ zippath = os.path.basename(os.path.dirname(path))
+ if zippath in ('', os.curdir, os.pardir):
+ zippath = ''
+ addToZip(zf, path, zippath)
+
+if __name__ == "__main__":
+ main()
diff --git a/caradoc b/caradoc
new file mode 100755
index 0000000..97cc119
Binary files /dev/null and b/caradoc differ
diff --git a/pdfcat b/pdfcat
new file mode 100755
index 0000000..115eda7
--- /dev/null
+++ b/pdfcat
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+"""
+Concatenate pages from pdf files into a single pdf file.
+
+Page ranges refer to the previously-named file.
+A file not followed by a page range means all the pages of the file.
+
+PAGE RANGES are like Python slices.
+ {page_range_help}
+EXAMPLES
+ pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
+ Concatenate all of head.pdf, all but page seven of content.pdf,
+ and the last page of tail.pdf, producing output.pdf.
+
+ pdfcat chapter*.pdf >book.pdf
+ You can specify the output file by redirection.
+
+ pdfcat chapter?.pdf chapter10.pdf >book.pdf
+ In case you don't want chapter 10 before chapter 2.
+"""
+# Copyright (c) 2014, Steve Witham .
+# All rights reserved. This software is available under a BSD license;
+# see https://github.com/mstamy2/PyPDF2/LICENSE
+
+from __future__ import print_function
+import argparse
+from PdfFileTransformer.PyPDF2.pagerange import PAGE_RANGE_HELP
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description=__doc__.format(page_range_help=PAGE_RANGE_HELP),
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument("-o", "--output",
+ metavar="output_file")
+ parser.add_argument("-v", "--verbose", action="store_true",
+ help="show page ranges as they are being read")
+ parser.add_argument("first_filename", nargs=1,
+ metavar="filename [page range...]")
+ # argparse chokes on page ranges like "-2:" unless caught like this:
+ parser.add_argument("fn_pgrgs", nargs=argparse.REMAINDER,
+ metavar="filenames and/or page ranges")
+ args = parser.parse_args()
+ args.fn_pgrgs.insert(0, args.first_filename[0])
+ return args
+
+
+from sys import stderr, stdout, exit
+import os
+import traceback
+from collections import defaultdict
+
+from PdfFileTransformer.PyPDF2 import PdfFileMerger, parse_filename_page_ranges
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ filename_page_ranges = parse_filename_page_ranges(args.fn_pgrgs)
+ if args.output:
+ output = open(args.output, "wb")
+ else:
+ stdout.flush()
+ output = os.fdopen(stdout.fileno(), "wb")
+
+ merger = PdfFileMerger()
+ in_fs = dict()
+ try:
+ for (filename, page_range) in filename_page_ranges:
+ if args.verbose:
+ print(filename, page_range, file=stderr)
+ if filename not in in_fs:
+ in_fs[filename] = open(filename, "rb")
+ merger.append(in_fs[filename], pages=page_range)
+ except:
+ print(traceback.format_exc(), file=stderr)
+ print("Error while reading " + filename, file=stderr)
+ exit(1)
+ merger.write(output)
+ # In 3.0, input files must stay open until output is written.
+ # Not closing the in_fs because this script exits now.
diff --git a/tests/samples/descriptions.txt b/tests/samples/descriptions.txt
new file mode 100644
index 0000000..6d57430
--- /dev/null
+++ b/tests/samples/descriptions.txt
@@ -0,0 +1,7 @@
+== Zip files ==
+
+test1.zip: two files and a global archive comment.
+
+== Pdf files ==
+
+test1.pdf: a tax form document.
\ No newline at end of file
diff --git a/tests/samples/test1.pdf b/tests/samples/test1.pdf
new file mode 100644
index 0000000..3e79ae1
Binary files /dev/null and b/tests/samples/test1.pdf differ
diff --git a/tests/samples/test1.zip b/tests/samples/test1.zip
new file mode 100644
index 0000000..cd93f9d
Binary files /dev/null and b/tests/samples/test1.zip differ
diff --git a/tests/samples/test1_normalized.pdf b/tests/samples/test1_normalized.pdf
new file mode 100644
index 0000000..e955cb1
Binary files /dev/null and b/tests/samples/test1_normalized.pdf differ
diff --git a/tests/test_pdf_add_data.py b/tests/test_pdf_add_data.py
new file mode 100755
index 0000000..eb12d93
--- /dev/null
+++ b/tests/test_pdf_add_data.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append("../")
+
+import logging
+from PdfFileTransformer import Pdf
+
+
+input_file = "./samples/test1.pdf"
+output_file = "./samples/test1_out.pdf"
+
+logging.basicConfig(level=logging.DEBUG)
+
+p = Pdf(input_file)
+p.insert_new_obj_stream_at_start(b'A' * 140)
+p.insert_new_obj_stream_at_end(b'B' * 120)
+f = open(output_file, 'wb')
+f.write(p.get_build_buffer())
+f.close()
diff --git a/tests/test_pdf_normalisation.py b/tests/test_pdf_normalisation.py
new file mode 100755
index 0000000..aba197e
--- /dev/null
+++ b/tests/test_pdf_normalisation.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append("../")
+import logging
+from PdfFileTransformer.PyPDF2 import PdfFileReader, PdfFileWriter
+
+
+input_file = "./samples/test1.pdf"
+output_file = "./samples/test1_out.pdf"
+
+logging.basicConfig(level=logging.DEBUG)
+
+f_input = open(input_file, "rb")
+reader = PdfFileReader(f_input)
+
+f_output = open(output_file, "wb")
+writer = PdfFileWriter()
+
+writer.appendPagesFromReader(reader)
+writer.setHeader(b"%PDF-1.5")
+writer.write(f_output)
+
+f_input.close()
+f_output.close()
diff --git a/tests/test_pdf_rebuild.py b/tests/test_pdf_rebuild.py
new file mode 100755
index 0000000..29b31c3
--- /dev/null
+++ b/tests/test_pdf_rebuild.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append("../")
+
+from PdfFileTransformer import Pdf
+import logging
+
+input_file = "./samples/test1.pdf"
+output_file = "./samples/test1_out.pdf"
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+p = Pdf(input_file)
+f = open(output_file, 'wb')
+f.write(p.get_build_buffer())
+f.close()
diff --git a/tests/test_polyglot_pdfzip.py b/tests/test_polyglot_pdfzip.py
new file mode 100755
index 0000000..f4a3ff2
--- /dev/null
+++ b/tests/test_polyglot_pdfzip.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append("../")
+
+from PdfFileTransformer import Pdf
+from ZipFileTransformer import Zip
+from PolyglotFile import PolyglotPdfZip
+import logging
+
+input_file_pdf = "./samples/test1.pdf"
+input_file_zip = "./samples/test1.zip"
+output_file = "./samples/test1_out.pdf"
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+p = Pdf(input_file_pdf)
+z = Zip(input_file_zip)
+a = PolyglotPdfZip(p, z)
+a.generate()
+a.write(output_file)
diff --git a/tests/test_rebuild_zip.py b/tests/test_rebuild_zip.py
new file mode 100755
index 0000000..ceb04a1
--- /dev/null
+++ b/tests/test_rebuild_zip.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append("../")
+
+import tempfile
+
+from ZipFileTransformer import Zip, ZipFile
+
+input_file = "./samples/test1.zip"
+output_file = "./samples/test1_out.zip"
+
+zi = ZipFile(input_file,"r")
+zo = ZipFile(output_file,"w")
+zo.writestr(' ',b'AAAAAAAAAAAAAAAAAAAAAA',0)
+for zipinfo in zi.infolist():
+ zo.writestr(zipinfo, zi.read(zipinfo))
+zi.close()
+zo.close()
\ No newline at end of file
diff --git a/tests/test_zip.py b/tests/test_zip.py
new file mode 100755
index 0000000..81b8f6e
--- /dev/null
+++ b/tests/test_zip.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append("../")
+
+import tempfile
+
+from ZipFileTransformer import Zip
+
+input_file = "./samples/test1.zip"
+output_file = tempfile.mktemp()
+print("Output: " + output_file)
+
+z = Zip(input_file)
+a = bytearray(b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA')
+b = bytearray(b'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB')
+z.add_data_to_file(a, b, False)
+g = open(output_file, "wb")
+g.write(a + z.get_local_file_data() + b + z.get_data_after_central_directory())
+g.close()
diff --git a/truepolyglot b/truepolyglot
new file mode 100755
index 0000000..2ff9269
--- /dev/null
+++ b/truepolyglot
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import logging
+
+from PdfFileTransformer import Pdf
+from ZipFileTransformer import Zip
+from PolyglotFile import PolyglotZipPdf, PolyglotPdfZip, PolyglotSZipPdf
+
+
+def main():
+    description_str = ('Generate a polyglot file.\n\nFormats available:\n' +
+ '* pdfzip: Generate a file valid as PDF and ZIP.' +
+ ' The format is closest to PDF.\n' +
+ '* zippdf: Generate a file valid as ZIP and PDF.' +
+ ' The format is closest to ZIP.\n' +
+ '* szippdf: Generate a file valid as ZIP and PDF.' +
+ ' The format is strictly a ZIP.' +
+                       ' The archive is modified.')
+ usage_str = '%(prog)s format [options] output-file'
+ epilog_str = 'TruePolyglot v1.3'
+ frm = argparse.RawTextHelpFormatter
+ parser = argparse.ArgumentParser(description=description_str,
+ epilog=epilog_str,
+ usage=usage_str,
+ formatter_class=frm)
+ parser.add_argument('format', nargs='+', choices=["pdfzip",
+ "zippdf",
+ "szippdf"],
+ help='Output polyglot format')
+ parser.add_argument('--pdffile', dest='pdffile',
+ help='PDF input file')
+ parser.add_argument('--zipfile', dest='zipfile',
+ help='ZIP input file')
+ parser.add_argument('--verbose', dest='verbose',
+                        help='Verbosity level (default: info)',
+ default="info",
+ choices=["none", "error", "info", "debug"])
+ parser.add_argument('output_file', nargs='+',
+ help='Output polyglot file path')
+
+ args = parser.parse_args()
+
+ formats = ["pdfzip", "zippdf", "szippdf"]
+ if args.format[0] in formats:
+ if args.pdffile is None:
+ parser.error('pdffile is required')
+ if args.zipfile is None:
+ parser.error('zipfile is required')
+
+ if args.verbose == "none":
+ logging.basicConfig(level=logging.CRITICAL)
+ if args.verbose == "error":
+ logging.basicConfig(level=logging.ERROR)
+ if args.verbose == "info":
+ logging.basicConfig(level=logging.INFO)
+ if args.verbose == "debug":
+ logging.basicConfig(level=logging.DEBUG)
+
+ p = Pdf(args.pdffile)
+ z = Zip(args.zipfile)
+ if args.format[0] == "pdfzip":
+ a = PolyglotPdfZip(p, z)
+ if args.format[0] == "zippdf":
+ a = PolyglotZipPdf(p, z)
+ if args.format[0] == "szippdf":
+ a = PolyglotSZipPdf(p, z)
+ a.generate()
+ a.write(args.output_file[0])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/website/css/styles.css b/website/css/styles.css
new file mode 100644
index 0000000..adc9728
--- /dev/null
+++ b/website/css/styles.css
@@ -0,0 +1,63 @@
+html {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+body {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+td {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+th {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+h1 {
+ color: white;
+}
+a:link {
+ color: #47B8C7;
+}
+a:visited {
+ color: #47B8C7;
+}
+a:active {
+ color: #47B8C7;
+}
+table {
+ border-collapse: collapse;
+}
+table, th, td {
+ border: 1px solid white;
+}
+th {
+ background-color: #92D050;
+ color: black;
+}
+th {
+ padding-left: 0.5em;
+ padding-right: 0.5em;
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+}
+td {
+ padding-left: 0.5em;
+ padding-right: 0.5em;
+ padding-bottom: 0.5em;
+ padding-top: 0.5em;
+ text-align: left;
+}
+
+.font_reduce {
+ font-size: 75%;
+}
+
+.warning {
+ color: #ffb833;
+}
diff --git a/website/css/styles2.css b/website/css/styles2.css
new file mode 100644
index 0000000..56ef7e5
--- /dev/null
+++ b/website/css/styles2.css
@@ -0,0 +1,61 @@
+html {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+body {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+td {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+th {
+ background-color: black;
+ font-family: Consolas,monaco,monospace;
+ color: #92D050;
+}
+a:link {
+ color: #47B8C7;
+}
+a:visited {
+ color: #47B8C7;
+}
+a:active {
+ color: #47B8C7;
+}
+table {
+ border-collapse: collapse;
+}
+table, th, td {
+ border: 1px solid white;
+}
+th {
+ background-color: #92D050;
+ color: black;
+}
+th {
+ padding-left: 0.5em;
+ padding-right: 0.5em;
+ padding-top: 0.5em;
+ padding-bottom: 0.5em;
+}
+td {
+ padding-left: 0.5em;
+ padding-right: 0.5em;
+ padding-bottom: 0.5em;
+ padding-top: 0.5em;
+ text-align: left;
+}
+th a:link {
+ color: black;
+}
+th a:visited {
+ color: black;
+}
+th a:active {
+ color: black;
+}
diff --git a/website/favicon.ico b/website/favicon.ico
new file mode 100644
index 0000000..0ea93ea
Binary files /dev/null and b/website/favicon.ico differ
diff --git a/website/gen_pocs.sh b/website/gen_pocs.sh
new file mode 100755
index 0000000..a62e112
--- /dev/null
+++ b/website/gen_pocs.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+find -type f -name 'polyglot.pdf' -delete
+
+mkdir -p ./samples/pdfzip/poc1/
+../truepolyglot pdfzip --pdffile ./samples/pdfzip/poc1/doc.pdf --zipfile ./samples/pdfzip/poc1/archive.zip ./samples/pdfzip/poc1/polyglot.pdf
+
+mkdir -p ./samples/pdfzip/poc2/
+../truepolyglot pdfzip --pdffile ./samples/pdfzip/poc2/orwell_1984.pdf --zipfile ./samples/pdfzip/poc2/file-FILE5_32.zip ./samples/pdfzip/poc2/polyglot.pdf
+
+mkdir -p ./samples/pdfzip/poc3/
+../truepolyglot pdfzip --pdffile ./samples/pdfzip/poc3/x86asm.pdf --zipfile ./samples/pdfzip/poc3/fasmw17304.zip ./samples/pdfzip/poc3/polyglot.pdf
+
+mkdir -p ./samples/zippdf/poc4/
+../truepolyglot zippdf --pdffile ./samples/zippdf/poc4/doc.pdf --zipfile ./samples/zippdf/poc4/archive.zip ./samples/zippdf/poc4/polyglot.pdf
+
+mkdir -p ./samples/szippdf/poc5/
+../truepolyglot szippdf --pdffile ./samples/szippdf/poc5/electronics.pdf --zipfile ./samples/szippdf/poc5/hello_world.jar ./samples/szippdf/poc5/polyglot.pdf
+
+mkdir -p ./samples/pdfzip/poc6/
+../truepolyglot pdfzip --pdffile ./samples/pdfzip/poc6/hexinator.pdf --zipfile ./samples/pdfzip/poc6/eicar.zip ./samples/pdfzip/poc6/polyglot.pdf
diff --git a/website/index.html b/website/index.html
new file mode 100644
index 0000000..c19a2ab
--- /dev/null
+++ b/website/index.html
@@ -0,0 +1,249 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8"/>
+<title>TruePolyglot</title>
+<link rel="stylesheet" href="css/styles.css"/>
+</head>
+<body>
+
+<h1>TruePolyglot</h1>
+<p>TruePolyglot is a polyglot file generator project. This means that the
+generated file is composed of several file formats: the same file can be
+opened, for example, as a ZIP archive and as a PDF document.</p>
+<p>The idea for this project comes from the work of Ange Albertini, the
+International Journal of Proof-of-Concept or Get The Fuck Out (PoC||GTFO)
+and Julia Wolf, which explains how such polyglot files can be built.</p>
+<p>Polyglot files can be tedious to build, all the more so if you want to
+respect the file formats correctly. That is why I decided to write a tool
+to generate them. My main motivation was the technical challenge.</p>
+
+<h2>Features and changelog</h2>
+
+<table>
+<tr>
+<th>Description</th>
+<th>Version</th>
+</tr>
+<tr>
+<td>Build a polyglot file valid as PDF and ZIP format and that can be opened with 7Zip and Windows Explorer</td>
+<td>POC</td>
+</tr>
+<tr>
+<td>Add a stream object in PDF part</td>
+<td>POC</td>
+</tr>
+<tr>
+<td>Polyglot file checked without warning with pdftocairo</td>
+<td>>= 1.0</td>
+</tr>
+<tr>
+<td>Polyglot file checked without warning with caradoc</td>
+<td>>= 1.0</td>
+</tr>
+<tr>
+<td>Rebuild PDF Xref Table</td>
+<td>>= 1.0</td>
+</tr>
+<tr>
+<td>Stream object with correct length header value</td>
+<td>>= 1.0</td>
+</tr>
+<tr>
+<td>Format "zippdf", file without offset after Zip data</td>
+<td>>= 1.1</td>
+</tr>
+<tr>
+<td>Polyglot file keeps the original PDF version</td>
+<td>>= 1.1.1</td>
+</tr>
+<tr>
+<td>Add "szippdf" format without offset before and after Zip data</td>
+<td>>= 1.2</td>
+</tr>
+<tr>
+<td>Fix /Length stream object value and PDF offset for szippdf format</td>
+<td>>= 1.3</td>
+</tr>
+</table>
+
+<h2>Usage</h2>
+
+<pre>
+usage: truepolyglot format [options] output-file
+
+Generate a polyglot file.
+
+Formats available:
+* pdfzip: Generate a file valid as PDF and ZIP. The format is closest to PDF.
+* zippdf: Generate a file valid as ZIP and PDF. The format is closest to ZIP.
+* szippdf: Generate a file valid as ZIP and PDF. The format is strictly a ZIP. The archive is modified.
+
+positional arguments:
+ {pdfzip,zippdf,szippdf}
+ Output polyglot format
+ output_file Output polyglot file path
+
+optional arguments:
+ -h, --help show this help message and exit
+ --pdffile PDFFILE PDF input file
+ --zipfile ZIPFILE ZIP input file
+ --verbose {none,error,info,debug}
+                        Verbosity level (default: info)
+
+TruePolyglot v1.3
+