diff options
Diffstat (limited to 'PolyglotFile')
-rw-r--r-- PolyglotFile/__init__.py | 7
-rw-r--r-- PolyglotFile/polyglotpdfzip.py | 55
-rw-r--r-- PolyglotFile/polyglotszippdf.py | 110
-rw-r--r-- PolyglotFile/polyglotzippdf.py | 37
4 files changed, 209 insertions, 0 deletions
diff --git a/PolyglotFile/__init__.py b/PolyglotFile/__init__.py
new file mode 100644
index 0000000..4261a1a
--- /dev/null
+++ b/PolyglotFile/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
+
+from .polyglotpdfzip import PolyglotPdfZip
+from .polyglotzippdf import PolyglotZipPdf
+from .polyglotszippdf import PolyglotSZipPdf
+
diff --git a/PolyglotFile/polyglotpdfzip.py b/PolyglotFile/polyglotpdfzip.py
new file mode 100644
index 0000000..81c3f06
--- /dev/null
+++ b/PolyglotFile/polyglotpdfzip.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+import logging
+
+'''
+    |-------------------------------|        -
+    |--------- PDF Header ----------K1       | J1
+    |-------------------------------|        -
+    |----- PDF OBJ 1 = ZIP Data ----K2       |
+    |-------------------------------|        -
+    |---- Original PDF Ojbects -----K3       | J2
+    |-------------------------------|        -
+    |--- Last OBJ = End Zip Data ---K4       |
+    |-------------------------------|        |
+    |---------- Xref Table ---------|        |
+    |-------------------------------K5       |
+    |----------- Trailer -----------|        |
+    |-------------------------------|        |
+'''
+
+
+class PolyglotPdfZip():
+    from PdfFileTransformer import Pdf
+    from ZipFileTransformer import Zip
+
+    def __init__(self, Pdf, Zip):
+        self.buffer = bytearray()
+        self.pdf = Pdf
+        self.zip = Zip
+        self.buffer = bytearray()
+
+    def generate(self):
+        k2_stream = self.zip.buffer[:self.zip.end_of_data]
+        size_k2_stream = len(k2_stream)
+        self.pdf.insert_new_obj_stream_at_start(k2_stream)
+        offset_k2_stream = self.pdf.get_first_stream_offset()
+
+        k4_stream = self.zip.buffer[self.zip.central_dir_file_header:]
+        size_k4_stream = len(k4_stream)
+        self.pdf.insert_new_obj_stream_at_end(k4_stream)
+        offset_k4_stream = self.pdf.get_last_stream_offset()
+
+        pdf_buffer = self.pdf.get_build_buffer()
+
+        j1 = pdf_buffer[0:offset_k2_stream]
+        j2 = pdf_buffer[offset_k2_stream + size_k2_stream:offset_k4_stream]
+        self.zip.add_data_to_file(j1, j2, True)
+
+        k5 = pdf_buffer[offset_k4_stream + size_k4_stream:]
+        self.buffer = self.zip.buffer + k5
+
+    def write(self, filename):
+        fd = open(filename, "wb")
+        fd.write(self.buffer)
+        fd.close()
diff --git a/PolyglotFile/polyglotszippdf.py b/PolyglotFile/polyglotszippdf.py
new file mode 100644
index 0000000..0796946
--- /dev/null
+++ b/PolyglotFile/polyglotszippdf.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+from .polyglotpdfzip import PolyglotPdfZip
+import logging
+import tempfile
+from ZipFileTransformer import ZipFile
+from ZipFileTransformer import Zip
+from PdfFileTransformer import Pdf
+
+'''
+    |-----------------------------------|        -
+    |--------- ZIP Data[0] = -----------|        |
+    |- PDF Header + PDF Obj[0] Header --|        |
+    |-----------------------------------|        | K2
+    |------- PDF Obj[0] stream = ------|         |
+    |--------- ZIP Data LF [1:] --------|        |
+    |-----------------------------------|        -
+    |------ Original PDF Ojbects -------|        |
+    |-----------------------------------|        |
+    |------------ Xref Table -----------|        |
+    |-----------------------------------|        | J2
+    |------------- Trailer -------------|        |
+    |-----------------------------------|        -
+    |---------- End Zip Data -----------|
+    |-----------------------------------|
+'''
+
+
+class PolyglotSZipPdf(PolyglotPdfZip):
+
+    def __init__(self, Pdf, Zip):
+        super().__init__(Pdf, Zip)
+
+    def get_rebuild_zip_first_part_size(self):
+
+        zo_path = tempfile.mkstemp()[1]
+        logging.info("use tmp file zip: " + zo_path)
+        zo = ZipFile(zo_path, 'a')
+        zi = ZipFile(self.zip.filename, 'r')
+        for zipinfo in zi.infolist():
+            zo.writestr(zipinfo, zi.read(zipinfo))
+        zi.close()
+        zo.close()
+
+        rebuild_zip = Zip(zo_path)
+
+        p = rebuild_zip.end_of_data
+        k2_stream = rebuild_zip.buffer[:p]
+
+        size_k2_stream = len(k2_stream)
+
+        return size_k2_stream
+
+    def get_pdf_header(self):
+        return self.pdf.get_file_header()
+
+    def generate_zip_with_pdf_part(self, filename, pdf_data):
+
+        zo = ZipFile(filename, 'a')
+        zi = ZipFile(self.zip.filename, 'r')
+        zo.writestr(' ', pdf_data, 0)
+        for zipinfo in zi.infolist():
+            zo.writestr(zipinfo, zi.read(zipinfo))
+        zi.close()
+        zo.close()
+
+    def get_rebuild_pdf(self, zo_path, offset):
+        '''
+        Generate polyglot with final zip.
+        '''
+        new_zip = Zip(zo_path)
+        new_pdf = Pdf(self.pdf.filename)
+
+        p1 = new_zip.end_of_first_local_file_header
+        p2 = new_zip.end_of_data
+        k2_stream = new_zip.buffer[p1:p2]
+
+        size_k2_stream = len(k2_stream)
+        new_pdf.insert_new_obj_stream_at_start(k2_stream)
+        k2_stream_offset = new_pdf.get_first_stream_offset()
+
+        new_pdf.file_offset = offset
+        pdf_buffer = new_pdf.get_build_buffer()
+        j2 = pdf_buffer[k2_stream_offset + size_k2_stream:]
+        new_zip.add_data_to_file(b'', j2, True)
+
+        return new_zip.buffer
+
+    def get_pdf_offset(self, zipfile):
+
+        f = open(zipfile, "rb")
+        data = f.read()
+        return data.find(b"%PDF")
+
+    def generate(self):
+
+        zip_stream_size = self.get_rebuild_zip_first_part_size()
+        pdf_header = self.get_pdf_header()
+        pdf_header = (pdf_header +
+                      b'1 0 obj\n<<\n/Filter /FlateDecode\n/Length ' +
+                      str(zip_stream_size).encode("utf-8") +
+                      b'\n>>\nstream\n')
+
+        filename = tempfile.mkstemp()[1]
+        logging.info("use tmp file for new zip: " + filename)
+        self.generate_zip_with_pdf_part(filename, pdf_header)
+
+        pdf_offset = self.get_pdf_offset(filename)
+
+        self.buffer = self.get_rebuild_pdf(filename, pdf_offset)
diff --git a/PolyglotFile/polyglotzippdf.py b/PolyglotFile/polyglotzippdf.py
new file mode 100644
index 0000000..2493663
--- /dev/null
+++ b/PolyglotFile/polyglotzippdf.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+from .polyglotpdfzip import PolyglotPdfZip
+
+
+'''
+    |-------------------------------|        -
+    |--------- PDF Header ----------K1       | J1
+    |-------------------------------|        -
+    |----- PDF OBJ 1 = ZIP Data ----K2       |
+    |-------------------------------|        -
+    |---- Original PDF Ojbects -----K3       |
+    |-------------------------------|        |
+    |---------- Xref Table ---------|        |
+    |-------------------------------K4       | J2
+    |----------- Trailer -----------|        |
+    |-------------------------------|        -
+    |-------- End Zip Data ---------|        |
+    |-------------------------------|        |
+'''
+
+
+class PolyglotZipPdf(PolyglotPdfZip):
+
+    def generate(self):
+        k2_stream = self.zip.buffer[:self.zip.end_of_data]
+        size_k2_stream = len(k2_stream)
+        self.pdf.insert_new_obj_stream_at_start(k2_stream)
+        offset_k2_stream = self.pdf.get_first_stream_offset()
+
+        pdf_buffer = self.pdf.get_build_buffer()
+
+        j1 = pdf_buffer[0:offset_k2_stream]
+        j2 = pdf_buffer[offset_k2_stream + size_k2_stream:]
+
+        self.zip.add_data_to_file(j1, j2, True)
+        self.buffer = self.zip.buffer