From caaaaafeb09165971b49a948bab1ec2189031319 Mon Sep 17 00:00:00 2001 From: ben Date: Sat, 29 Sep 2018 00:12:49 +0200 Subject: Add pdfraw format, v1.5 --- PolyglotFile/__init__.py | 1 + PolyglotFile/polyglotpdfraw.py | 40 ++++++++++++++++++++++++++++++++++++++++ truepolyglot | 36 ++++++++++++++++++++++++++---------- 3 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 PolyglotFile/polyglotpdfraw.py diff --git a/PolyglotFile/__init__.py b/PolyglotFile/__init__.py index 4261a1a..6800028 100644 --- a/PolyglotFile/__init__.py +++ b/PolyglotFile/__init__.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from .polyglotpdfzip import PolyglotPdfZip +from .polyglotpdfraw import PolyglotPdfRaw from .polyglotzippdf import PolyglotZipPdf from .polyglotszippdf import PolyglotSZipPdf diff --git a/PolyglotFile/polyglotpdfraw.py b/PolyglotFile/polyglotpdfraw.py new file mode 100644 index 0000000..c524788 --- /dev/null +++ b/PolyglotFile/polyglotpdfraw.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +import logging + +''' + |-------------------------------| - + |--------- PDF Header ----------K1 | J1 + |-------------------------------| - + |----- PDF OBJ 1 = RAW Data ----K2 | + |-------------------------------| - + |---- Original PDF Objects -----K3 | J2 + |-------------------------------| - + |---------- Xref Table ---------| | + |-------------------------------K5 | + |----------- Trailer -----------| | + |-------------------------------| | +''' + + +class PolyglotPdfRaw(): + from PdfFileTransformer import Pdf + + def __init__(self, Pdf, Raw_filename): + self.buffer = bytearray() + self.pdf = Pdf + self.raw_filename = Raw_filename + self.buffer = bytearray() + + def generate(self): + raw_buffer = bytearray() + with open(self.raw_filename, "rb") as f: + raw_buffer = f.read() + k2_stream = raw_buffer + self.pdf.insert_new_obj_stream_at_start(k2_stream) + self.buffer = self.pdf.get_build_buffer() + + def write(self, filename): + fd = open(filename, "wb") 
fd.write(self.buffer) + fd.close() diff --git a/truepolyglot b/truepolyglot index 4b4075f..5208e64 100755 --- a/truepolyglot +++ b/truepolyglot @@ -6,7 +6,10 @@ import logging from PdfFileTransformer import Pdf from ZipFileTransformer import Zip -from PolyglotFile import PolyglotZipPdf, PolyglotPdfZip, PolyglotSZipPdf +from PolyglotFile import PolyglotZipPdf +from PolyglotFile import PolyglotPdfZip +from PolyglotFile import PolyglotSZipPdf +from PolyglotFile import PolyglotPdfRaw def main(): @@ -17,9 +20,11 @@ def main(): ' The format is closest to ZIP.\n' + '* szippdf: Generate a file valid as ZIP and PDF.' + ' The format is strictly a ZIP.' + - ' Archive is modified.') + ' Archive is modified.' + '* pdfraw: Generate a file strictly valid as a PDF ' + + ' with a custom first object content.') usage_str = '%(prog)s format [options] output-file' - epilog_str = 'TruePolyglot v1.4.1' + epilog_str = 'TruePolyglot v1.5' frm = argparse.RawTextHelpFormatter parser = argparse.ArgumentParser(description=description_str, epilog=epilog_str, @@ -27,12 +32,15 @@ def main(): formatter_class=frm) parser.add_argument('format', nargs='+', choices=["pdfzip", "zippdf", - "szippdf"], + "szippdf", + "pdfraw"], help='Output polyglot format') parser.add_argument('--pdffile', dest='pdffile', help='PDF input file') parser.add_argument('--zipfile', dest='zipfile', help='ZIP input file') + parser.add_argument('--rawfile', dest='rawfile', + help='RAW input file') parser.add_argument('--acrobat-compatibility', dest='acrobat_compatibility', help='Add a byte at start for Acrobat Reader compatibility with szippdf format', @@ -46,14 +54,14 @@ def main(): args = parser.parse_args() - formats = ["pdfzip", "zippdf", "szippdf"] if args.acrobat_compatibility and args.format[0] != "szippdf": parser.error('--acrobat-compatibility is for szippdf only') - if args.format[0] in formats: - if args.pdffile is None: + if "pdf" in args.format[0] and args.pdffile is None: parser.error('pdffile is required') - if 
args.zipfile is None: + if "zip" in args.format[0] and args.zipfile is None: parser.error('zipfile is required') + if "raw" in args.format[0] and args.rawfile is None: + parser.error('rawfile is required') if args.verbose == "none": logging.basicConfig(level=logging.CRITICAL) @@ -64,14 +72,22 @@ def main(): if args.verbose == "debug": logging.basicConfig(level=logging.DEBUG) - p = Pdf(args.pdffile) - z = Zip(args.zipfile) if args.format[0] == "pdfzip": + p = Pdf(args.pdffile) + z = Zip(args.zipfile) a = PolyglotPdfZip(p, z) if args.format[0] == "zippdf": + p = Pdf(args.pdffile) + z = Zip(args.zipfile) a = PolyglotZipPdf(p, z) if args.format[0] == "szippdf": + p = Pdf(args.pdffile) + z = Zip(args.zipfile) a = PolyglotSZipPdf(p, z, args.acrobat_compatibility) + if args.format[0] == "pdfraw": + p = Pdf(args.pdffile) + a = PolyglotPdfRaw(p, args.rawfile) + a.generate() a.write(args.output_file[0]) -- cgit v1.2.3