Autres langues

Code source de plmapp.utils.archive

import os.path
import tarfile
import itertools
from cStringIO import StringIO
import struct, time, sys
import binascii, stat
from zipfile import ZipInfo, ZIP_STORED, ZIP_DEFLATED, LargeZipFile, ZIP64_LIMIT

try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

def get_available_name(name, exiting_files):
    """
    Return a variant of *name* that is not present in *exiting_files*.

    If *name* is not in *exiting_files* it is returned unchanged.
    Otherwise an underscore and a number are inserted before the file
    extension (``a.txt`` -> ``a_1.txt``, ``a_2.txt``, ...) until the
    generated name is not in *exiting_files*.

    :param name: wanted file name, may include a directory part
    :param exiting_files: container of names already in use (only
        membership tests are performed on it; it is not modified)
    :return: *name* or the first numbered variant that is not taken
    """
    dir_name, file_name = os.path.split(name)
    file_root, file_ext = os.path.splitext(file_name)
    count = itertools.count(1)
    while name in exiting_files:
        # file_ext includes the dot; the suffix is always appended to the
        # original root, so the candidates are root_1, root_2, ...
        # next(count) works on Python 2.6+ and Python 3 alike.
        candidate = "%s_%s%s" % (file_root, next(count), file_ext)
        name = os.path.join(dir_name, candidate)
    return name


#: Compression mode: each file is compressed or stored according to its
#: extension
ZIP_AUTO = -1

#: formats that are stored uncompressed.
#: Entries are lower-case extensions WITHOUT the leading dot: they are
#: compared against ``ext[1:]`` by :meth:`IterZipFile.write`.
STORED_FORMATS = set((
    "zip", "gz", "bz2", "tgz", "xz", "rar", "zipx",  # archives
    "png", "gif", "jpg", "jpeg", "svgz",  # images
    "odt", "odf", "ods", "odm", "ott", "odp", "otp", "odg",  # openDocument
    "docx", "docm", "xlsx", "xlsm", "pptx", "pptm", "dotx",  # openXML
    "flac", "ogg", "mp3", "m4a", "ace", "aac", "m4p", "mpa",  # audio
    "mp2", "ra", "rm",
    "avi", "dat", "mpeg", "mpg", "mkv", "mov", "wmv",  # video
    "flv", "3gp", "aaf", "ram",
))

# constants taken from zipfile module
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# Central directory file header: struct layout, magic number and size
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# End of central directory record: struct layout, magic number and size
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
class IterZipFile:
    """
    A write-only ZipFile that does not write to a file but yields
    its output.

    Example::

        z = IterZipFile()
        for buf in z.write(filename, arcname):
            out.write(buf)
        for buf in z.close():
            out.write(buf)

    The code is mostly based on :class:`zipfile.ZipFile`.

    Each generator returned by :meth:`write` must be fully consumed, in
    order, before the next :meth:`write` or the final :meth:`close`:
    offsets are tracked in ``self.tell`` as chunks are yielded and an entry
    is registered for the central directory only once its generator
    finishes.

    :param compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires
                        zlib) or ZIP_AUTO (compression or not according to the
                        filename).
    :param allowZip64: if True ZipFile will create files with ZIP64
                       extensions when needed, otherwise it will raise an
                       exception when this would be necessary.
    :raises RuntimeError: if *compression* is unknown, or needs zlib and
                          zlib is missing
    """

    def __init__(self, compression=ZIP_AUTO, allowZip64=False):
        if compression == ZIP_STORED:
            pass
        elif compression in (ZIP_DEFLATED, ZIP_AUTO):
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = "w"
        self.comment = ''
        self.tell = 0           # Number of bytes yielded so far

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        :raises RuntimeError: unsupported compression type, or deflate
            requested while zlib is missing
        :raises LargeZipFile: the entry would need ZIP64 extensions and
            ``allowZip64`` is false
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile(
                    "Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Yield the zip entry (local header, data, data descriptor) for
        the file *filename*, stored in the archive under the name *arcname*.

        :param filename: path of the file (or directory) to add
        :param arcname: name of the entry in the archive (default:
            *filename*); drive and leading separators are stripped
        :param compress_type: ZIP_STORED or ZIP_DEFLATED; if None, the
            compression given to the constructor is used (with ZIP_AUTO
            the choice depends on the extension of *filename*)
        :return: a generator of byte strings
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if self.compression == ZIP_AUTO:
            ext = os.path.splitext(filename)[1].lower()
            # ext includes the dot, STORED_FORMATS entries are compared
            # without it
            if ext and ext[1:] in STORED_FORMATS:
                compression = ZIP_STORED
            else:
                compression = ZIP_DEFLATED
        else:
            compression = self.compression
        if compress_type is None:
            zinfo.compress_type = compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.header_offset = self.tell    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # A directory has no data: CRC and sizes are known (zero) when
            # the header is built and no data descriptor is emitted, so
            # flag bit 3 must stay clear for this entry.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            header = zinfo.FileHeader()
            yield header
            self.tell += len(header)
            return

        # Bit 3: CRC and sizes are not known when the header is yielded,
        # they follow the data in a data descriptor.
        zinfo.flag_bits |= 0x08
        CRC = 0
        compress_size = 0
        file_size = 0
        # The with statement closes the file even if the consumer abandons
        # this generator or a read fails.
        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = 0
            zinfo.compress_size = 0
            zinfo.file_size = 0
            header = zinfo.FileHeader()
            yield header
            self.tell += len(header)
            if zinfo.compress_type == ZIP_DEFLATED:
                # negative wbits: raw deflate stream, as in zipfile
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                yield buf
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            yield buf
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        self.tell += zinfo.compress_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # write the data descriptor
        # NOTE(review): sizes are packed as 32-bit values here, so an entry
        # above that limit cannot be described even when allowZip64 is set.
        data_descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size)
        yield data_descriptor
        self.tell += len(data_descriptor)
        # Registered only now: close() needs the final CRC and sizes.
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def close(self):
        """Yield the ending records of the archive: the central directory,
        the ZIP64 end records when they are needed, the end-of-archive
        record and the archive comment.

        :return: a generator of byte strings
        """
        pos1 = self.tell
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                    '<HH' + 'Q' * len(extra),
                    1, 8 * len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(
                    structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
            except DeprecationWarning:
                # Dump the values that could not be packed, then re-raise
                sys.stderr.write("%s\n" % ((
                    structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset),))
                raise
            yield centdir
            yield filename
            yield extra_data
            yield zinfo.comment
            self.tell += (len(centdir) + len(filename) + len(extra_data)
                          + len(zinfo.comment))

        pos2 = self.tell
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            yield zip64endrec

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            yield zip64locrec
            # The regular record only stores saturated values in that case
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        # check for valid comment length
        if len(self.comment) > ZIP_MAX_COMMENT:
            if self.debug > 0:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            self.comment = self.comment[:ZIP_MAX_COMMENT]

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self.comment))
        yield endrec
        yield self.comment
def generate_tarfile(files):
    """
    Return a generator that yields *files* as an uncompressed tar archive.

    Headers and contents are yielded piece by piece: no temporary file is
    created and only small chunks are held in memory, so large files can be
    served efficiently.

    Names already used in the archive are made unique with
    :func:`get_available_name`.

    :param files: a sequence of :class:`.DocumentFile`
    """
    # The TarFile is only used to build the TarInfo headers; nothing is
    # written through it.
    sink = StringIO()
    archive = tarfile.open(mode="w", fileobj=sink)
    used_names = set()
    for df in files:
        # build and yield the header
        member_name = get_available_name(df.filename, used_names)
        used_names.add(member_name)
        info = archive.gettarinfo(df.file.path, member_name)
        content, size = df.document.get_leaf_object().get_content_and_size(df)
        # the document owner replaces the owner of the file on disk
        owner = df.document.owner.username
        info.uname = owner
        info.gname = owner
        info.size = size
        yield info.tobuf()
        # yield the content of the file
        try:
            while True:
                chunk = content.read(512)
                # the final, empty chunk is yielded too before leaving
                yield chunk
                if not chunk:
                    break
            # pad the member up to a whole number of tar blocks
            remainder = info.size % tarfile.BLOCKSIZE
            if remainder > 0:
                yield (tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
        finally:
            content.close()
    # two NUL blocks mark the end of the tar file
    yield (tarfile.NUL * tarfile.BLOCKSIZE * 2)
def generate_zipfile(files):
    """
    Return a generator that yields *files* as a zip archive.

    The archive is produced by an :class:`IterZipFile` built with its
    default arguments. No temporary file is created and the data are
    yielded chunk by chunk, so large files can be served efficiently.

    Names already used in the archive are made unique with
    :func:`get_available_name`.

    :param files: a sequence of :class:`.DocumentFile`
    """
    archive = IterZipFile()
    used_names = set()
    for df in files:
        member_name = get_available_name(df.filename, used_names)
        used_names.add(member_name)
        content, _size = df.document.get_leaf_object().get_content_and_size(df)
        # IterZipFile.write() reads the file by path: the opened content is
        # only used for its name and closed afterwards.
        source_path = content.name
        try:
            for chunk in archive.write(source_path, member_name):
                yield chunk
        finally:
            content.close()
    # central directory and end-of-archive records
    for chunk in archive.close():
        yield chunk
# Maps an archive format name to the generator function that builds it.
_generators = {
    "zip": generate_zipfile,
    "tar": generate_tarfile,
}

#: List of available archive formats (currently: ``zip`` and ``tar``).
ARCHIVE_FORMATS = list(_generators)
def generate_archive(files, format):
    """
    Return the generator that yields *files* as an archive of type *format*.

    :param files: passed unchanged to the generator function registered
        for *format*
    :param format: name of the archive format, a key of ``_generators``
        (``"zip"`` or ``"tar"``)
    :raises KeyError: if *format* is not a registered format
    """
    build_archive = _generators[format]
    return build_archive(files)