import os.path
import tarfile
import itertools
from cStringIO import StringIO
import struct, time, sys
import binascii, stat
from zipfile import ZipInfo, ZIP_STORED, ZIP_DEFLATED, LargeZipFile, ZIP64_LIMIT
# zlib is optional: when present we can DEFLATE and use its (faster) crc32;
# when absent we fall back to binascii.crc32 and only ZIP_STORED works.
try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
def get_available_name(name, exiting_files):
    """Return a filename based on *name* that is not in *exiting_files*.

    If *name* is free it is returned unchanged.  Otherwise an underscore
    and a counter are inserted before the extension (``a.txt`` ->
    ``a_1.txt``, ``a_2.txt``, ...) until an unused name is found.

    :param name: candidate path (may include a directory part)
    :param exiting_files: collection of names already taken
                          (sic: historical typo for "existing_files",
                          kept for keyword-argument compatibility)
    :return: a name not present in *exiting_files*
    """
    dir_name, file_name = os.path.split(name)
    file_root, file_ext = os.path.splitext(file_name)
    # If the filename already exists, add an underscore and a number (before
    # the file extension, if one exists) to the filename until the generated
    # filename doesn't exist.
    count = itertools.count(1)
    while name in exiting_files:
        # file_ext includes the dot; next() works on Python 2.6+ and 3.x,
        # unlike the iterator's .next() method which is Python-2-only.
        name = os.path.join(dir_name, "%s_%s%s" % (file_root, next(count), file_ext))
    return name
#: Files are compressed or not according to their extension
ZIP_AUTO = -1

#: formats that are stored uncompressed (already-compressed containers);
#: compared against the lowercased extension WITHOUT its leading dot
#: (see IterZipFile.write, which tests ``ext[1:] in STORED_FORMATS``) --
#: so entries must not start with a dot.
STORED_FORMATS = set((
    "zip", "gz", "bz2", "tgz", "xz", "rar", "zipx",          # archives
    "png", "gif", "jpg", "jpeg", "svgz",                     # images
    "odt", "odf", "ods", "odm", "ott", "odp", "otp",         # openDocument
    "odg",
    "docx", "docm", "xlsx", "xlsm", "pptx", "pptm", "dotx",  # openXML
    "flac", "ogg", "mp3", "m4a", "ace", "aac", "m4p", "mpa", # audio
    "mp2", "ra", "rm",
    "avi", "dat", "mpeg", "mpg", "mkv", "mov", "wmv",        # video
    "flv", "3gp", "aaf", "ram",
))

# constants taken from zipfile module
ZIP_FILECOUNT_LIMIT = 1 << 16        # max entries without ZIP64
ZIP_MAX_COMMENT = (1 << 16) - 1      # archive comment length is a 16-bit field

# central directory file header: signature + versions/flags/sizes/offsets
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# end of central directory record
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
class IterZipFile:
    """ A write-only ZipFile that does not write to a file but yields
    its output.

    Example::

        z = IterZipFile()
        for buf in z.write(filename, arcname):
            # do stuff with buf
        for buf in z.close():
            # do stuff with buf

    The code is mostly based on :class:`zipfile.ZipFile`.

    :param compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib)
                        or ZIP_AUTO (compression or not according to the filename).
    :param allowZip64: if True ZipFile will create files with ZIP64 extensions when
                       needed, otherwise it will raise an exception when this would
                       be necessary.
    """

    def __init__(self, compression=ZIP_AUTO, allowZip64=False):
        # Validate the compression mode up front: ZIP_DEFLATED and ZIP_AUTO
        # both need zlib at write time, so fail early if it is missing.
        if compression == ZIP_STORED:
            pass
        elif compression in (ZIP_DEFLATED, ZIP_AUTO):
            if not zlib:
                raise RuntimeError,\
                      "Compression requires the (missing) zlib module"
        else:
            raise RuntimeError, "That compression method is not supported"
        self._allowZip64 = allowZip64
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = "w"   # write-only, mirroring ZipFile(mode="w")
        self.comment = ''
        self.tell = 0           # number of bytes yielded so far (virtual file position)

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print "Duplicate name:", zinfo.filename
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError, \
                  "Compression requires the (missing) zlib module"
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError, \
                  "That compression method is not supported"
        # Without ZIP64 extensions, sizes and offsets are 32-bit fields.
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        Generator: yields the local file header, then the (possibly
        compressed) file data in chunks, then the data descriptor
        (flag bit 0x08 is set, so CRC/sizes follow the data).

        :param filename: path of the file (or directory) on disk
        :param arcname: name stored in the archive (default: *filename*)
        :param compress_type: per-file override of the archive-level
                              compression mode
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        # Strip drive letter and leading separators so the stored name is relative.
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
        if self.compression == ZIP_AUTO:
            # Store already-compressed formats as-is, deflate everything else.
            ext = os.path.splitext(filename)[1].lower()
            compression = ZIP_STORED if ext and ext[1:] in STORED_FORMATS \
                    else ZIP_DEFLATED
        else:
            compression = self.compression
        if compress_type is None:
            zinfo.compress_type = compression
        else:
            zinfo.compress_type = compress_type
        zinfo.file_size = st.st_size
        # Bit 0x08: sizes/CRC are unknown when the header is emitted and are
        # supplied afterwards in a data descriptor (we stream, we cannot seek back).
        zinfo.flag_bits |= 0x08
        zinfo.header_offset = self.tell    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        if isdir:
            # Directories have no content: header only, sizes and CRC zero.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            header = zinfo.FileHeader()
            yield header
            self.tell += len(header)
            return
        fp = open(filename, "rb")
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        header = zinfo.FileHeader()
        yield header
        self.tell += len(header)
        if zinfo.compress_type == ZIP_DEFLATED:
            # Negative wbits (-15): raw deflate stream, no zlib header/trailer.
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        # Stream the file in 8 KiB chunks, updating CRC and sizes as we go.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            yield buf
        fp.close()
        if cmpr:
            # Flush any deflate output still buffered in the compressor.
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            yield buf
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        self.tell += zinfo.compress_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # write the data descriptor (CRC, compressed size, uncompressed size)
        data_descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size)
        yield data_descriptor
        self.tell += len(data_descriptor)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Generator: yields one central-directory entry per written file,
        then (if needed) the ZIP64 end records, and finally the
        end-of-central-directory record plus the archive comment.
        """
        count = 0
        pos1 = self.tell
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            # Pack the modification time into DOS date/time fields.
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            # Values exceeding the 32-bit limits are moved to a ZIP64 extra
            # field and the fixed fields are set to the 0xffffffff sentinel.
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size
            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffffL
            else:
                header_offset = zinfo.header_offset
            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data
                # ZIP64 requires at least version 4.5 to extract.
                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                 stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset)
            except DeprecationWarning:
                # Dump the offending values for debugging, then re-raise
                # (mirrors zipfile.ZipFile behaviour).
                print >>sys.stderr, (structCentralDir,
                 stringCentralDir, create_version,
                 zinfo.create_system, extract_version, zinfo.reserved,
                 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                 zinfo.CRC, compress_size, file_size,
                 len(zinfo.filename), len(extra_data), len(zinfo.comment),
                 0, zinfo.internal_attr, zinfo.external_attr,
                 header_offset)
                raise
            yield centdir
            yield filename
            yield extra_data
            yield zinfo.comment
            self.tell += len(centdir) + len(filename) + len(extra_data) + len(zinfo.comment)
        pos2 = self.tell
        # Write end-of-zip-archive record
        centDirCount = count
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
            centDirOffset > ZIP64_LIMIT or
            centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            yield zip64endrec
            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            yield zip64locrec
            # The classic end record then carries capped sentinel values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
        # check for valid comment length
        if len(self.comment) >= ZIP_MAX_COMMENT:
            if self.debug > 0:
                msg = 'Archive comment is too long; truncating to %d bytes' \
                      % ZIP_MAX_COMMENT
            self.comment = self.comment[:ZIP_MAX_COMMENT]
        endrec = struct.pack(structEndArchive, stringEndArchive,
                 0, 0, centDirCount, centDirCount,
                 centDirSize, centDirOffset, len(self.comment))
        yield endrec
        yield self.comment
def generate_tarfile(files):
    """
    Return a generator that yields *files* as a tar file.

    This generator does **not** create temporary files and is designed to not
    consume too much memory so it can be used to serve efficiently a tar file
    of large files.

    :param files: a sequence of :class:`.DocumentFile`
    """
    # tarfile needs a file object to construct headers; nothing of the
    # archive is actually written into it, we only use gettarinfo().
    fake_file = StringIO()
    tf = tarfile.open(mode="w", fileobj=fake_file)
    filenames = set()
    for df in files:
        # ensure each archive member gets a unique name
        filename = get_available_name(df.filename, filenames)
        filenames.add(filename)
        info = tf.gettarinfo(df.file.path, filename)
        f, size = df.document.get_leaf_object().get_content_and_size(df)
        try:
            # change the name of the owner and use the real content size
            info.uname = info.gname = df.document.owner.username
            info.size = size
            # yields the header
            yield info.tobuf()
            # yields the content of the file, one tar block at a time;
            # the original loop yielded every chunk after the first twice
            # and a trailing empty string, corrupting the archive.
            while True:
                s = f.read(tarfile.BLOCKSIZE)
                if not s:
                    break
                yield s
            # pad the member to a whole number of 512-byte blocks
            remainder = info.size % tarfile.BLOCKSIZE
            if remainder > 0:
                yield (tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
        finally:
            f.close()
    # yields the nul blocks that mark the end of the tar file
    yield (tarfile.NUL * tarfile.BLOCKSIZE * 2)
def generate_zipfile(files):
    """
    Return a generator that yields *files* as a zip file.

    No temporary file is created and memory consumption stays low, so this
    can be used to serve a zip archive of large files efficiently.

    :param files: a sequence of :class:`.DocumentFile`
    """
    archive = IterZipFile()
    used_names = set()
    for df in files:
        # pick a member name that is not already taken in the archive
        arcname = get_available_name(df.filename, used_names)
        used_names.add(arcname)
        f, size = df.document.get_leaf_object().get_content_and_size(df)
        path = f.name
        try:
            # stream header + data + descriptor for this member
            for chunk in archive.write(path, arcname):
                yield chunk
        finally:
            f.close()
    # central directory and end-of-archive records
    for chunk in archive.close():
        yield chunk
# Dispatch table mapping a format name to its streaming generator function.
_generators = {
    "zip" : generate_zipfile,
    "tar" : generate_tarfile,
}

#: List of available archive formats (currently: ``zip`` and ``tar``).
ARCHIVE_FORMATS = _generators.keys()
def generate_archive(files, format):
    """Return a generator streaming *files* archived in *format*.

    :param files: a sequence of :class:`.DocumentFile`
    :param format: one of :data:`ARCHIVE_FORMATS` (``"zip"`` or ``"tar"``)
    :raises KeyError: if *format* is not a known archive format
    """
    builder = _generators[format]
    return builder(files)