Source code

Revision control

Copy as Markdown

Other Tools

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import gzip
import os
import stat
import tarfile
from contextlib import contextmanager
# 2016-01-01T00:00:00+0000
DEFAULT_MTIME = 1451606400
# Python 3.9 contains a change to the tarfile module
# which changes the output of tar creation compared to earlier versions.
# This code is used to generate tar files that are meant to be deterministic
# across versions of python (specifically, it's used as part of computing the hash
# of docker images, which needs to be identical between CI (which uses python 3.8),
# and developer environments (using arbitrary versions of python, at this point,
# most probably more recent than 3.9)).
# What we do is subclass TarInfo so that if used on python >= 3.9, it reproduces the
# behavior from python < 3.9.
# Here's how it goes:
# - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded
# in the tarinfo is CHRTYPE or BLKTYPE.
# - the value of the type is only compared in the context of choosing which behavior
# to take
# - we replace the type with the same value (so that using the value has no changes)
# but that pretends to be the same as CHRTYPE so that the condition that enables the
# old behavior is taken.
class HackedType(bytes):
    """A ``bytes`` value that additionally compares equal to ``tarfile.CHRTYPE``.

    The underlying byte content is left untouched, so using the value (for
    example writing it out) behaves exactly like the wrapped ``bytes``; only
    equality checks against ``tarfile.CHRTYPE`` are affected.
    """

    def __eq__(self, other):
        """Return True for ``tarfile.CHRTYPE``, else the normal bytes equality."""
        if other == tarfile.CHRTYPE:
            return True
        # Delegate to the plain ``bytes`` comparison. Writing ``self == other``
        # here would dispatch back into this very method and recurse until a
        # RecursionError is raised.
        return bytes.__eq__(self, other)
class TarInfo(tarfile.TarInfo):
    """``tarfile.TarInfo`` variant that wraps the member type in ``HackedType``."""

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Wrap ``info["type"]`` in ``HackedType`` and defer to the stock builder.

        The ``info`` dict is updated in place before being handed to
        ``tarfile.TarInfo._create_header``.
        """
        wrapped_type = HackedType(info["type"])
        info["type"] = wrapped_type
        # ignore type checking because it looks like pyright complains because
        # we're calling a non-public method
        stock_create_header = tarfile.TarInfo._create_header  # type: ignore
        return stock_create_header(info, format, encoding, errors)
def _add_tar_member(tf, archive_path, fileobj, mode, size, source):
    """Append ``fileobj`` to ``tf`` as a regular file with normalized metadata.

    ``source`` is only used to identify the offending input in error messages.
    Raises ``ValueError`` if ``mode`` has the setuid or setgid bit set.
    """
    ti = TarInfo(archive_path)
    ti.mode = mode
    ti.type = tarfile.REGTYPE

    # Disallow setuid and setgid bits. This is an arbitrary restriction.
    # However, since we set uid/gid to root:root, setuid and setgid
    # would be a glaring security hole if the archive were
    # uncompressed as root.
    if ti.mode & (stat.S_ISUID | stat.S_ISGID):
        raise ValueError(f"cannot add file with setuid or setgid set: {source}")

    # Set uid, gid, username, and group as deterministic values.
    ti.uid = 0
    ti.gid = 0
    ti.uname = ""
    ti.gname = ""

    # Set mtime to a constant value.
    ti.mtime = DEFAULT_MTIME

    ti.size = size
    tf.addfile(ti, fileobj)


def create_tar_from_files(fp, files):
    """Create a tar file deterministically.

    Receives a dict mapping names of files in the archive to local filesystem
    paths or ``mozpack.files.BaseFile`` instances. A ``str`` value is treated
    as a path: it is stat'ed and opened in binary mode. Any other value is
    treated as an already-open binary file object: it is read once to
    determine its size, rewound with ``seek(0)`` and archived with mode
    ``0o644``.

    The files will be archived, in sorted order, and written to the passed
    file handle opened for writing.

    Only regular files can be written.

    Raises ``ValueError`` if a path does not refer to a regular file, or if a
    file has the setuid or setgid bit set.

    FUTURE accept a filename argument (or create APIs to write files)
    """
    # The format is explicitly set to tarfile.GNU_FORMAT, because this default format
    # has been changed in Python 3.8.
    with tarfile.open(
        name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT
    ) as tf:
        for archive_path, f in sorted(files.items()):
            if isinstance(f, str):
                s = os.stat(f)
                # The member type is always forced to REGTYPE, so the only
                # meaningful place to enforce "regular files only" is on the
                # stat result of the source path.
                if not stat.S_ISREG(s.st_mode):
                    raise ValueError(f"not a regular file: {f}")
                # Close the handle we opened ourselves, even when adding the
                # member fails.
                with open(f, "rb") as fh:
                    _add_tar_member(tf, archive_path, fh, s.st_mode, s.st_size, f)
            else:
                size = len(f.read())
                f.seek(0)
                # Caller-owned file objects are left open.
                _add_tar_member(tf, archive_path, f, 0o0644, size, f)
@contextmanager
def gzip_compressor(fp, filename=None, compresslevel=9):
    """Yield a ``GzipFile`` writer whose output is deterministic.

    Thin wrapper around ``gzip.GzipFile`` that fixes the header mtime.
    ``fp`` should be a file handle opened for writing in binary mode. The
    ``GzipFile`` is closed when the context exits.
    """
    header_name = filename or ""
    # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
    # value so output is deterministic.
    with gzip.GzipFile(
        filename=header_name,
        mode="wb",
        fileobj=fp,
        compresslevel=compresslevel,
        mtime=DEFAULT_MTIME,
    ) as gz_writer:
        yield gz_writer
def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
    """Write a deterministic gzip-compressed tar archive of ``files`` to ``fp``.

    Combines ``gzip_compressor`` and ``create_tar_from_files``: the tar
    stream is written through the gzip writer.

    ``fp`` should be a file handle opened for writing in binary mode. When
    the function returns, all data has been written to the handle.
    """
    compressor = gzip_compressor(fp, filename, compresslevel)
    with compressor as gz_stream:
        create_tar_from_files(gz_stream, files)