# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile

from util import build_utils

sys.path.insert(1, os.path.join(build_utils.DIR_SOURCE_ROOT, 'build'))
import print_python_deps

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))
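
# For example, to see why a particular target rebuilt (the output directory
# and target name here are hypothetical):
#   PRINT_BUILD_EXPLANATIONS=1 ninja -C out/Default some_target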


def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wraps CallAndRecordIfStale() and writes a depfile if applicable.

  Depfiles are automatically added to output_paths when present in the
  |options| argument. They are then created after |on_stale_md5| is called.

  By default, only python dependencies are added to the depfile. If there are
  other input paths that are not captured by GN deps, then they should be
  listed in depfile_deps. It's important to write paths to the depfile that
  are already captured by GN deps, since GN args can cause GN deps to change,
  and such changes are not immediately reflected in depfiles.
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  input_paths += print_python_deps.ComputePythonDependencies()

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write depfile even when inputs have not changed to ensure build correctness
  # on bots that build with & without patch, and the patch changes the depfile
  # location.
  if hasattr(options, 'depfile') and options.depfile:
    build_utils.WriteDepfile(options.depfile, output_paths[0], depfile_deps)
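
# A minimal usage sketch (illustrative; the callback, paths, and flag below
# are hypothetical, and |options| is assumed to be an argparse.Namespace that
# may carry a .depfile attribute):
#
#   def _OnStaleMd5():
#     with open('gen/foo.stamp.out', 'w') as f:
#       f.write('rebuilt')
#
#   CallAndWriteDepfileIfStale(
#       _OnStaleMd5,
#       options,
#       input_paths=['foo_input.txt'],
#       input_strings=['--enable-foo'],
#       output_paths=['gen/foo.stamp.out'],
#       depfile_deps=['foo_extra_dep.txt'])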


def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
    PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of
      .zip files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  too_new = []
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    record_mtime = os.path.getmtime(record_path)
    # Outputs newer than the change information must have been modified outside
    # of the build, and should be considered stale.
    too_new = [x for x in output_paths if os.path.getmtime(x) > record_mtime]
    if not too_new:
      with open(record_path, 'r') as jsonfile:
        try:
          old_metadata = _Metadata.FromFile(jsonfile)
        except:  # pylint: disable=bare-except
          pass  # Not yet using new file format.

  changes = Changes(old_metadata, new_metadata, force, missing_outputs,
                    too_new)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)
  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)
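
# A sketch of the pass_changes=True flow (all names here are hypothetical):
# the callback receives a Changes instance and can use it to rebuild
# incrementally instead of from scratch.
#
#   def _OnStaleMd5(changes):
#     if changes.AddedOrModifiedOnly():
#       _RebuildOnly(changes.IterChangedPaths())  # hypothetical helper
#     else:
#       _RebuildEverything()  # hypothetical helper
#
#   CallAndRecordIfStale(
#       _OnStaleMd5,
#       input_paths=['a.zip', 'b.txt'],
#       output_paths=['out/foo.zip'],
#       pass_changes=True,
#       track_subpaths_allowlist=['a.zip'])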


class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs,
               too_new):
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs
    self.too_new = too_new

  def _GetOldTag(self, path, subpath=None):
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
    elif self.too_new:
      return 'Outputs newer than stamp file:\n ' + '\n '.join(self.too_new)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n ' + '\n '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend(' -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend(' -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend(' -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n ' + '\n '.join(lines)
    return 'I have no idea what changed (there is a bug).'
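
# Illustrative only: given a Changes instance |changes| for a zip that was in
# track_subpaths_allowlist (path names hypothetical), per-entry deltas can be
# inspected like so:
#
#   for path in changes.IterModifiedPaths():
#     for subpath in changes.IterChangedSubpaths(path):
#       print('Needs reprocessing:', path, subpath)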


class _Metadata(object):
  """Data model for tracking change metadata.

  Args:
    track_entries: Enables per-file change tracking. Slower, but required for
        Changes functionality.
  """
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }

  def __init__(self, track_entries=False):
    self._track_entries = track_entries
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object."""
    ret = cls()
    obj = json.load(fileobj)
    ret._files_md5 = obj['files-md5']
    ret._strings_md5 = obj['strings-md5']
    ret._files = obj.get('input-files', [])
    ret._strings = obj.get('input-strings', [])
    return ret

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    obj = {
        'files-md5': self.FilesMd5(),
        'strings-md5': self.StringsMd5(),
    }
    if self._track_entries:
      obj['input-files'] = sorted(self._files, key=lambda e: e['path'])
      obj['input-strings'] = self._strings
    json.dump(obj, fileobj, indent=2)

  def _AssertNotQueried(self):
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
                                            (e[1] for e in entries)))
    self._files.append({
        'path': path,
        'tag': tag,
        'entries': [{"path": e[0], "tag": e[1]} for e in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      self._files_md5 = _ComputeInlineMd5(
          self.GetTag(p) for p in sorted(self.IterPaths()))
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath."""
    if self._file_map is None:
      self._file_map = {}
      for entry in self._files:
        self._file_map[(entry['path'], None)] = entry
        for subentry in entry.get('entries', ()):
          self._file_map[(entry['path'], subentry['path'])] = subentry
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath."""
    ret = self._GetEntry(path, subpath)
    return ret and ret['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    outer_entry = self._GetEntry(path)
    if not outer_entry:
      return ()
    subentries = outer_entry.get('entries', [])
    return (entry['path'] for entry in subentries)
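
# A small sketch of how CallAndRecordIfStale() drives _Metadata (file names
# are made up): entries are added first, then the aggregate digests are
# queried, after which _AssertNotQueried() forbids further additions.
#
#   meta = _Metadata(track_entries=True)
#   meta.AddStrings(['--enable-foo'])
#   meta.AddFile('in.txt', _ComputeTagForPath('in.txt'))
#   meta.AddZipFile('in.zip', _ExtractZipEntries('in.zip'))
#   print(meta.FilesMd5(), meta.StringsMd5())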


def _ComputeTagForPath(path):
  stat = os.stat(path)
  if stat.st_size > 1 * 1024 * 1024:
    # Fall back to mtime for large files so that md5_check does not take too
    # long to run.
    return stat.st_mtime
  md5 = hashlib.md5()
  with open(path, 'rb') as f:
    md5.update(f.read())
  return md5.hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters."""
  md5 = hashlib.md5()
  for item in iterable:
    md5.update(str(item).encode('ascii'))
  return md5.hexdigest()
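
# Note that items are concatenated with no separator, so for example
# _ComputeInlineMd5(['ab', 'c']) == _ComputeInlineMd5(['a', 'bc']).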


def _ExtractZipEntries(path):
  """Returns a list of (path, CRC32) of all files within |path|."""
  entries = []
  with zipfile.ZipFile(path) as zip_file:
    for zip_info in zip_file.infolist():
      # Skip directories and empty files.
      if zip_info.CRC:
        entries.append(
            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
  return entries
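
# Note: reading the central directory via infolist() avoids decompressing any
# entries; each entry's CRC-32 is already recorded there, which makes this
# tag cheap to compute. Folding in compress_type presumably ensures that a
# change in compression method alone also changes the tag.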