# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import os
import requests
import urlparse
"""Simple script for downloading structured logs from treeherder.
For the moment this is specialised to work with web-platform-tests
logs; in due course it should move somewhere generic and get hooked
up to mach or similar"""
# The interpretation of the "job" list is defined by the treeherder jobs API.
def create_parser():
    """Build the command-line argument parser for this script.

    Two positional arguments are required: the branch the jobs ran on
    and the commit hash identifying the push.
    """
    parser = argparse.ArgumentParser()
    for arg_name, help_text in [
        ("branch", "Branch on which jobs ran"),
        ("commit", "Commit hash for push"),
    ]:
        parser.add_argument(arg_name, action="store", help=help_text)
    return parser
def download(url, prefix, dest, force_suffix=True):
    """Download *url* into a uniquely named ``.log`` file under *dest*.

    :param url: URL of the log to fetch.
    :param prefix: Filename prefix for the saved log; may be None.
    :param dest: Destination directory; defaults to the current directory.
    :param force_suffix: When True (the default) always append a numeric
        suffix, even when ``<prefix>.log`` does not exist yet.
    :raises requests.HTTPError: If the server responds with an error status.
    """
    if dest is None:
        dest = "."
    if prefix is None:
        # Guard against TypeError in the string concatenation below.
        prefix = ""
    if prefix and not force_suffix:
        name = os.path.join(dest, prefix + ".log")
    else:
        name = None
    counter = 0
    # Probe for a filename that doesn't clash with an existing log.
    while not name or os.path.exists(name):
        counter += 1
        sep = "" if not prefix else "-"
        name = os.path.join(dest, prefix + sep + str(counter) + ".log")
    # Issue the request (and check its status) before creating the file,
    # so a failed request doesn't leave an empty or error-page log behind.
    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    with open(name, "wb") as f:
        for chunk in resp.iter_content(1024):
            f.write(chunk)
def fetch_json(url, params=None):
    """GET *url* expecting JSON and return the decoded response body.

    :param url: URL to request.
    :param params: Optional query parameters for the request.
    :raises requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(
        url=url,
        params=params,
        headers={
            "Accept": "application/json",
            "User-Agent": "wpt-fetchlogs",
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
def get_blobber_url(branch, job):
    """Return the URL of the raw wpt log artifact for *job*, or None.

    :param branch: Unused; kept for interface compatibility with callers.
    :param job: Job dict from the treeherder jobs API; must contain a
        "job_guid" key.
    :returns: The artifact URL string, or None when no matching artifact
        (or no job detail data) is found.
    """
    job_guid = job["job_guid"]
    # NOTE(review): treeherder_base is not defined anywhere in this file's
    # visible scope -- presumably a module-level constant; confirm.
    artifact_url = urlparse.urljoin(treeherder_base, "/api/jobdetail/")
    artifact_params = {
        "job_guid": job_guid,
    }
    job_data = fetch_json(artifact_url, params=artifact_params)
    if not job_data:
        return None
    try:
        for item in job_data["results"]:
            # Accept either known raw-log artifact name.
            if item["value"] in ("wpt_raw.log", "log_raw.log"):
                return item["url"]
    except (KeyError, TypeError):
        # Malformed job-detail payload: treat as "no log available"
        # rather than swallowing every exception as the old bare
        # `except Exception` did.
        return None
    return None
def get_structured_logs(branch, commit, dest=None):
    """Download the raw structured log of every wpt job in a push.

    :param branch: Branch (treeherder project) on which the jobs ran.
    :param commit: Commit hash identifying the push.
    :param dest: Directory to save the logs into; defaults to the
        current directory.
    :raises ValueError: If no result set exists for *commit* on *branch*.
    """
    # NOTE(review): treeherder_base is not defined in this file's visible
    # scope -- presumably a module-level constant; confirm.
    resultset_url = urlparse.urljoin(
        treeherder_base, "/api/project/%s/resultset/" % branch
    )
    resultset_params = {
        "revision": commit,
    }
    revision_data = fetch_json(resultset_url, params=resultset_params)
    results = revision_data["results"]
    if not results:
        # Fail with a clear message instead of an opaque IndexError.
        raise ValueError(
            "No result set found for revision %s on branch %s" % (commit, branch)
        )
    result_set = results[0]["id"]
    jobs_url = urlparse.urljoin(treeherder_base, "/api/project/%s/jobs/" % branch)
    jobs_params = {
        "result_set_id": result_set,
        "count": 2000,
        "exclusion_profile": "false",
    }
    job_data = fetch_json(jobs_url, params=jobs_params)
    tasks = []
    for result in job_data["results"]:
        job_type_name = result["job_type_name"]
        # Match both buildbot-style ("W3C Web Platform ...") and
        # taskcluster-style ("test-...-web-platform-tests-...") job names.
        # Parentheses make the original or/and precedence explicit.
        if job_type_name.startswith("W3C Web Platform") or (
            job_type_name.startswith("test-")
            and "-web-platform-tests-" in job_type_name
        ):
            url = get_blobber_url(branch, result)
            if url:
                prefix = result["platform"]  # platform
                # Bug fix: pass the caller-supplied destination through;
                # the old code hard-coded None here, silently ignoring dest.
                tasks.append((url, prefix, dest))
    for task in tasks:
        download(*task)
def main():
    """Command-line entry point: fetch all wpt logs for a branch/commit."""
    args = create_parser().parse_args()
    get_structured_logs(args.branch, args.commit)


if __name__ == "__main__":
    main()