/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
// timer while the profiler is active. It involves writing a stack trace plus
// a variety of other values (memory measurements, responsiveness
// measurements, markers, etc.) into the main ProfileBuffer. The sampling is
// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
// get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
// call (profiler_get_backtrace()). It involves writing a stack trace and
// little else into a temporary ProfileBuffer, and wrapping that up in a
// ProfilerBacktrace that can be subsequently used in a marker. The sampling
// is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the
// register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
// API call (profiler_suspend_and_sample_thread()). It involves getting a
// stack trace via a ProfilerStackCollector; it does not write to a
// ProfileBuffer. The sampling is done from off-thread, and so uses
// SuspendAndSampleAndResumeThread() to get the register values.
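//
// As an illustrative sketch (not part of this file's logic), client code
// typically obtains a synchronous sample via
//
//   UniquePtr<ProfilerBacktrace> backtrace = profiler_get_backtrace();
//
// and later attaches it to a marker (e.g. through MarkerStack::TakeBacktrace),
// while periodic samples are produced automatically by the SamplerThread
// defined further down in this file.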
#include "platform.h"
#include "GeckoProfiler.h"
#include "GeckoProfilerReporter.h"
#include "PageInformation.h"
#include "PowerCounters.h"
#include "ProfileBuffer.h"
#include "ProfiledThreadData.h"
#include "ProfilerBacktrace.h"
#include "ProfilerChild.h"
#include "ProfilerCodeAddressService.h"
#include "ProfilerControl.h"
#include "ProfilerCPUFreq.h"
#include "ProfilerIOInterposeObserver.h"
#include "ProfilerParent.h"
#include "ProfilerNativeStack.h"
#include "ProfilerStackWalk.h"
#include "ProfilerRustBindings.h"
#include "mozilla/Assertions.h"
#include "mozilla/Likely.h"
#include "mozilla/Maybe.h"
#include "mozilla/MozPromise.h"
#include "mozilla/Perfetto.h"
#include "nsCOMPtr.h"
#include "nsDebug.h"
#include "nsISupports.h"
#include "nsXPCOM.h"
#include "SharedLibraries.h"
#include "VTuneProfiler.h"
#include "ETWTools.h"
#include "js/ProfilingFrameIterator.h"
#include "memory_hooks.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/AutoProfilerLabel.h"
#include "mozilla/BaseAndGeckoProfilerDetail.h"
#include "mozilla/CycleCollectedJSContext.h"
#include "mozilla/ExtensionPolicyService.h"
#include "mozilla/extensions/WebExtensionPolicy.h"
#include "mozilla/glean/GleanMetrics.h"
#include "mozilla/Monitor.h"
#include "mozilla/Preferences.h"
#include "mozilla/Printf.h"
#include "mozilla/ProcInfo.h"
#include "mozilla/ProfilerBufferSize.h"
#include "mozilla/ProfileBufferChunkManagerSingle.h"
#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
#include "mozilla/ProfileChunkedBuffer.h"
#include "mozilla/ProfilerBandwidthCounter.h"
#include "mozilla/SchedulerGroup.h"
#include "mozilla/Services.h"
#include "mozilla/StackWalk.h"
#include "mozilla/Try.h"
#ifdef XP_WIN
# include "mozilla/NativeNt.h"
# include "mozilla/StackWalkThread.h"
# include "mozilla/WindowsStackWalkInitialization.h"
#endif
#include "mozilla/StaticPtr.h"
#include "mozilla/ThreadLocal.h"
#include "mozilla/TimeStamp.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Vector.h"
#include "BaseProfiler.h"
#include "nsDirectoryServiceDefs.h"
#include "nsDirectoryServiceUtils.h"
#include "nsIDocShell.h"
#include "nsIHttpProtocolHandler.h"
#include "nsIObserverService.h"
#include "nsIPropertyBag2.h"
#include "nsIXULAppInfo.h"
#include "nsIXULRuntime.h"
#include "nsJSPrincipals.h"
#include "nsMemoryReporterManager.h"
#include "nsPIDOMWindow.h"
#include "nsProfilerStartParams.h"
#include "nsScriptSecurityManager.h"
#include "nsSystemInfo.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "nsDirectoryServiceUtils.h"
#include "Tracing.h"
#include "prdtoa.h"
#include "prtime.h"
#include <algorithm>
#include <errno.h>
#include <fstream>
#include <ostream>
#include <set>
#include <sstream>
#include <string_view>
#include <type_traits>
// To simplify other code in this file, define a helper macro to avoid
// repeating the same preprocessor checks.
// The signals that we use to control the profiler conflict with the signals
// used to control the code coverage tool. Therefore, if coverage is enabled,
// we need to disable our own signal handling mechanisms.
#ifndef MOZ_CODE_COVERAGE
# ifdef XP_WIN
// Signal-based profiler control is not currently supported on Windows.
# elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
defined(GP_OS_android) || defined(GP_OS_freebsd)
// Specify the specific platforms that we want to support
# define GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL 1
# else
// No support on this unknown platform!
# endif
#endif
// We need some extra includes if we're supporting async posix signals
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
# include <signal.h>
# include <fcntl.h>
# include <unistd.h>
# include <errno.h>
# include <pthread.h>
#endif
#if defined(GP_OS_android)
# include "JavaExceptions.h"
# include "mozilla/java/GeckoJavaSamplerNatives.h"
# include "mozilla/jni/Refs.h"
#endif
#if defined(XP_MACOSX)
# include "nsCocoaFeatures.h"
#endif
#if defined(GP_PLAT_amd64_darwin)
# include <cpuid.h>
#endif
#if defined(GP_OS_windows)
# include <processthreadsapi.h>
// GetThreadInformation is not available on Windows 7.
WINBASEAPI
BOOL WINAPI GetThreadInformation(
_In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass,
_Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation,
_In_ DWORD ThreadInformationSize);
#endif
// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif
// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif
// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif
// Mac builds use FramePointerStackWalk(). Even if we build without
// frame pointers, we'll still get useful stacks in system libraries
// because those always have frame pointers.
// We don't use MozStackWalk() on Mac.
#if defined(GP_OS_darwin)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif
// Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
# define HAVE_NATIVE_UNWIND
# define USE_EHABI_STACKWALK
# include "EHABIStackWalk.h"
#endif
// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \
defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
defined(GP_PLAT_arm64_freebsd)
# define HAVE_NATIVE_UNWIND
# define USE_LUL_STACKWALK
# include "lul/LulMain.h"
# include "lul/platform-linux-lul.h"
// On Linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.)
//
// FramePointerStackWalk can produce incomplete stacks when the current entry
// is in a shared library without frame pointers; however, LUL can take a long
// time to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
# if defined(MOZ_PROFILING)
# define USE_FRAME_POINTER_STACK_WALK
# endif
#endif
// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
# define HAVE_FASTINIT_NATIVE_UNWIND
#endif
#ifdef MOZ_VALGRIND
# include <valgrind/memcheck.h>
#else
# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
# include <ucontext.h>
#endif
using namespace mozilla;
using namespace mozilla::literals::ProportionValue_literals;
using mozilla::profiler::detail::RacyFeatures;
using ThreadRegistration = mozilla::profiler::ThreadRegistration;
using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo;
using ThreadRegistry = mozilla::profiler::ThreadRegistry;
LazyLogModule gProfilerLog("prof");
ProfileChunkedBuffer& profiler_get_core_buffer() {
// Defer to the Base Profiler in mozglue to create the core buffer if needed,
// and keep a reference here, for quick access in xul.
static ProfileChunkedBuffer& sProfileChunkedBuffer =
baseprofiler::profiler_get_core_buffer();
return sProfileChunkedBuffer;
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Control character to start the profiler ('g' for "go"!)
static const char sAsyncSignalControlCharStart = 'g';
// Control character to stop the profiler ('s' for "stop"!)
static const char sAsyncSignalControlCharStop = 's';
// This is a file descriptor that is the "write" end of the POSIX pipe that we
// use to control the profiler. It is written to in profiler_start_signal_handler
// and profiler_stop_signal_handler, and read from in AsyncSignalControlThread.
static mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed>
sAsyncSignalControlWriteFd(-1);
// Atomic flag to stop the profiler from within the sampling loop
mozilla::Atomic<bool, mozilla::MemoryOrdering::Relaxed> gStopAndDumpFromSignal(
false);
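// As a hedged sketch (for illustration only; the real signal handlers live
// elsewhere in the profiler code), a start handler only needs to perform an
// async-signal-safe one-byte write to this pipe:
//
//   int fd = sAsyncSignalControlWriteFd;
//   if (fd != -1) {
//     char msg = sAsyncSignalControlCharStart;
//     (void)write(fd, &msg, sizeof(msg));  // write(2) is async-signal-safe
//   }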
#endif
// Forward declare the function to call when we need to dump + stop from within
// the async control thread
void profiler_dump_and_stop();
// Forward declare the function to call when we need to start the profiler.
void profiler_start_from_signal();
mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed> gSkipSampling;
#if defined(GP_OS_android)
class GeckoJavaSampler
: public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
private:
GeckoJavaSampler();
public:
static double GetProfilerTime() {
if (!profiler_is_active()) {
return 0.0;
}
return profiler_time();
};
static void JavaStringArrayToCharArray(jni::ObjectArray::Param& aJavaArray,
Vector<const char*>& aCharArray,
JNIEnv* aJni) {
int arraySize = aJavaArray->Length();
for (int i = 0; i < arraySize; i++) {
jstring javaString =
(jstring)(aJni->GetObjectArrayElement(aJavaArray.Get(), i));
const char* filterString = aJni->GetStringUTFChars(javaString, 0);
// FIXME. These strings are leaked.
MOZ_RELEASE_ASSERT(aCharArray.append(filterString));
}
}
static void StartProfiler(jni::ObjectArray::Param aFiltersArray,
jni::ObjectArray::Param aFeaturesArray) {
JNIEnv* jni = jni::GetEnvForThread();
Vector<const char*> filtersTemp;
Vector<const char*> featureStringArray;
JavaStringArrayToCharArray(aFiltersArray, filtersTemp, jni);
JavaStringArrayToCharArray(aFeaturesArray, featureStringArray, jni);
uint32_t features = 0;
features = ParseFeaturesFromStringArray(featureStringArray.begin(),
featureStringArray.length());
// 128 * 1024 * 1024 is the entries preset that is given in
// devtools/client/performance-new/shared/background.sys.mjs
profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features,
filtersTemp.begin(), filtersTemp.length(), 0, Nothing());
}
static void StopProfiler(jni::Object::Param aGeckoResult) {
auto result = java::GeckoResult::LocalRef(aGeckoResult);
profiler_pause();
nsCOMPtr<nsIProfiler> nsProfiler(
do_GetService("@mozilla.org/tools/profiler;1"));
nsProfiler->GetProfileDataAsGzippedArrayBufferAndroid(0)->Then(
GetMainThreadSerialEventTarget(), __func__,
[result](FallibleTArray<uint8_t> compressedProfile) {
result->Complete(jni::ByteArray::New(
reinterpret_cast<const int8_t*>(compressedProfile.Elements()),
compressedProfile.Length()));
// Done with capturing a profile. Stop the profiler.
profiler_stop();
},
[result](nsresult aRv) {
char errorString[9];
sprintf(errorString, "%08x", uint32_t(aRv));
result->CompleteExceptionally(
mozilla::java::sdk::IllegalStateException::New(errorString)
.Cast<jni::Throwable>());
// Failed to capture a profile. Stop the profiler.
profiler_stop();
});
}
};
#endif
constexpr static bool ValidateFeatures() {
int expectedFeatureNumber = 0;
// Feature numbers should start at 0 and increase by 1 each.
#define CHECK_FEATURE(n_, str_, Name_, desc_) \
if ((n_) != expectedFeatureNumber) { \
return false; \
} \
++expectedFeatureNumber;
PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
#undef CHECK_FEATURE
return true;
}
static_assert(ValidateFeatures(), "Feature list is invalid");
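// For reference, each PROFILER_FOR_EACH_FEATURE entry invokes the supplied
// macro as MACRO(n_, str_, Name_, desc_). A hypothetical entry such as
// (0, "java", Java, "Profile Java code") would define ProfilerFeature::Java
// with bit number 0; the static_assert above guarantees that these numbers
// start at 0 and increase by exactly 1 per feature.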
// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
uint32_t features = 0;
#define ADD_FEATURE(n_, str_, Name_, desc_) \
ProfilerFeature::Set##Name_(features);
// Add all the possible features.
PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
#undef ADD_FEATURE
// Now remove features not supported on this platform/configuration.
#if !defined(GP_OS_android)
ProfilerFeature::ClearJava(features);
#endif
#if !defined(HAVE_NATIVE_UNWIND)
ProfilerFeature::ClearStackWalk(features);
#endif
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (getenv("XPCOM_MEM_BLOAT_LOG")) {
DEBUG_LOG("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations.");
// The memory hooks are available, but the bloat log is enabled, which is
// not compatible with the native allocations tracking. See the comment in
// enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
// more information.
ProfilerFeature::ClearNativeAllocations(features);
}
#else
// The memory hooks are not available.
ProfilerFeature::ClearMemory(features);
ProfilerFeature::ClearNativeAllocations(features);
#endif
#if !defined(GP_OS_windows)
ProfilerFeature::ClearNoTimerResolutionChange(features);
#endif
#if !defined(HAVE_CPU_FREQ_SUPPORT)
ProfilerFeature::ClearCPUFrequency(features);
#endif
return features;
}
// Default features common to all contexts (even if not available).
static constexpr uint32_t DefaultFeatures() {
return ProfilerFeature::Java | ProfilerFeature::JS |
ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
ProfilerFeature::Screenshots | ProfilerFeature::ProcessCPU;
}
// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static constexpr uint32_t StartupExtraDefaultFeatures() {
// Enable file I/Os by default for startup profiles as startup is heavy on
// I/O operations.
return ProfilerFeature::FileIOAll | ProfilerFeature::IPCMessages;
}
Json::String ToCompactString(const Json::Value& aJsonValue) {
Json::StreamWriterBuilder builder;
// No indentations, and no newlines.
builder["indentation"] = "";
// This removes spaces after colons.
builder["enableYAMLCompatibility"] = false;
// Only 6 digits after the decimal point; timestamps in ms have ns precision.
builder["precision"] = 6;
builder["precisionType"] = "decimal";
return Json::writeString(builder, aJsonValue);
}
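// For example (illustrative input/output only), passing the JSON object
// {"name": "x", "values": [1, 2]} through ToCompactString yields the single
// line:
//   {"name":"x","values":[1,2]}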
MOZ_RUNINIT /* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
ProfilingLog::gMutex;
MOZ_RUNINIT /* static */ mozilla::UniquePtr<Json::Value> ProfilingLog::gLog;
/* static */ void ProfilingLog::Init() {
mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex};
MOZ_ASSERT(!gLog);
gLog = mozilla::MakeUniqueFallible<Json::Value>(Json::objectValue);
if (gLog) {
(*gLog)[Json::StaticString{"profilingLogBegin" TIMESTAMP_JSON_SUFFIX}] =
ProfilingLog::Timestamp();
}
}
/* static */ void ProfilingLog::Destroy() {
mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex};
MOZ_ASSERT(gLog);
gLog = nullptr;
}
/* static */ bool ProfilingLog::IsLockedOnCurrentThread() {
return gMutex.IsLockedOnCurrentThread();
}
// RAII class to lock the profiler mutex.
// It provides a mechanism to determine if it is locked or not in order for
// memory hooks to avoid re-entering the profiler locked state.
// Locking order: Profiler, ThreadRegistry, ThreadRegistration.
class MOZ_RAII PSAutoLock {
public:
PSAutoLock()
: mLock([]() -> mozilla::baseprofiler::detail::BaseProfilerMutex& {
// In DEBUG builds, *before* we attempt to lock gPSMutex, we want to
// check that the ThreadRegistry, ThreadRegistration, and ProfilingLog
// mutexes are *not* locked on this thread, to avoid inversion
// deadlocks.
MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread());
MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread());
MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread());
return gPSMutex;
}()) {}
PSAutoLock(const PSAutoLock&) = delete;
void operator=(const PSAutoLock&) = delete;
static bool IsLockedOnCurrentThread() {
return gPSMutex.IsLockedOnCurrentThread();
}
private:
static mozilla::baseprofiler::detail::BaseProfilerMutex gPSMutex;
mozilla::baseprofiler::detail::BaseProfilerAutoLock mLock;
};
MOZ_RUNINIT /* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
PSAutoLock::gPSMutex{"Gecko Profiler mutex"};
// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
#define PS_GET(type_, name_) \
static type_ name_(PSLockRef) { \
MOZ_ASSERT(sInstance); \
return sInstance->m##name_; \
}
#define PS_GET_LOCKLESS(type_, name_) \
static type_ name_() { \
MOZ_ASSERT(sInstance); \
return sInstance->m##name_; \
}
#define PS_GET_AND_SET(type_, name_) \
PS_GET(type_, name_) \
static void Set##name_(PSLockRef, type_ a##name_) { \
MOZ_ASSERT(sInstance); \
sInstance->m##name_ = a##name_; \
}
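// For example, `PS_GET(double, Interval)` expands to:
//
//   static double Interval(PSLockRef) {
//     MOZ_ASSERT(sInstance);
//     return sInstance->mInterval;
//   }
//
// i.e. a getter that requires proof (the PSLockRef) that gPSMutex is held and
// reads the corresponding `m`-prefixed member of the singleton.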
static constexpr size_t MAX_JS_FRAMES =
mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES;
using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame;
using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer;
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// ASYNC POSIX SIGNAL HANDLING SUPPORT
//
// Integrating POSIX signals into a multi-threaded application such as Firefox
// can be a tricky proposition. Signals are delivered by the operating system
// to a program, which then invokes a signal handler, interrupting the normal
// flow of control. This handler is responsible for performing operations in
// response to the signal. If there is no "custom" handler defined, then default
// behaviour is triggered, which usually results in a terminated program.
//
// As signal handlers interrupt the normal flow of control, Firefox may not be
// in a safe state while the handler is running (e.g. it may be halfway through
// a garbage collection cycle, or a critical lock may be held by the current
// thread). This is something we must be aware of while writing one, and we are
// additionally limited in terms of which POSIX functions we can call to those
// which are async-signal-safe.
//
// In the context of Firefox, this presents a number of details that we must be
// aware of:
//
// * We are very limited by what we can call when we handle a signal: Many
// functions in Firefox, and in the profiler specifically, allocate memory
// when called. Allocating memory is specifically **not** async-signal-safe,
// and so any functions that allocate should not be called from a signal
// handler.
//
// * We need to be careful with how we communicate to other threads in the
// process. The signal handler runs asynchronously, interrupting the current
// thread of execution. Communication should therefore use atomics or other
// concurrency constructs to ensure that data is read and written correctly.
// We should avoid taking locks, as we may easily deadlock while within the
// signal handler.
//
// * We cannot use the usual Firefox mechanisms for triggering behaviour in
// other threads. For instance, tools such as ``NS_DispatchToMainThread``
// allocate memory when called, which is not allowed within a signal handler.
//
// We solve these constraints by introducing a new thread within the Firefox
// profiler, the AsyncSignalControlThread which is responsible for carrying out
// the actions triggered by a signal handler. We communicate between handlers
// and this thread with the use of a libc pipe.
// Writing to a pipe is async-signal-safe, so we can do so from a signal
// handler, and we can set the pipe to be "blocking", meaning that when our
// control thread tries to read it will block at the OS level (consuming no CPU)
// until the handler writes to it. This is in contrast to (e.g.) an atomic
// variable, where our thread would have to "busy wait" for it to be set.
//
// We have one "control" thread per process, and use a single byte for messages
// we send. Writes to pipes are atomic if the size is less than or equal to
// ``PIPE_BUF``, which (although implementation defined) in our case is always
// one, thus trivially atomic.
//
// The control flow for a typical Firefox session in which a user starts and
// stops profiling using POSIX signals therefore looks something like the
// following:
//
// * Profiler initialization.
//
// * The main thread of each process starts the signal control thread, and
// initialises signal handlers for ``SIGUSR1`` and ``SIGUSR2``.
// * The signal control thread sets up pipes for communication, and begins
// reading, blocking itself.
//
// * *After some time...*
// * The user sends ``SIGUSR1`` to Firefox, e.g. using ``kill -s USR1 <firefox
// pid>``
//
// * The profiler_start_signal_handler signal handler for ``SIGUSR1`` is
// triggered by the operating system. This writes the "start" control
// character to the communication pipe and returns.
// * The signal control thread wakes up, as there is now data on the pipe.
// * The control thread recognises the "start" character, and starts the
// profiler with a set of default presets.
// * The control thread loops, and goes back to waiting on the pipe.
//
// * *The user uses Firefox, or waits for it to do something...*
// * The user sends ``SIGUSR2`` to Firefox, e.g. using ``kill -s USR2 <firefox
// pid>``
//
// * The profiler_stop_signal_handler signal handler for ``SIGUSR2`` is
// triggered by the operating system. This writes the "stop" control
// character to the communication pipe and returns.
// * The signal control thread wakes up, as there is now data on the pipe.
// * The control thread recognises the "stop" character, and calls
// profiler_dump_and_stop to dump the profile to disk and stop the profiler.
// * The control thread loops, and goes back to waiting on the pipe.
//
// * *The user can now start another profiling session...*
//
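// As a concrete illustration of the flow above, a profiling session driven
// entirely by signals looks like this from a shell:
//
//   kill -s USR1 <firefox pid>   # start profiling with the default presets
//   # ...exercise Firefox...
//   kill -s USR2 <firefox pid>   # dump the profile to disk and stop
//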
// Forward declare this, so we can call it from the constructor.
static void* AsyncSignalControlThreadEntry(void* aArg);
// Define our platform specific async (posix) signal control thread here.
class AsyncSignalControlThread {
public:
AsyncSignalControlThread() : mThread() {
// Try to open a pipe for this to communicate with. If we can't do this,
// then we give up and return, as there's no point continuing without
// being able to communicate.
int pipeFds[2];
if (pipe(pipeFds)) {
LOG("Profiler AsyncSignalControlThread failed to create a pipe.");
return;
}
// Close this pipe on calls to exec().
fcntl(pipeFds[0], F_SETFD, FD_CLOEXEC);
fcntl(pipeFds[1], F_SETFD, FD_CLOEXEC);
// Write the reading side to mFd, and the writing side to the global atomic
mFd = pipeFds[0];
sAsyncSignalControlWriteFd = pipeFds[1];
// We don't really care about stack size, as it should be minimal, so
// leave the pthread attributes as a nullptr, i.e. choose the default.
pthread_attr_t* attr_ptr = nullptr;
if (pthread_create(&mThread, attr_ptr, AsyncSignalControlThreadEntry,
this) != 0) {
MOZ_CRASH("pthread_create failed");
}
};
~AsyncSignalControlThread() {
// Derived from code in nsDumpUtils.cpp. Comment reproduced here for
// posterity: Close sAsyncSignalControlWriteFd /after/ setting the fd to
// -1. Otherwise we have the (admittedly far-fetched) race where we
//
// 1) close sAsyncSignalControlWriteFd
// 2) open a new fd with the same number as sAsyncSignalControlWriteFd
// had.
// 3) receive a signal, then write to the fd.
int asyncSignalControlWriteFd = sAsyncSignalControlWriteFd.exchange(-1);
// This will unblock the `read` in Watch().
close(asyncSignalControlWriteFd);
// Finally, exit the thread.
pthread_join(mThread, nullptr);
};
void Watch() {
char msg[1];
ssize_t nread;
while (true) {
// Try reading from the pipe. This will block until something is written:
nread = read(mFd, msg, sizeof(msg));
if (nread == -1 && errno == EINTR) {
// nread == -1 and errno == EINTR means that `read` was interrupted
// by a signal before reading any data. This is likely because the
// profiling thread interrupted us (with SIGPROF). We can safely ignore
// this and "go around" the loop again to try and read.
continue;
}
if (nread == -1 && errno != EINTR) {
// nread == -1 and errno != EINTR means that `read` has failed in some
// way that we can't recover from. In this case, all we can do is give
// up, and quit the watcher, as the pipe is likely broken.
LOG("Error (%d) when reading in AsyncSignalControlThread", errno);
return;
}
if (nread == 0) {
// nread == 0 signals that the other end of the pipe has been cleanly
// closed. Close our end, and exit the reading loop.
close(mFd);
return;
}
// If we reach here, nread != 0 and nread != -1. This means that we
// should have read at least one byte, which should be a control byte
// for the profiler.
// It *might* happen that `read` is interrupted by the sampler thread
// after successfully reading. If this occurs, `read` returns the number
// of bytes it managed to read. As anything other than exactly one byte
// is wrong for us, we assert below that we read a single control byte.
MOZ_RELEASE_ASSERT(nread == 1);
if (msg[0] == sAsyncSignalControlCharStart) {
// Check to see if the profiler is already running. This is done within
// `profiler_start` anyway, but if we check sooner we avoid running all
// the other code between now and that check.
if (!profiler_is_active()) {
profiler_start_from_signal();
}
} else if (msg[0] == sAsyncSignalControlCharStop) {
// Check to see whether the profiler is even running before trying to
// stop the profiler. Most other methods of stopping the profiler (i.e.
// those through nsProfiler etc.) already know whether or not the
// profiler is running, and don't try to stop it if it isn't running.
// Signal-stopping doesn't have this constraint, so we should check just
// in case there is a codepath followed by `profiler_dump_and_stop` that
// breaks if we stop while already stopped.
if (profiler_is_active()) {
profiler_dump_and_stop();
}
} else {
LOG("AsyncSignalControlThread recieved unknown control signal: %c",
msg[0]);
}
}
};
private:
// The read side of the pipe that we use to communicate from a signal handler
// to the AsyncSignalControlThread
int mFd;
// The thread handle for the async signal control thread.
// Note that, unlike the sampler thread, this is currently a POSIX-only
// feature. Therefore, we don't bother to have a Windows equivalent - we
// just use a pthread_t.
pthread_t mThread;
};
static void* AsyncSignalControlThreadEntry(void* aArg) {
NS_SetCurrentThreadName("AsyncSignalControlThread");
auto* thread = static_cast<AsyncSignalControlThread*>(aArg);
thread->Watch();
return nullptr;
}
#endif
// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.
// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
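//
// As a hedged usage sketch (real call sites appear later in this file), a
// typical locked access to this state looks like:
//
//   PSAutoLock lock;
//   if (CorePS::Exists()) {
//     Vector<RefPtr<PageInformation>>& pages = CorePS::RegisteredPages(lock);
//     // ... use `pages` while `lock` is held ...
//   }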
class CorePS {
private:
#ifdef MOZ_PERFETTO
class PerfettoObserver : public perfetto::TrackEventSessionObserver {
public:
PerfettoObserver() { perfetto::TrackEvent::AddSessionObserver(this); }
~PerfettoObserver() { perfetto::TrackEvent::RemoveSessionObserver(this); }
void OnStart(const perfetto::DataSourceBase::StartArgs&) override {
mozilla::profiler::detail::RacyFeatures::SetPerfettoTracingActive();
}
void OnStop(const perfetto::DataSourceBase::StopArgs&) override {
mozilla::profiler::detail::RacyFeatures::SetPerfettoTracingInactive();
}
} perfettoObserver;
#endif
CorePS()
: mProcessStartTime(TimeStamp::ProcessCreation()),
mMaybeBandwidthCounter(nullptr)
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
,
mAsyncSignalControlThread(nullptr)
#endif
#ifdef USE_LUL_STACKWALK
,
mLul(nullptr)
#endif
{
MOZ_ASSERT(NS_IsMainThread(),
"CorePS must be created from the main thread");
}
~CorePS() {
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
delete mAsyncSignalControlThread;
#endif
#ifdef USE_LUL_STACKWALK
delete sInstance->mLul;
#endif
// The bandwidth counter is not tied to LUL, so delete it unconditionally.
delete mMaybeBandwidthCounter;
}
public:
static void Create(PSLockRef aLock) {
MOZ_ASSERT(!sInstance);
sInstance = new CorePS();
}
static void Destroy(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
delete sInstance;
sInstance = nullptr;
}
// Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
// being locked. This is because CorePS is instantiated so early on the main
// thread that we don't have to worry about it being racy.
static bool Exists() { return !!sInstance; }
static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
size_t& aProfSize, size_t& aLulSize) {
MOZ_ASSERT(sInstance);
aProfSize += aMallocSizeOf(sInstance);
aProfSize += ThreadRegistry::SizeOfIncludingThis(aMallocSizeOf);
for (auto& registeredPage : sInstance->mRegisteredPages) {
aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
}
// Measurement of the following things may be added later if DMD finds it
// is worthwhile:
// - CorePS::mRegisteredPages itself (its elements' children are
// measured above)
#if defined(USE_LUL_STACKWALK)
if (lul::LUL* lulPtr = sInstance->mLul; lulPtr) {
aLulSize += lulPtr->SizeOfIncludingThis(aMallocSizeOf);
}
#endif
}
// No PSLockRef is needed for this field because it's immutable.
PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
PS_GET(JsFrameBuffer&, JsFrames)
PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
static void AppendRegisteredPage(PSLockRef,
RefPtr<PageInformation>&& aRegisteredPage) {
MOZ_ASSERT(sInstance);
struct RegisteredPageComparator {
PageInformation* aA;
bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
};
auto foundPageIter = std::find_if(
sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
RegisteredPageComparator{aRegisteredPage.get()});
if (foundPageIter != sInstance->mRegisteredPages.end()) {
if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) {
// When a BrowsingContext is loaded, the first url loaded in it will be
// about:blank, and if the principal matches, the first document loaded
// in it will share an inner window. That's why we should delete the
// intermittent about:blank if they share the inner window.
sInstance->mRegisteredPages.erase(foundPageIter);
} else {
// Do not register the same page again.
return;
}
}
MOZ_RELEASE_ASSERT(
sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
}
static void RemoveRegisteredPage(PSLockRef,
uint64_t aRegisteredInnerWindowID) {
MOZ_ASSERT(sInstance);
// Remove RegisteredPage from mRegisteredPages by given inner window ID.
sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
return rd->InnerWindowID() == aRegisteredInnerWindowID;
});
}
static void ClearRegisteredPages(PSLockRef) {
MOZ_ASSERT(sInstance);
sInstance->mRegisteredPages.clear();
}
PS_GET(const Vector<BaseProfilerCount*>&, Counters)
static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
MOZ_ASSERT(sInstance);
// We don't own the counters; they may be stored in static objects.
MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
}
static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
// we may be called to remove a counter after the profiler is stopped or
// late in shutdown.
if (sInstance) {
auto* counter = std::find(sInstance->mCounters.begin(),
sInstance->mCounters.end(), aCounter);
MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
sInstance->mCounters.erase(counter);
}
}
#ifdef USE_LUL_STACKWALK
static lul::LUL* Lul() {
MOZ_RELEASE_ASSERT(sInstance);
return sInstance->mLul;
}
static void SetLul(UniquePtr<lul::LUL> aLul) {
MOZ_RELEASE_ASSERT(sInstance);
MOZ_RELEASE_ASSERT(
sInstance->mLul.compareExchange(nullptr, aLul.release()));
}
#endif
PS_GET_AND_SET(const nsACString&, ProcessName)
PS_GET_AND_SET(const nsACString&, ETLDplus1)
#if !defined(XP_WIN)
PS_GET_AND_SET(const Maybe<nsCOMPtr<nsIFile>>&, AsyncSignalDumpDirectory)
#endif
static void SetBandwidthCounter(ProfilerBandwidthCounter* aBandwidthCounter) {
MOZ_ASSERT(sInstance);
sInstance->mMaybeBandwidthCounter = aBandwidthCounter;
}
static ProfilerBandwidthCounter* GetBandwidthCounter() {
MOZ_ASSERT(sInstance);
return sInstance->mMaybeBandwidthCounter;
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
static void SetAsyncSignalControlThread(
AsyncSignalControlThread* aAsyncSignalControlThread) {
MOZ_ASSERT(sInstance);
sInstance->mAsyncSignalControlThread = aAsyncSignalControlThread;
}
#endif
private:
// The singleton instance
static CorePS* sInstance;
// The time that the process started.
const TimeStamp mProcessStartTime;
// Network bandwidth counter for the Bandwidth feature.
ProfilerBandwidthCounter* mMaybeBandwidthCounter;
// Info on all the registered pages.
// InnerWindowIDs in mRegisteredPages are unique.
Vector<RefPtr<PageInformation>> mRegisteredPages;
// Non-owning pointers to all active counters
Vector<BaseProfilerCount*> mCounters;
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Background thread for communicating with async signal handlers
AsyncSignalControlThread* mAsyncSignalControlThread;
#endif
#ifdef USE_LUL_STACKWALK
// LUL's state. Null prior to the first activation, non-null thereafter.
// Owned by this CorePS.
mozilla::Atomic<lul::LUL*> mLul;
#endif
// Process name, provided by child process initialization code.
nsAutoCString mProcessName;
// Private name, provided by child process initialization code (eTLD+1 in
// fission)
nsAutoCString mETLDplus1;
// This memory buffer is used by the MergeStacks mechanism. Previously it was
// stack allocated, but this led to a stack overflow, as it was too much
// memory. Here the buffer can be pre-allocated, and shared with the
// MergeStacks feature as needed. MergeStacks is only run while holding the
// lock, so it is safe to have only one instance allocated for all of the
// threads.
JsFrameBuffer mJsFrames;
// Cached download directory for when we need to dump profiles to disk.
#if !defined(XP_WIN)
Maybe<nsCOMPtr<nsIFile>> mAsyncSignalDumpDirectory;
#endif
};
CorePS* CorePS::sInstance = nullptr;
void locked_profiler_add_sampled_counter(PSLockRef aLock,
BaseProfilerCount* aCounter) {
CorePS::AppendCounter(aLock, aCounter);
}
void locked_profiler_remove_sampled_counter(PSLockRef aLock,
BaseProfilerCount* aCounter) {
// Note: we don't enforce a final sample, though we could do so if the
// profiler was active
CorePS::RemoveCounter(aLock, aCounter);
}
class SamplerThread;
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
double aInterval, uint32_t aFeatures);
struct LiveProfiledThreadData {
UniquePtr<ProfiledThreadData> mProfiledThreadData;
};
// This class contains the profiler's global state that is valid only when the
// profiler is active. When not instantiated, the profiler is inactive.
//
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
// CorePS.
//
class ActivePS {
private:
constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
scBytesPerEntry / scMinimumNumberOfChunks,
size_t(scMaximumChunkSize)));
}
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
// Filter out any features unavailable in this platform/configuration.
aFeatures &= AvailableFeatures();
// Some features imply others.
if (aFeatures & ProfilerFeature::FileIOAll) {
aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
} else if (aFeatures & ProfilerFeature::FileIO) {
aFeatures |= ProfilerFeature::MainThreadIO;
}
if (aFeatures & ProfilerFeature::CPUAllThreads) {
aFeatures |= ProfilerFeature::CPUUtilization;
}
if (aFeatures & ProfilerFeature::Tracing) {
aFeatures &= ~ProfilerFeature::CPUUtilization;
aFeatures &= ~ProfilerFeature::Memory;
aFeatures |= ProfilerFeature::NoStackSampling;
aFeatures |= ProfilerFeature::JS;
}
return aFeatures;
}
bool ShouldInterposeIOs() {
return ProfilerFeature::HasMainThreadIO(mFeatures) ||
ProfilerFeature::HasFileIO(mFeatures) ||
ProfilerFeature::HasFileIOAll(mFeatures);
}
ActivePS(
PSLockRef aLock, const TimeStamp& aProfilingStartTime,
PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
const Maybe<double>& aDuration,
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull)
: mProfilingStartTime(aProfilingStartTime),
mGeneration(sNextGeneration++),
mCapacity(aCapacity),
mDuration(aDuration),
mInterval(aInterval),
mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
mActiveTabID(aActiveTabID),
mProfileBufferChunkManager(
aChunkManagerOrNull
? std::move(aChunkManagerOrNull)
: MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
size_t(ClampToAllowedEntries(aCapacity.Value())) *
scBytesPerEntry,
ChunkSizeForEntries(aCapacity.Value()))),
mProfileBuffer([this]() -> ProfileChunkedBuffer& {
ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer();
coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager);
return coreBuffer;
}()),
mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures)
? new ProcessCPUCounter(aLock)
: nullptr),
mMaybePowerCounters(nullptr),
mMaybeCPUFreq(nullptr),
// The new sampler thread doesn't start sampling immediately because the
// main loop within Run() is blocked until this function's caller
// unlocks gPSMutex.
mSamplerThread(
NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
mIsPaused(false),
mIsSamplingPaused(false) {
ProfilingLog::Init();
// Deep copy and lower-case aFilters.
MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount));
for (uint32_t i = 0; i < aFilterCount; ++i) {
mFilters[i] = aFilters[i];
mFiltersLowered[i].reserve(mFilters[i].size());
std::transform(mFilters[i].cbegin(), mFilters[i].cend(),
std::back_inserter(mFiltersLowered[i]), ::tolower);
}
#if !defined(RELEASE_OR_BETA)
if (ShouldInterposeIOs()) {
// We need to register the observer on the main thread, because we want
// to observe IO that happens on the main thread.
// IOInterposer needs to be initialized before calling
// IOInterposer::Register or our observer will be silently dropped.
if (NS_IsMainThread()) {
IOInterposer::Init();
IOInterposer::Register(IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("ActivePS::ActivePS", []() {
// Note: This could theoretically happen after ActivePS gets
// destroyed, but it's ok:
// - The Observer always checks that the profiler is (still)
// active before doing its work.
// - The destruction should happen on the same thread as this
// construction, so the un-registration will also be dispatched
// and queued on the main thread, and run after this.
IOInterposer::Init();
IOInterposer::Register(
IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
}));
}
}
#endif
if (ProfilerFeature::HasPower(aFeatures)) {
mMaybePowerCounters = new PowerCounters();
for (const auto& powerCounter : mMaybePowerCounters->GetCounters()) {
locked_profiler_add_sampled_counter(aLock, powerCounter.get());
}
}
if (ProfilerFeature::HasCPUFrequency(aFeatures)) {
mMaybeCPUFreq = new ProfilerCPUFreq();
}
}
~ActivePS() {
MOZ_ASSERT(
!mMaybeProcessCPUCounter,
"mMaybeProcessCPUCounter should have been deleted before ~ActivePS()");
MOZ_ASSERT(
!mMaybePowerCounters,
"mMaybePowerCounters should have been deleted before ~ActivePS()");
MOZ_ASSERT(!mMaybeCPUFreq,
"mMaybeCPUFreq should have been deleted before ~ActivePS()");
#if !defined(RELEASE_OR_BETA)
if (ShouldInterposeIOs()) {
// We need to unregister the observer on the main thread, because that's
// where we've registered it.
if (NS_IsMainThread()) {
IOInterposer::Unregister(IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("ActivePS::~ActivePS", []() {
IOInterposer::Unregister(
IOInterposeObserver::OpAll,
&ProfilerIOInterposeObserver::GetInstance());
}));
}
}
#endif
if (mProfileBufferChunkManager) {
// We still control the chunk manager, remove it from the core buffer.
profiler_get_core_buffer().ResetChunkManager();
}
ProfilingLog::Destroy();
}
bool ThreadSelected(const char* aThreadName) {
if (mFiltersLowered.empty()) {
return true;
}
std::string name = aThreadName;
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
for (const auto& filter : mFiltersLowered) {
if (filter == "*") {
return true;
}
// Crude, non UTF-8 compatible, case insensitive substring search
if (name.find(filter) != std::string::npos) {
return true;
}
// If the filter is "pid:<my pid>", profile all threads.
if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) {
return true;
}
}
return false;
}
public:
static void Create(
PSLockRef aLock, const TimeStamp& aProfilingStartTime,
PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
const Maybe<double>& aDuration,
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull) {
MOZ_ASSERT(!sInstance);
sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval,
aFeatures, aFilters, aFilterCount, aActiveTabID,
aDuration, std::move(aChunkManagerOrNull));
}
[[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
if (sInstance->mMaybeProcessCPUCounter) {
locked_profiler_remove_sampled_counter(
aLock, sInstance->mMaybeProcessCPUCounter);
delete sInstance->mMaybeProcessCPUCounter;
sInstance->mMaybeProcessCPUCounter = nullptr;
}
if (sInstance->mMaybePowerCounters) {
for (const auto& powerCounter :
sInstance->mMaybePowerCounters->GetCounters()) {
locked_profiler_remove_sampled_counter(aLock, powerCounter.get());
}
delete sInstance->mMaybePowerCounters;
sInstance->mMaybePowerCounters = nullptr;
}
if (sInstance->mMaybeCPUFreq) {
delete sInstance->mMaybeCPUFreq;
sInstance->mMaybeCPUFreq = nullptr;
}
ProfilerBandwidthCounter* counter = CorePS::GetBandwidthCounter();
if (counter && counter->IsRegistered()) {
// Because profiler_count_bandwidth_bytes does a racy
// profiler_feature_active check to avoid taking the lock,
// free'ing the memory of the counter would be crashy if the
// socket thread attempts to increment the counter while we are
// stopping the profiler.
// Instead, we keep the counter in CorePS and only mark it as
// unregistered so that the next attempt to count bytes
// will re-register it.
locked_profiler_remove_sampled_counter(aLock, counter);
counter->MarkUnregistered();
}
auto samplerThread = sInstance->mSamplerThread;
delete sInstance;
sInstance = nullptr;
return samplerThread;
}
static bool Exists(PSLockRef) { return !!sInstance; }
static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
const Maybe<double>& aDuration, double aInterval,
uint32_t aFeatures, const char** aFilters,
uint32_t aFilterCount, uint64_t aActiveTabID) {
MOZ_ASSERT(sInstance);
if (sInstance->mCapacity != aCapacity ||
sInstance->mDuration != aDuration ||
sInstance->mInterval != aInterval ||
sInstance->mFeatures != aFeatures ||
sInstance->mFilters.length() != aFilterCount ||
sInstance->mActiveTabID != aActiveTabID) {
return false;
}
for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
return false;
}
}
return true;
}
static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
MOZ_ASSERT(sInstance);
size_t n = aMallocSizeOf(sInstance);
n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
// Measurement of the following members may be added later if DMD finds it
// is worthwhile:
// - mLiveProfiledThreads (both the array itself, and the contents)
// - mDeadProfiledThreads (both the array itself, and the contents)
//
return n;
}
static ThreadProfilingFeatures ProfilingFeaturesForThread(
PSLockRef aLock, const ThreadRegistrationInfo& aInfo) {
MOZ_ASSERT(sInstance);
if (sInstance->ThreadSelected(aInfo.Name())) {
// This thread was selected by the user, record everything.
return ThreadProfilingFeatures::Any;
}
ThreadProfilingFeatures features = ThreadProfilingFeatures::NotProfiled;
if (ActivePS::FeatureCPUAllThreads(aLock)) {
features = Combine(features, ThreadProfilingFeatures::CPUUtilization);
}
if (ActivePS::FeatureSamplingAllThreads(aLock)) {
features = Combine(features, ThreadProfilingFeatures::Sampling);
}
if (ActivePS::FeatureMarkersAllThreads(aLock)) {
features = Combine(features, ThreadProfilingFeatures::Markers);
}
return features;
}
[[nodiscard]] static bool AppendPostSamplingCallback(
PSLockRef, PostSamplingCallback&& aCallback);
// Writes out the current active configuration of the profile.
static void WriteActiveConfiguration(
PSLockRef aLock, JSONWriter& aWriter,
const Span<const char>& aPropertyName = MakeStringSpan("")) {
if (!sInstance) {
if (!aPropertyName.empty()) {
aWriter.NullProperty(aPropertyName);
} else {
aWriter.NullElement();
}
return;
};
if (!aPropertyName.empty()) {
aWriter.StartObjectProperty(aPropertyName);
} else {
aWriter.StartObjectElement();
}
{
aWriter.StartArrayProperty("features");
#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_) \
if (profiler_feature_active(ProfilerFeature::Name_)) { \
aWriter.StringElement(str_); \
}
PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
#undef WRITE_ACTIVE_FEATURES
aWriter.EndArray();
}
{
aWriter.StartArrayProperty("threads");
for (const auto& filter : sInstance->mFilters) {
aWriter.StringElement(filter);
}
aWriter.EndArray();
}
{
// Now write all the simple values.
// The interval is also available on profile.meta.interval
aWriter.DoubleProperty("interval", sInstance->mInterval);
aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
if (sInstance->mDuration) {
aWriter.DoubleProperty("duration", sInstance->mDuration.value());
}
// Here, we are converting uint64_t to double. Tab IDs are
// being created using `nsContentUtils::GenerateProcessSpecificId`, which
// is specifically designed to only use 53 of the 64 bits to be lossless
// when passed into and out of JS as a double.
aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
}
aWriter.EndObject();
}
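// For illustration (feature names and values below are examples, not
// guaranteed), the object written by WriteActiveConfiguration has the shape:
//   {"features": ["js", "stackwalk"], "threads": ["GeckoMain"],
//    "interval": 1.0, "capacity": 4194304, "activeTabID": 0}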
PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime)
PS_GET(uint32_t, Generation)
PS_GET(PowerOfTwo32, Capacity)
PS_GET(Maybe<double>, Duration)
PS_GET(double, Interval)
PS_GET(uint32_t, Features)
PS_GET(uint64_t, ActiveTabID)
#define PS_GET_FEATURE(n_, str_, Name_, desc_) \
static bool Feature##Name_(PSLockRef) { \
MOZ_ASSERT(sInstance); \
return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
}
PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
#undef PS_GET_FEATURE
static bool ShouldInstallMemoryHooks(PSLockRef) {
MOZ_ASSERT(sInstance);
return ProfilerFeature::ShouldInstallMemoryHooks(sInstance->mFeatures);
}
static uint32_t JSFlags(PSLockRef aLock) {
uint32_t Flags = 0;
Flags |=
FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0;
Flags |= FeatureJSAllocations(aLock)
? uint32_t(JSInstrumentationFlags::Allocations)
: 0;
return Flags;
}
PS_GET(const Vector<std::string>&, Filters)
PS_GET(const Vector<std::string>&, FiltersLowered)
// Not using PS_GET, because only the "Controlled" interface of
// `mProfileBufferChunkManager` should be exposed here.
static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
PSLockRef) {
MOZ_ASSERT(sInstance);
MOZ_ASSERT(sInstance->mProfileBufferChunkManager);
return *sInstance->mProfileBufferChunkManager;
}
static void FulfillChunkRequests(PSLockRef) {
MOZ_ASSERT(sInstance);
if (sInstance->mProfileBufferChunkManager) {
sInstance->mProfileBufferChunkManager->FulfillChunkRequests();
}
}
static ProfileBuffer& Buffer(PSLockRef) {
MOZ_ASSERT(sInstance);
return sInstance->mProfileBuffer;
}
static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
MOZ_ASSERT(sInstance);
return sInstance->mLiveProfiledThreads;
}
struct ProfiledThreadListElement {
TimeStamp mRegisterTime;
JSContext* mJSContext; // Null for unregistered threads.
ProfiledThreadData* mProfiledThreadData;
};
using ProfiledThreadList = Vector<ProfiledThreadListElement>;
// Returns a ProfiledThreadList with all threads that should be included in a
// profile, both for threads that are still registered, and for threads that
// have been unregistered but still have data in the buffer.
// The returned array is sorted by thread register time.
// Do not hold on to the return value past LockedRegistry.
static ProfiledThreadList ProfiledThreads(
ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ProfiledThreadList array;
MOZ_RELEASE_ASSERT(
array.initCapacity(sInstance->mLiveProfiledThreads.length() +
sInstance->mDeadProfiledThreads.length()));
for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) {
ProfiledThreadData* profiledThreadData =
offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData(
aLock);
if (!profiledThreadData) {
// This thread was not profiled, continue with the next one.
continue;
}
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
offThreadRef.GetLockedRWFromAnyThread();
MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
profiledThreadData->Info().RegisterTime(),
lockedThreadData->GetJSContext(), profiledThreadData}));
}
for (auto& t : sInstance->mDeadProfiledThreads) {
MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
t->Info().RegisterTime(), (JSContext*)nullptr, t.get()}));
}
std::sort(array.begin(), array.end(),
[](const ProfiledThreadListElement& a,
const ProfiledThreadListElement& b) {
return a.mRegisterTime < b.mRegisterTime;
});
return array;
}
static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
Vector<RefPtr<PageInformation>> array;
for (auto& d : CorePS::RegisteredPages(aLock)) {
MOZ_RELEASE_ASSERT(array.append(d));
}
for (auto& d : sInstance->mDeadProfiledPages) {
MOZ_RELEASE_ASSERT(array.append(d));
}
// We don't need to sort the pages like threads since we won't show them
// as a list.
return array;
}
static ProfiledThreadData* AddLiveProfiledThread(
PSLockRef, UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
MOZ_ASSERT(sInstance);
MOZ_RELEASE_ASSERT(sInstance->mLiveProfiledThreads.append(
LiveProfiledThreadData{std::move(aProfiledThreadData)}));
// Return a weak pointer to the ProfiledThreadData object.
return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
}
static void UnregisterThread(PSLockRef aLockRef,
ProfiledThreadData* aProfiledThreadData) {
MOZ_ASSERT(sInstance);
DiscardExpiredDeadProfiledThreads(aLockRef);
// Find the right entry in the mLiveProfiledThreads array and remove the
// element, moving the ProfiledThreadData object for the thread into the
// mDeadProfiledThreads array.
for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
if (thread.mProfiledThreadData == aProfiledThreadData) {
thread.mProfiledThreadData->NotifyUnregistered(
sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
std::move(thread.mProfiledThreadData)));
sInstance->mLiveProfiledThreads.erase(
&sInstance->mLiveProfiledThreads[i]);
return;
}
}
}
// This is a counter to collect process CPU utilization during profiling.
// It cannot be a raw `ProfilerCounter` because we need to manually add/remove
// it while the profiler lock is already held.
class ProcessCPUCounter final : public BaseProfilerCount {
public:
explicit ProcessCPUCounter(PSLockRef aLock)
: BaseProfilerCount("processCPU", &mCounter, nullptr, "CPU",
"Process CPU utilization") {
// Adding on construction, so it's ready before the sampler starts.
locked_profiler_add_sampled_counter(aLock, this);
// Note: Removal happens in ActivePS::Destroy(), because a lock is needed.
}
void Add(int64_t aNumber) { mCounter += aNumber; }
private:
ProfilerAtomicSigned mCounter;
};
PS_GET(ProcessCPUCounter*, MaybeProcessCPUCounter);
PS_GET(PowerCounters*, MaybePowerCounters);
PS_GET(ProfilerCPUFreq*, MaybeCPUFreq);
PS_GET_AND_SET(bool, IsPaused)
// True if sampling is paused (through generic `SetIsPaused()` or specific
// `SetIsSamplingPaused()`).
static bool IsSamplingPaused(PSLockRef lock) {
MOZ_ASSERT(sInstance);
return IsPaused(lock) || sInstance->mIsSamplingPaused;
}
static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
MOZ_ASSERT(sInstance);
sInstance->mIsSamplingPaused = aIsSamplingPaused;
}
static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
// Discard any dead threads that were unregistered before bufferRangeStart.
sInstance->mDeadProfiledThreads.eraseIf(
[bufferRangeStart](
const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
Maybe<uint64_t> bufferPosition =
aProfiledThreadData->BufferPositionWhenUnregistered();
MOZ_RELEASE_ASSERT(bufferPosition,
"should have unregistered this thread");
return *bufferPosition < bufferRangeStart;
});
}
static void UnregisterPage(PSLockRef aLock,
uint64_t aRegisteredInnerWindowID) {
MOZ_ASSERT(sInstance);
auto& registeredPages = CorePS::RegisteredPages(aLock);
for (size_t i = 0; i < registeredPages.length(); i++) {
RefPtr<PageInformation>& page = registeredPages[i];
if (page->InnerWindowID() == aRegisteredInnerWindowID) {
page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(
sInstance->mDeadProfiledPages.append(std::move(page)));
registeredPages.erase(&registeredPages[i--]);
}
}
}
static void DiscardExpiredPages(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
// Discard any dead pages that were unregistered before
// bufferRangeStart.
sInstance->mDeadProfiledPages.eraseIf(
[bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
Maybe<uint64_t> bufferPosition =
aProfiledPage->BufferPositionWhenUnregistered();
MOZ_RELEASE_ASSERT(bufferPosition,
"should have unregistered this page");
return *bufferPosition < bufferRangeStart;
});
}
static void ClearUnregisteredPages(PSLockRef) {
MOZ_ASSERT(sInstance);
sInstance->mDeadProfiledPages.clear();
}
static void ClearExpiredExitProfiles(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
// Discard exit profiles that were gathered before our buffer RangeStart.
// If we have started to overwrite our data from when the Base profile was
// added, we should get rid of that Base profile because it's now older than
// our oldest Gecko profile data.
//
// When adding: (In practice the starting buffer should be empty)
// v Start == End
// | <-- Buffer range, initially empty.
// ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
//
// Later, still in range:
// v Start v End
// |=========| <-- Buffer range growing.
// ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
//
// Even later, now out of range:
// v Start v End
// |============| <-- Buffer range full and sliding.
// ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it
if (sInstance->mBaseProfileThreads &&
sInstance->mGeckoIndexWhenBaseProfileAdded
.ConvertToProfileBufferIndex() <
profiler_get_core_buffer().GetState().mRangeStart) {
DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
sInstance->mBaseProfileThreads.get());
sInstance->mBaseProfileThreads.reset();
}
sInstance->mExitProfiles.eraseIf(
[bufferRangeStart](const ExitProfile& aExitProfile) {
return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
});
}
static void AddBaseProfileThreads(PSLockRef aLock,
UniquePtr<char[]> aBaseProfileThreads) {
MOZ_ASSERT(sInstance);
DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
sInstance->mGeckoIndexWhenBaseProfileAdded =
ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
profiler_get_core_buffer().GetState().mRangeEnd);
}
static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ClearExpiredExitProfiles(aLock);
DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
sInstance->mBaseProfileThreads.get());
return std::move(sInstance->mBaseProfileThreads);
}
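// Record an exit profile's JSON together with the current buffer position, so
// that ClearExpiredExitProfiles() can discard it once the buffer has wrapped
// past that position.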
static void AddExitProfile(PSLockRef aLock, const nsACString& aExitProfile) {
MOZ_ASSERT(sInstance);
ClearExpiredExitProfiles(aLock);
MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(ExitProfile{
nsCString(aExitProfile), sInstance->mProfileBuffer.BufferRangeEnd()}));
}
static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ClearExpiredExitProfiles(aLock);
Vector<nsCString> profiles;
MOZ_RELEASE_ASSERT(
profiles.initCapacity(sInstance->mExitProfiles.length()));
for (auto& profile : sInstance->mExitProfiles) {
MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
}
sInstance->mExitProfiles.clear();
return profiles;
}
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
static void SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
MOZ_ASSERT(sInstance);
sInstance->mMemoryCounter = aMemoryCounter;
}
static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
MOZ_ASSERT(sInstance);
return sInstance->mMemoryCounter == aMemoryCounter;
}
#endif
private:
// The singleton instance.
static ActivePS* sInstance;
const TimeStamp mProfilingStartTime;
// We need to track activity generations. If we didn't, we could have the
// following scenario.
//
// - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
// gPSMutex, deletes the SamplerThread (which does a join).
//
// - profiler_start() runs on a different thread, locks gPSMutex,
// re-instantiates ActivePS, unlocks gPSMutex -- all before the join
// completes.
//
// - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
// and continues as if the start/stop pair didn't occur. Also
// profiler_stop() is stuck, unable to finish.
//
// By checking ActivePS *and* the generation, we can avoid this scenario.
// sNextGeneration is used to track the next generation number; it is static
// because it must persist across different ActivePS instantiations.
const uint32_t mGeneration;
static uint32_t sNextGeneration;
// The maximum number of entries in mProfileBuffer.
const PowerOfTwo32 mCapacity;
// The maximum duration of entries in mProfileBuffer, in seconds.
const Maybe<double> mDuration;
// The interval between samples, measured in milliseconds.
const double mInterval;
// The profile features that are enabled.
const uint32_t mFeatures;
// Substrings of names of threads we want to profile.
Vector<std::string> mFilters;
Vector<std::string> mFiltersLowered;
// ID of the active browser screen's active tab.
// It's used to determine the profiled tab. It's "0" if we failed to
// get the ID.
const uint64_t mActiveTabID;
// The chunk manager used by `mProfileBuffer` below.
// May become null if it gets transferred out of the Gecko Profiler.
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager;
// The buffer into which all samples are recorded.
ProfileBuffer mProfileBuffer;
// ProfiledThreadData objects for any threads that were profiled at any point
// during this run of the profiler:
// - mLiveProfiledThreads contains all threads that are still registered, and
// - mDeadProfiledThreads contains all threads that have already been
// unregistered but for which there is still data in the profile buffer.
Vector<LiveProfiledThreadData> mLiveProfiledThreads;
Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
// Info on all the dead pages.
// Registered pages are moved to this array after unregistration.
// We keep them in case we need them in the profile data, and remove them
// once we are sure that they won't be needed anymore.
Vector<RefPtr<PageInformation>> mDeadProfiledPages;
// Used to collect process CPU utilization values, if the feature is on.
ProcessCPUCounter* mMaybeProcessCPUCounter;
// Used to collect power use data, if the power feature is on.
PowerCounters* mMaybePowerCounters;
// Used to collect cpu frequency, if the CPU frequency feature is on.
ProfilerCPUFreq* mMaybeCPUFreq;
// The current sampler thread. This class is not responsible for destroying
// the SamplerThread object; the Destroy() method returns it so the caller
// can destroy it.
SamplerThread* const mSamplerThread;
// Is the profiler fully paused?
bool mIsPaused;
// Is the profiler periodic sampling paused?
bool mIsSamplingPaused;
// Optional startup profile thread array from BaseProfiler.
UniquePtr<char[]> mBaseProfileThreads;
ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded;
struct ExitProfile {
nsCString mJSON;
uint64_t mBufferPositionAtGatherTime;
};
Vector<ExitProfile> mExitProfiles;
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
Atomic<const BaseProfilerCount*> mMemoryCounter;
#endif
};
ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;
#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET
using ProfilerStateChangeMutex =
mozilla::baseprofiler::detail::BaseProfilerMutex;
using ProfilerStateChangeLock =
mozilla::baseprofiler::detail::BaseProfilerAutoLock;
MOZ_RUNINIT static ProfilerStateChangeMutex gProfilerStateChangeMutex;
struct IdentifiedProfilingStateChangeCallback {
ProfilingStateSet mProfilingStateSet;
ProfilingStateChangeCallback mProfilingStateChangeCallback;
uintptr_t mUniqueIdentifier;
explicit IdentifiedProfilingStateChangeCallback(
ProfilingStateSet aProfilingStateSet,
ProfilingStateChangeCallback&& aProfilingStateChangeCallback,
uintptr_t aUniqueIdentifier)
: mProfilingStateSet(aProfilingStateSet),
mProfilingStateChangeCallback(aProfilingStateChangeCallback),
mUniqueIdentifier(aUniqueIdentifier) {}
};
using IdentifiedProfilingStateChangeCallbackUPtr =
UniquePtr<IdentifiedProfilingStateChangeCallback>;
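// Process-wide list of registered state-change callbacks. It must only be
// accessed while holding gProfilerStateChangeMutex.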
MOZ_RUNINIT static Vector<IdentifiedProfilingStateChangeCallbackUPtr>
mIdentifiedProfilingStateChangeCallbacks;
void profiler_add_state_change_callback(
ProfilingStateSet aProfilingStateSet,
ProfilingStateChangeCallback&& aCallback,
uintptr_t aUniqueIdentifier /* = 0 */) {
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
#ifdef DEBUG
// Check that a non-zero id is not already in use. But forgive it in non-DEBUG
// builds; in the worst case callbacks may get removed too early.
if (aUniqueIdentifier != 0) {
for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
mIdentifiedProfilingStateChangeCallbacks) {
MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier);
}
}
#endif // DEBUG
if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) &&
profiler_is_active()) {
aCallback(ProfilingState::AlreadyActive);
}
(void)mIdentifiedProfilingStateChangeCallbacks.append(
MakeUnique<IdentifiedProfilingStateChangeCallback>(
aProfilingStateSet, std::move(aCallback), aUniqueIdentifier));
}
// Remove the callback with the given identifier.
void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
MOZ_ASSERT(aUniqueIdentifier != 0);
if (aUniqueIdentifier == 0) {
// Forgive zero in non-DEBUG builds.
return;
}
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
mIdentifiedProfilingStateChangeCallbacks.eraseIf(
[aUniqueIdentifier](
const IdentifiedProfilingStateChangeCallbackUPtr& aIdedCallback) {
if (aIdedCallback->mUniqueIdentifier != aUniqueIdentifier) {
return false;
}
if (aIdedCallback->mProfilingStateSet.contains(
ProfilingState::RemovingCallback)) {
aIdedCallback->mProfilingStateChangeCallback(
ProfilingState::RemovingCallback);
}
return true;
});
}
static void invoke_profiler_state_change_callbacks(
ProfilingState aProfilingState) {
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
mIdentifiedProfilingStateChangeCallbacks) {
if (idedCallback->mProfilingStateSet.contains(aProfilingState)) {
idedCallback->mProfilingStateChangeCallback(aProfilingState);
}
}
}
Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";
////////////////////////////////////////////////////////////////////////
// BEGIN sampling/unwinding code
// Additional registers that have to be saved when thread is paused.
#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \
defined(GP_ARCH_x86)
# define UNWINDING_REGS_HAVE_ECX_EDX
#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
defined(GP_PLAT_amd64_freebsd) || defined(GP_ARCH_amd64) || \
defined(__x86_64__)
# define UNWINDING_REGS_HAVE_R10_R12
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
# define UNWINDING_REGS_HAVE_LR_R7
#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
defined(GP_PLAT_arm64_freebsd) || defined(GP_ARCH_arm64) || \
defined(__aarch64__)
# define UNWINDING_REGS_HAVE_LR_R11
#endif
// The registers used for stack unwinding and a few other sampling purposes.
// The ctor does nothing; users are responsible for filling in the fields.
class Registers {
public:
Registers()
: mPC{nullptr},
mSP{nullptr},
mFP{nullptr}
#if defined(UNWINDING_REGS_HAVE_ECX_EDX)
,
mEcx{nullptr},
mEdx{nullptr}
#elif defined(UNWINDING_REGS_HAVE_R10_R12)
,
mR10{nullptr},
mR12{nullptr}
#elif defined(UNWINDING_REGS_HAVE_LR_R7)
,
mLR{nullptr},
mR7{nullptr}
#elif defined(UNWINDING_REGS_HAVE_LR_R11)
,
mLR{nullptr},
mR11{nullptr}
#endif
{
}
void Clear() { memset(this, 0, sizeof(*this)); }
// These fields are filled in by
// Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
// samples, and by REGISTERS_SYNC_POPULATE for synchronous samples.
Address mPC; // Instruction pointer.
Address mSP; // Stack pointer.
Address mFP; // Frame pointer.
#if defined(UNWINDING_REGS_HAVE_ECX_EDX)
Address mEcx; // Temp for return address.
Address mEdx; // Temp for frame pointer.
#elif defined(UNWINDING_REGS_HAVE_R10_R12)
Address mR10; // Temp for return address.
Address mR12; // Temp for frame pointer.
#elif defined(UNWINDING_REGS_HAVE_LR_R7)
Address mLR; // ARM link register, or temp for return address.
Address mR7; // Temp for frame pointer.
#elif defined(UNWINDING_REGS_HAVE_LR_R11)
Address mLR; // ARM link register, or temp for return address.
Address mR11; // Temp for frame pointer.
#endif
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
// This contains all the registers, which means it duplicates the fields
// above. This is ok.
ucontext_t* mContext; // The context from the signal handler or below.
ucontext_t mContextSyncStorage; // Storage for sync stack unwinding.
#endif
};
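// Flag and RAII guard ensuring that at most one thread walks the JS stack at
// any given time; `walkAllowed` is only true when this guard won the
// compare-exchange.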
Atomic<bool> WALKING_JS_STACK(false);
struct AutoWalkJSStack {
bool walkAllowed;
AutoWalkJSStack() : walkAllowed(false) {
walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
}
~AutoWalkJSStack() {
if (walkAllowed) {
WALKING_JS_STACK = false;
}
}
};
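// Holds "resume points" collected while iterating JS JIT frames (see
// ExtractJsFrames()), so that the native stack walkers below can restart
// walking past JIT-generated frames. Only supported where scIsSupported is
// true, i.e. on x86-64 with frame-pointer or MozStackWalk-based walking.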
class StackWalkControl {
public:
struct ResumePoint {
// If lost, the stack walker should resume at these values.
void* resumeSp; // If null, stop the walker here, don't resume again.
void* resumeBp;
void* resumePc;
};
#if ((defined(USE_MOZ_STACK_WALK) || defined(USE_FRAME_POINTER_STACK_WALK)) && \
defined(GP_ARCH_amd64))
public:
static constexpr bool scIsSupported = true;
void Clear() { mResumePointCount = 0; }
size_t ResumePointCount() const { return mResumePointCount; }
static constexpr size_t MaxResumePointCount() {
return scMaxResumePointCount;
}
// Add a resume point. Note that adding anything past MaxResumePointCount()
// would silently fail. In practice this means that stack walking may still
// lose native frames.
void AddResumePoint(ResumePoint&& aResumePoint) {
// If SP is null, we expect BP and PC to also be null.
MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp);
MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc);
// If BP and/or PC are not null, SP must not be null. (But we allow BP/PC to
// be null even if SP is not null.)
MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp);
MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp);
if (mResumePointCount < scMaxResumePointCount) {
mResumePoint[mResumePointCount] = std::move(aResumePoint);
++mResumePointCount;
}
}
// Only allow non-modifying range-for loops.
const ResumePoint* begin() const { return &mResumePoint[0]; }
const ResumePoint* end() const { return &mResumePoint[mResumePointCount]; }
// Find the next resume point that would be a caller of the function with the
// given SP; i.e., the resume point with the closest resumeSp > aSp.
const ResumePoint* GetResumePointCallingSp(void* aSp) const {
const ResumePoint* callingResumePoint = nullptr;
for (const ResumePoint& resumePoint : *this) {
if (resumePoint.resumeSp && // This is a potential resume point.
resumePoint.resumeSp > aSp && // It is a caller of the given SP.
(!callingResumePoint || // This is the first candidate.
resumePoint.resumeSp < callingResumePoint->resumeSp) // Or better.
) {
callingResumePoint = &resumePoint;
}
}
return callingResumePoint;
}
private:
size_t mResumePointCount = 0;
static constexpr size_t scMaxResumePointCount = 32;
ResumePoint mResumePoint[scMaxResumePointCount];
#else
public:
static constexpr bool scIsSupported = false;
// Discarded constexpr-if statements are still checked during compilation, so
// these declarations are necessary for that, even if never actually used.
void Clear();
size_t ResumePointCount();
static constexpr size_t MaxResumePointCount();
void AddResumePoint(ResumePoint&& aResumePoint);
const ResumePoint* begin() const;
const ResumePoint* end() const;
const ResumePoint* GetResumePointCallingSp(void* aSp) const;
#endif
};
// Make a copy of the JS stack into a JSFrame array, and return the number of
// copied frames.
// This copy is necessary since, like the native stack, the JS stack is iterated
// youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks.
static uint32_t ExtractJsFrames(
bool aIsSynchronous,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, ProfilerStackCollector& aCollector,
JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
MOZ_ASSERT(aJsFrames,
"ExtractJsFrames should only be called if there is a "
"JsFrameBuffer to fill.");
uint32_t jsFramesCount = 0;
// Only walk jit stack if profiling frame iterator is turned on.
JSContext* context = aThreadData.GetJSContext();
if (context && JS::IsProfilingEnabledForContext(context)) {
AutoWalkJSStack autoWalkJSStack;
if (autoWalkJSStack.walkAllowed) {
JS::ProfilingFrameIterator::RegisterState registerState;
registerState.pc = aRegs.mPC;
registerState.sp = aRegs.mSP;
registerState.fp = aRegs.mFP;
#if defined(UNWINDING_REGS_HAVE_ECX_EDX)
registerState.tempRA = aRegs.mEcx;
registerState.tempFP = aRegs.mEdx;
#elif defined(UNWINDING_REGS_HAVE_R10_R12)
registerState.tempRA = aRegs.mR10;
registerState.tempFP = aRegs.mR12;
#elif defined(UNWINDING_REGS_HAVE_LR_R7)
registerState.lr = aRegs.mLR;
registerState.tempFP = aRegs.mR7;
#elif defined(UNWINDING_REGS_HAVE_LR_R11)
registerState.lr = aRegs.mLR;
registerState.tempFP = aRegs.mR11;
#endif
// Non-periodic sampling passes Nothing() as the buffer write position to
// ProfilingFrameIterator to avoid incorrectly resetting the buffer
// position of sampled JIT frames inside the JS engine.
Maybe<uint64_t> samplePosInBuffer;
if (!aIsSynchronous) {
// aCollector.SamplePositionInBuffer() will return Nothing() when
// profiler_suspend_and_sample_thread is called from the background hang
// reporter.
samplePosInBuffer = aCollector.SamplePositionInBuffer();
}
for (JS::ProfilingFrameIterator jsIter(context, registerState,
samplePosInBuffer);
!jsIter.done(); ++jsIter) {
if (aIsSynchronous || jsIter.isWasm()) {
jsFramesCount +=
jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES);
if (jsFramesCount == MAX_JS_FRAMES) {
break;
}
} else {
Maybe<JS::ProfilingFrameIterator::Frame> frame =
jsIter.getPhysicalFrameWithoutLabel();
if (frame.isSome()) {
aJsFrames[jsFramesCount++] = std::move(frame).ref();
if (jsFramesCount == MAX_JS_FRAMES) {
break;
}
}
}
if constexpr (StackWalkControl::scIsSupported) {
if (aStackWalkControlIfSupported) {
jsIter.getCppEntryRegisters().apply(
[&](const JS::ProfilingFrameIterator::RegisterState&
aCppEntry) {
StackWalkControl::ResumePoint resumePoint;
resumePoint.resumeSp = aCppEntry.sp;
resumePoint.resumeBp = aCppEntry.fp;
resumePoint.resumePc = aCppEntry.pc;
aStackWalkControlIfSupported->AddResumePoint(
std::move(resumePoint));
});
}
} else {
MOZ_ASSERT(!aStackWalkControlIfSupported,
"aStackWalkControlIfSupported should be null when "
"!StackWalkControl::scIsSupported");
(void)aStackWalkControlIfSupported;
}
}
}
}
return jsFramesCount;
}
// Merges the profiling stack, native stack, and JS stack, outputting the
// details to aCollector.
static void MergeStacks(
bool aIsSynchronous,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const NativeStack& aNativeStack, ProfilerStackCollector& aCollector,
JsFrame* aJsFrames, uint32_t aJsFramesCount) {
// WARNING: this function runs within the profiler's "critical section".
// WARNING: this function might be called while the profiler is inactive, and
// cannot rely on ActivePS.
MOZ_ASSERT_IF(!aJsFrames, aJsFramesCount == 0);
const ProfilingStack& profilingStack = aThreadData.ProfilingStackCRef();
const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
uint32_t profilingStackFrameCount = profilingStack.stackSize();
// While the profiling stack array is ordered oldest-to-youngest, the JS and
// native arrays are ordered youngest-to-oldest. We must add frames to
// aCollector oldest-to-youngest. Thus, iterate over the profiling stack
// forwards and the JS and native arrays backwards. Note: this means the
// terminating condition for jsIndex and nativeIndex is dropping below 0.
uint32_t profilingStackIndex = 0;
int32_t jsIndex = aJsFramesCount - 1;
int32_t nativeIndex = aNativeStack.mCount - 1;
uint8_t* lastLabelFrameStackAddr = nullptr;
uint8_t* jitEndStackAddr = nullptr;
// Iterate as long as there is at least one frame remaining.
while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
nativeIndex >= 0) {
// There are 1 to 3 frames available. Find and add the oldest.
uint8_t* profilingStackAddr = nullptr;
uint8_t* jsStackAddr = nullptr;
uint8_t* nativeStackAddr = nullptr;
uint8_t* jsActivationAddr = nullptr;
if (profilingStackIndex != profilingStackFrameCount) {
const js::ProfilingStackFrame& profilingStackFrame =
profilingStackFrames[profilingStackIndex];
if (profilingStackFrame.isLabelFrame() ||
profilingStackFrame.isSpMarkerFrame()) {
lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
}
// Skip any JS_OSR frames. Such frames are used when the JS interpreter
// enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
// To avoid both the profiling stack frame and jit frame being recorded
// (and showing up twice), the interpreter marks the interpreter
// profiling stack frame as JS_OSR to ensure that it doesn't get counted.
if (profilingStackFrame.isOSRFrame()) {
profilingStackIndex++;
continue;
}
MOZ_ASSERT(lastLabelFrameStackAddr);
profilingStackAddr = lastLabelFrameStackAddr;
}
if (jsIndex >= 0) {
jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress;
jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation;
}
if (nativeIndex >= 0) {
nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
}
// If there's a native stack frame which has the same SP as a profiling
// stack frame, pretend we didn't see the native stack frame. Ditto for a
// native stack frame which has the same SP as a JS stack frame. In effect
// this means profiling stack frames or JS frames trump conflicting native
// frames.
if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
jsStackAddr == nativeStackAddr)) {
nativeStackAddr = nullptr;
nativeIndex--;
MOZ_ASSERT(profilingStackAddr || jsStackAddr);
}
// Sanity checks.
MOZ_ASSERT_IF(profilingStackAddr,
profilingStackAddr != jsStackAddr &&
profilingStackAddr != nativeStackAddr);
MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
jsStackAddr != nativeStackAddr);
MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
nativeStackAddr != jsStackAddr);
// Check to see if profiling stack frame is top-most.
if (profilingStackAddr > jsStackAddr &&
profilingStackAddr > nativeStackAddr) {
MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
const js::ProfilingStackFrame& profilingStackFrame =
profilingStackFrames[profilingStackIndex];
// Sp marker frames are just annotations and should not be recorded in
// the profile.
if (!profilingStackFrame.isSpMarkerFrame()) {
// The JIT only allows the top-most frame to have a nullptr pc.
MOZ_ASSERT_IF(
profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
!profilingStackFrame.pc(),
&profilingStackFrame ==
&profilingStack.frames[profilingStack.stackSize() - 1]);
if (aIsSynchronous && profilingStackFrame.categoryPair() ==
JS::ProfilingCategoryPair::PROFILER) {
// For stacks captured synchronously (i.e. marker stacks), stop
// walking the stack as soon as we enter the profiler category,
// to avoid showing profiler internal code in marker stacks.
return;
}
aCollector.CollectProfilingStackFrame(profilingStackFrame);
}
profilingStackIndex++;
continue;
}
// Check to see if JS jit stack frame is top-most
if (jsStackAddr > nativeStackAddr) {
MOZ_ASSERT(jsIndex >= 0);
const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex];
jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress;
// Stringifying non-wasm JIT frames is delayed until streaming time. To
// re-lookup the entry in the JitcodeGlobalTable, we need to store the
// JIT code address (OptInfoAddr) in the circular buffer.
//
// Note that we cannot do this when we are synchronously sampling the
// current thread; that is, when called from profiler_get_backtrace. The
// captured backtrace is usually externally stored for an indeterminate
// amount of time, such as in nsRefreshDriver. Problematically, the
// stored backtrace may be alive across a GC during which the profiler
// itself is disabled. In that case, the JS engine is free to discard its
// JIT code. This means that if we inserted such OptInfoAddr entries into
// the buffer, nsRefreshDriver would now be holding on to a backtrace
// with stale JIT code return addresses.
if (aIsSynchronous ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_WasmIon ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_WasmBaseline ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_WasmOther) {
aCollector.CollectWasmFrame(jsFrame.profilingCategory(), jsFrame.label);
} else if (jsFrame.kind ==
JS::ProfilingFrameIterator::Frame_BaselineInterpreter) {
// Materialize a ProfilingStackFrame similar to the C++ Interpreter. We
// also set the IS_BLINTERP_FRAME flag to differentiate though.
JSScript* script = jsFrame.interpreterScript;
jsbytecode* pc = jsFrame.interpreterPC();
js::ProfilingStackFrame stackFrame;
constexpr uint32_t ExtraFlags =
uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME);
stackFrame.initJsFrame<JS::ProfilingCategoryPair::JS_BaselineInterpret,
ExtraFlags>("", jsFrame.label, script, pc,
jsFrame.realmID);
aCollector.CollectProfilingStackFrame(stackFrame);
} else {
MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
}
jsIndex--;
continue;
}
// If we reach here, there must be a native stack frame and it must be the
// greatest frame.
if (nativeStackAddr &&
// If the latest JS frame was JIT, this could be the native frame that
// corresponds to it. In that case, skip the native frame, because
// there's no need for the same frame to be present twice in the stack.
// The JS frame can be considered the symbolicated version of the native
// frame.
(!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
// This might still be a JIT operation; check to make sure that it is not
// in range of the NEXT JavaScript stack's activation address.
(!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
MOZ_ASSERT(nativeIndex >= 0);
void* addr = (void*)aNativeStack.mPCs[nativeIndex];
aCollector.CollectNativeLeafAddr(addr);
}
if (nativeIndex >= 0) {
nativeIndex--;
}
}
// Update the JS context with the current profile sample buffer generation.
//
// Only do this for periodic samples. We don't want to do this for
// synchronous samples, and we also don't want to do it for calls to
// profiler_suspend_and_sample_thread() from the background hang reporter -
// in that case, aCollector.BufferRangeStart() will return Nothing().
if (!aIsSynchronous) {
aCollector.BufferRangeStart().apply(
[&aThreadData](uint64_t aBufferRangeStart) {
JSContext* context = aThreadData.GetJSContext();
if (context) {
JS::SetJSContextProfilerSampleBufferRangeStart(context,
aBufferRangeStart);
}
});
}
}
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
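// Callback invoked for each frame found by the native stack walkers below; it
// appends the frame's SP and PC to the NativeStack passed as the closure.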
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
void* aClosure) {
NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
nativeStack->mSPs[nativeStack->mCount] = aSP;
nativeStack->mPCs[nativeStack->mCount] = aPC;
nativeStack->mCount++;
}
#endif
#if defined(USE_FRAME_POINTER_STACK_WALK)
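// Walks the native stack using frame pointers, starting from the captured
// registers, and optionally resumes past JIT frames using the resume points
// in aStackWalkControlIfSupported.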
static void DoFramePointerBacktrace(
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) {
// WARNING: this function runs within the profiler's "critical section".
// WARNING: this function might be called while the profiler is inactive, and
// cannot rely on ActivePS.
// Make a local copy of the Registers, to allow modifications.
Registers regs = aRegs;
// Start with the current function. We use 0 as the frame number here because
// the FramePointerStackWalk() call below will use 1..N. This is a bit weird
// but it doesn't matter because StackWalkCallback() doesn't use the frame
// number argument.
StackWalkCallback(/* frameNum */ 0, regs.mPC, regs.mSP, &aNativeStack);
const void* const stackEnd = aThreadData.StackTop();
// This is to check forward-progress after using a resume point.
void* previousResumeSp = nullptr;
for (;;) {
if (!(regs.mSP && regs.mSP <= regs.mFP && regs.mFP <= stackEnd)) {
break;
}
FramePointerStackWalk(StackWalkCallback,
uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
&aNativeStack, reinterpret_cast<void**>(regs.mFP),
const_cast<void*>(stackEnd));
if constexpr (!StackWalkControl::scIsSupported) {
break;
} else {
if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
// No room to add more frames.
break;
}
if (!aStackWalkControlIfSupported ||
aStackWalkControlIfSupported->ResumePointCount() == 0) {
// No resume information.
break;
}
void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
if (previousResumeSp &&
((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
// No progress after the previous resume point.
break;
}
const StackWalkControl::ResumePoint* resumePoint =
aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
if (!resumePoint) {
break;
}
void* sp = resumePoint->resumeSp;
if (!sp) {
// Null SP in a resume point means we stop here.
break;
}
void* pc = resumePoint->resumePc;
StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
&aNativeStack);
++aNativeStack.mCount;
if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
break;
}
// Prepare context to resume stack walking.
regs.mPC = (Address)pc;
regs.mSP = (Address)sp;
regs.mFP = (Address)resumePoint->resumeBp;
previousResumeSp = sp;
}
}
}
#endif
#if defined(USE_MOZ_STACK_WALK)
static void DoMozStackWalkBacktrace(
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) {
// WARNING: this function runs within the profiler's "critical section".
// WARNING: this function might be called while the profiler is inactive, and
// cannot rely on ActivePS.
// Start with the current function. We use 0 as the frame number here because
// the MozStackWalkThread() call below will use 1..N. This is a bit weird but
// it doesn't matter because StackWalkCallback() doesn't use the frame number
// argument.
StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
HANDLE thread = aThreadData.PlatformDataCRef().ProfiledThread();
MOZ_ASSERT(thread);
CONTEXT context_buf;
CONTEXT* context = nullptr;
if constexpr (StackWalkControl::scIsSupported) {
context = &context_buf;
memset(&context_buf, 0, sizeof(CONTEXT));
context_buf.ContextFlags = CONTEXT_FULL;
# if defined(_M_AMD64)
context_buf.Rsp = (DWORD64)aRegs.mSP;
context_buf.Rbp = (DWORD64)aRegs.mFP;
context_buf.Rip = (DWORD64)aRegs.mPC;
# else
static_assert(!StackWalkControl::scIsSupported,
"Mismatched support between StackWalkControl and "
"DoMozStackWalkBacktrace");
# endif
} else {
context = nullptr;
}
// This is to check forward-progress after using a resume point.
void* previousResumeSp = nullptr;
for (;;) {
MozStackWalkThread(StackWalkCallback,
uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
&aNativeStack, thread, context);
if constexpr (!StackWalkControl::scIsSupported) {
break;
} else {
if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
// No room to add more frames.
break;
}
if (!aStackWalkControlIfSupported ||
aStackWalkControlIfSupported->ResumePointCount() == 0) {
// No resume information.
break;
}
void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
if (previousResumeSp &&
((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
// No progress after the previous resume point.
break;
}
const StackWalkControl::ResumePoint* resumePoint =
aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
if (!resumePoint) {
break;
}
void* sp = resumePoint->resumeSp;
if (!sp) {
// Null SP in a resume point means we stop here.
break;
}
void* pc = resumePoint->resumePc;
StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
&aNativeStack);
++aNativeStack.mCount;
if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
break;
}
// Prepare context to resume stack walking.
memset(&context_buf, 0, sizeof(CONTEXT));
context_buf.ContextFlags = CONTEXT_FULL;
# if defined(_M_AMD64)
context_buf.Rsp = (DWORD64)sp;
context_buf.Rbp = (DWORD64)resumePoint->resumeBp;
context_buf.Rip = (DWORD64)pc;
# else
static_assert(!StackWalkControl::scIsSupported,
"Mismatched support between StackWalkControl and "
"DoMozStackWalkBacktrace");
# endif
previousResumeSp = sp;
}
}
}
#endif
#ifdef USE_EHABI_STACKWALK
static void DoEHABIBacktrace(
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) {
// WARNING: this function runs within the profiler's "critical section".
// WARNING: this function might be called while the profiler is inactive, and
// cannot rely on ActivePS.
aNativeStack.mCount = EHABIStackWalk(
aRegs.mContext->uc_mcontext, const_cast<void*>(aThreadData.StackTop()),
aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES);
(void)aStackWalkControlIfSupported; // TODO: Implement.
}
#endif
#ifdef USE_LUL_STACKWALK
// See the comment at the callsite for why this function is necessary.
# if defined(MOZ_HAVE_ASAN_IGNORE)
MOZ_ASAN_IGNORE static void ASAN_memcpy(void* aDst, const void* aSrc,
size_t aLen) {
// The obvious thing to do here is call memcpy(). However, although
// ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
// false positive still manifests! So we must implement memcpy() ourselves
// within this function.
char* dst = static_cast<char*>(aDst);
const char* src = static_cast<const char*>(aSrc);
for (size_t i = 0; i < aLen; i++) {
dst[i] = src[i];
}
}
# endif
static void DoLULBacktrace(
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) {
// WARNING: this function runs within the profiler's "critical section".
// WARNING: this function might be called while the profiler is inactive, and
// cannot rely on ActivePS.
(void)aStackWalkControlIfSupported; // TODO: Implement.
const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
lul::UnwindRegs startRegs;
memset(&startRegs, 0, sizeof(startRegs));
# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
# elif defined(GP_PLAT_amd64_freebsd)
startRegs.xip = lul::TaggedUWord(mc->mc_rip);
startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
startRegs.pc = lul::TaggedUWord(mc->pc);
startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
startRegs.sp = lul::TaggedUWord(mc->sp);
# elif defined(GP_PLAT_arm64_freebsd)
startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
# elif defined(GP_PLAT_mips64_linux)
startRegs.pc = lul::TaggedUWord(mc->pc);
startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
# else
# error "Unknown plat"
# endif
// Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
// stack's registered top point. Do some basic validity checks too. This
// assumes that the TaggedUWord holding the stack pointer value is valid, but
// it should be, since it was constructed that way in the code just above.
// We could construct |stackImg| so that LUL reads directly from the stack in
// question, rather than from a copy of it. That would reduce overhead and
// space use a bit. However, it gives a problem with dynamic analysis tools
// (ASan, TSan, Valgrind) which is that such tools will report invalid or
// racing memory accesses, and such accesses will be reported deep inside LUL.
// By taking a copy here, we can either sanitise the copy (for Valgrind) or
// copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
// to try and suppress errors inside LUL.
//
// N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
// observed in some minutes of testing, whilst keeping the size of this
// function (DoLULBacktrace)'s frame reasonable. Most stacks observed in
// practice are small, 4KB or less, and so the copy costs are insignificant
// compared to other profiler overhead.
//
// |stackImg| is allocated on this (the sampling thread's) stack. That
// implies that the frame for this function is at least N_STACK_BYTES large.
// In general it would be considered unacceptable to have such a large frame
// on a stack, but it only exists for the unwinder thread, and so is not
// expected to be a problem. Allocating it on the heap is troublesome because
// this function runs whilst the sampled thread is suspended, so any heap
// allocation risks deadlock. Allocating it as a global variable is not
// thread safe, which would be a problem if we ever allow multiple sampler
// threads. Hence allocating it on the stack seems to be the least-worst
// option.
lul::StackImage stackImg;
{
# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
defined(GP_PLAT_amd64_freebsd)
uintptr_t rEDZONE_SIZE = 128;
uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
defined(GP_PLAT_arm64_freebsd)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_mips64_linux)
uintptr_t rEDZONE_SIZE = 0;
uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
# else
# error "Unknown plat"
# endif
uintptr_t end = reinterpret_cast<uintptr_t>(aThreadData.StackTop());
uintptr_t ws = sizeof(void*);
start &= ~(ws - 1);
end &= ~(ws - 1);
uintptr_t nToCopy = 0;
if (start < end) {
nToCopy = end - start;
if (nToCopy >= 1024u * 1024u) {
// start is abnormally far from end, possibly due to some special code
// that uses a separate stack elsewhere (e.g.: rr). In this case we just
// give up on this sample.
nToCopy = 0;
} else if (nToCopy > lul::N_STACK_BYTES) {
nToCopy = lul::N_STACK_BYTES;
}
}
MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
stackImg.mLen = nToCopy;
stackImg.mStartAvma = start;
if (nToCopy > 0) {
// If this is a vanilla memcpy(), ASAN makes the following complaint:
//
// ERROR: AddressSanitizer: stack-buffer-underflow ...
// ...
// HINT: this may be a false positive if your program uses some custom
// stack unwind mechanism or swapcontext
//
// This code is very much a custom stack unwind mechanism! So we use an
// alternative memcpy() implementation that is ignored by ASAN.
# if defined(MOZ_HAVE_ASAN_IGNORE)
ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
# else
memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
# endif
(void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
}
}
size_t framePointerFramesAcquired = 0;
lul::LUL* lul = CorePS::Lul();
MOZ_RELEASE_ASSERT(lul);
lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
&aNativeStack.mCount, &framePointerFramesAcquired,
MAX_NATIVE_FRAMES, &startRegs, &stackImg);
// Update stats in the LUL stats object. Unfortunately this requires
// three global memory operations.
lul->mStats.mContext += 1;
lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
lul->mStats.mFP += framePointerFramesAcquired;
}
#endif
#ifdef HAVE_NATIVE_UNWIND
static void DoNativeBacktrace(
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) {
// This method determines which stackwalker is used for periodic and
// synchronous samples. (Backtrace samples are treated differently, see
// profiler_suspend_and_sample_thread() for details). The only part of the
// ordering that matters is that LUL must precede FRAME_POINTER, because on
// Linux they can both be present.
# if defined(USE_LUL_STACKWALK)
DoLULBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported);
# elif defined(USE_EHABI_STACKWALK)
DoEHABIBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported);
# elif defined(USE_FRAME_POINTER_STACK_WALK)
DoFramePointerBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported);
# elif defined(USE_MOZ_STACK_WALK)
DoMozStackWalkBacktrace(aThreadData, aRegs, aNativeStack,
aStackWalkControlIfSupported);
# else
# error "Invalid configuration"
# endif
}
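// Walks the native stack of the current thread directly (without suspending it
// or using captured registers), from the current frame up to `stackTop`.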
void DoNativeBacktraceDirect(const void* stackTop, NativeStack& aNativeStack,
StackWalkControl* aStackWalkControlIfSupported) {
# if defined(MOZ_PROFILING)
# if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
// StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
void* previousResumeSp = nullptr;
for (;;) {
MozStackWalk(StackWalkCallback, stackTop,
uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
&aNativeStack);
if constexpr (!StackWalkControl::scIsSupported) {
break;
} else {
if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
// No room to add more frames.
break;
}
if (!aStackWalkControlIfSupported ||
aStackWalkControlIfSupported->ResumePointCount() == 0) {
// No resume information.
break;
}
void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
if (previousResumeSp &&
((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
// No progress after the previous resume point.
break;
}
const StackWalkControl::ResumePoint* resumePoint =
aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
if (!resumePoint) {
break;
}
void* sp = resumePoint->resumeSp;
if (!sp) {
// Null SP in a resume point means we stop here.
break;
}
void* pc = resumePoint->resumePc;
StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
&aNativeStack);
++aNativeStack.mCount;
if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
break;
}
previousResumeSp = sp;
}
}
# else // defined(USE_FRAME_POINTER_STACK_WALK) ||
// defined(USE_MOZ_STACK_WALK)
MOZ_CRASH(
"Cannot call DoNativeBacktraceDirect without either "
"USE_FRAME_POINTER_STACK_WALK USE_MOZ_STACK_WALK");
# endif // defined(USE_FRAME_POINTER_STACK_WALK) ||
// defined(USE_MOZ_STACK_WALK)
# else
aNativeStack.mCount = 0;
# endif
}
#endif
// Writes some components shared by periodic and synchronous profiles to
// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
// and DoPeriodicSample().)
//
// The grammar for entry sequences is in a comment above
// ProfileBuffer::StreamSamplesToJSON.
static inline void DoSharedSample(
bool aIsSynchronous, uint32_t aFeatures,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos,
uint64_t aBufferRangeStart, ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
// WARNING: this function runs within the profiler's "critical section".
MOZ_ASSERT(!aBuffer.IsThreadSafe(),
"Mutexes cannot be used inside this critical section");
ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
StackWalkControl* stackWalkControlIfSupported = nullptr;
#if defined(HAVE_NATIVE_UNWIND)
const bool captureNative = ProfilerFeature::HasStackWalk(aFeatures) &&
aCaptureOptions == StackCaptureOptions::Full;
StackWalkControl stackWalkControl;
if constexpr (StackWalkControl::scIsSupported) {
if (captureNative) {
stackWalkControlIfSupported = &stackWalkControl;
}
}
#endif // defined(HAVE_NATIVE_UNWIND)
const uint32_t jsFramesCount =
aJsFrames ? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector,
aJsFrames, stackWalkControlIfSupported)
: 0;
NativeStack nativeStack{.mCount = 0};
#if defined(HAVE_NATIVE_UNWIND)
if (captureNative) {
DoNativeBacktrace(aThreadData, aRegs, nativeStack,
stackWalkControlIfSupported);
MergeStacks(aIsSynchronous, aThreadData, nativeStack, collector, aJsFrames,
jsFramesCount);
} else
#endif
{
MergeStacks(aIsSynchronous, aThreadData, nativeStack, collector, aJsFrames,
jsFramesCount);
// We can't walk the whole native stack, but we can record the top frame.
if (aCaptureOptions == StackCaptureOptions::Full) {
aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
}
}
}
// Writes the components of a synchronous sample to the given ProfileBuffer.
static void DoSyncSample(
uint32_t aFeatures,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const TimeStamp& aNow, const Registers& aRegs, ProfileBuffer& aBuffer,
StackCaptureOptions aCaptureOptions) {
// WARNING: this function runs within the profiler's "critical section".
MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack,
"DoSyncSample should not be called when no capture is needed");
const uint64_t bufferRangeStart = aBuffer.BufferRangeStart();
const uint64_t samplePos =
aBuffer.AddThreadIdEntry(aThreadData.Info().ThreadId());
TimeDuration delta = aNow - CorePS::ProcessStartTime();
aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
if (!aThreadData.GetJSContext()) {
// No JSContext, there is no JS frame buffer (and no need for it).
DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
/* aJsFrames = */ nullptr, aRegs, samplePos,
bufferRangeStart, aBuffer, aCaptureOptions);
} else {
// JSContext is present, we need to lock the thread data to access the JS
// frame buffer.
ThreadRegistration::WithOnThreadRef([&](ThreadRegistration::OnThreadRef
aOnThreadRef) {
aOnThreadRef.WithConstLockedRWOnThread(
[&](const ThreadRegistration::LockedRWOnThread& aLockedThreadData) {
DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
aLockedThreadData.GetJsFrameBuffer(), aRegs,
samplePos, bufferRangeStart, aBuffer,
aCaptureOptions);
});
});
}
}
// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
// The ThreadId entry is already written in the main ProfileBuffer; its location
// is `aSamplePos`. We can write the rest to `aBuffer` (which may be different).
static inline void DoPeriodicSample(
PSLockRef aLock,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
ProfileBuffer& aBuffer) {
// WARNING: this function runs within the profiler's "critical section".
MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
DoSharedSample(/* aIsSynchronous = */ false, ActivePS::Features(aLock),
aThreadData, jsFrames, aRegs, aSamplePos, aBufferRangeStart,
aBuffer);
}
#undef UNWINDING_REGS_HAVE_ECX_EDX
#undef UNWINDING_REGS_HAVE_R10_R12
#undef UNWINDING_REGS_HAVE_LR_R7
#undef UNWINDING_REGS_HAVE_LR_R11
// END sampling/unwinding code
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// BEGIN saving/streaming code
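// Values above JavaScript's Number.MAX_SAFE_INTEGER (2^53 - 1) cannot be
// represented exactly as a JS number, so SafeJSInteger() maps them to -1.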
const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;
static int64_t SafeJSInteger(uint64_t aValue) {
return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
}
static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
const SharedLibrary& aLib) {
aWriter.StartObjectElement();
aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
aWriter.StringProperty("name", aLib.GetModuleName());
aWriter.StringProperty("path", aLib.GetModulePath());
aWriter.StringProperty("debugName", aLib.GetDebugName());
aWriter.StringProperty("debugPath", aLib.GetDebugPath());
aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
aWriter.StringProperty("codeId", aLib.GetCodeId());
aWriter.StringProperty("arch", aLib.GetArch());
aWriter.EndObject();
}
void AppendSharedLibraries(JSONWriter& aWriter,
const SharedLibraryInfo& aInfo) {
for (size_t i = 0; i < aInfo.GetSize(); i++) {
AddSharedLibraryInfoToStream(aWriter, aInfo.GetEntry(i));
}
}
static void StreamCategories(SpliceableJSONWriter& aWriter) {
// Same order as ProfilingCategory. Format:
// [
// {
// name: "Idle",
// color: "transparent",
// subcategories: ["Other"],
// },
// {
// name: "Other",
// color: "grey",
// subcategories: [
// "JSM loading",
// "Subprocess launching",
// "DLL loading"
// ]
// },
// ...
// ]
#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
aWriter.Start(); \
aWriter.StringProperty("name", labelAsString); \
aWriter.StringProperty("color", color); \
aWriter.StartArrayProperty("subcategories");
#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
aWriter.StringElement(labelAsString);
#define CATEGORY_JSON_END_CATEGORY \
aWriter.EndArray(); \
aWriter.EndObject();
MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
CATEGORY_JSON_SUBCATEGORY,
CATEGORY_JSON_END_CATEGORY)
#undef CATEGORY_JSON_BEGIN_CATEGORY
#undef CATEGORY_JSON_SUBCATEGORY
#undef CATEGORY_JSON_END_CATEGORY
}
static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
// Get an array view with all registered marker-type-specific functions.
base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList
markerTypeFunctionsArray;
// List of streamed marker names, this is used to spot duplicates.
std::set<std::string> names;
// Stream the display schema for each different one. (Duplications may come
// from the same code potentially living in different libraries.)
for (const auto& markerTypeFunctions : markerTypeFunctionsArray) {
auto name = markerTypeFunctions.mMarkerTypeNameFunction();
// std::set::insert(T&&) returns a pair; its `second` is true if the element
// was actually inserted (i.e., it was not there yet).
const bool didInsert =
names.insert(std::string(name.data(), name.size())).second;
if (didInsert) {
markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name);
}
}
// Now stream the Rust marker schemas. We also pass the names set as a void
// pointer, so that the Rust side can keep checking whether a schema has
// already been added.
profiler::ffi::gecko_profiler_stream_marker_schemas(
&aWriter, static_cast<void*>(&names));
}
// Some meta information that is better recorded before streaming the profile.
// This is *not* intended to be cached, as some values could change between
// profiling sessions.
struct PreRecordedMetaInformation {
bool mAsyncStacks;
// This struct should only live on the stack, so it's fine to use Auto
// strings.
nsAutoCString mHttpPlatform;
nsAutoCString mHttpOscpu;
nsAutoCString mHttpMisc;
nsAutoCString mRuntimeABI;
nsAutoCString mRuntimeToolkit;
nsAutoCString mAppInfoProduct;
nsAutoCString mAppInfoAppBuildID;
nsAutoCString mAppInfoSourceURL;
int32_t mProcessInfoCpuCount;
int32_t mProcessInfoCpuCores;
nsAutoCString mProcessInfoCpuName;
};
// This function should be called out of the profiler lock.
// It gathers non-trivial data that doesn't require the profiler to stop, or for
// which the request could theoretically deadlock if the profiler is locked.
static PreRecordedMetaInformation PreRecordMetaInformation(
bool aShutdown = false) {
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
PreRecordedMetaInformation info = {}; // Aggregate-init all fields.
if (!NS_IsMainThread()) {
// Leave these properties out if we're not on the main thread.
// At the moment, the only case in which this function is called on a
// background thread is if we're in a content process and are going to
// send this profile to the parent process. In that case, the parent
// process profile's "meta" object already has the rest of the properties,
// and the parent process profile is dumped on that process's main thread.
return info;
}
info.mAsyncStacks =
!aShutdown && Preferences::GetBool("javascript.options.asyncstack");
nsresult res;
if (nsCOMPtr<nsIHttpProtocolHandler> http =
do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);
!NS_FAILED(res) && http) {
Unused << http->GetPlatform(info.mHttpPlatform);
#if defined(XP_MACOSX)
// On Mac, the http "oscpu" is capped at 10.15, so we need to get the real
// OS version directly.
int major = 0;
int minor = 0;
int bugfix = 0;
nsCocoaFeatures::GetSystemVersion(major, minor, bugfix);
if (major != 0) {
info.mHttpOscpu.AppendLiteral("macOS ");
info.mHttpOscpu.AppendInt(major);
info.mHttpOscpu.AppendLiteral(".");
info.mHttpOscpu.AppendInt(minor);
info.mHttpOscpu.AppendLiteral(".");
info.mHttpOscpu.AppendInt(bugfix);
} else
#endif
#if defined(GP_OS_windows)
// On Windows, the http "oscpu" is capped at Windows 10, so we need to get
// the real OS version directly.
OSVERSIONINFO ovi = {sizeof(OSVERSIONINFO)};
if (GetVersionEx(&ovi)) {
info.mHttpOscpu.AppendLiteral("Windows ");
// The major version returned for Windows 11 is 10, but we can
// identify it from the build number.
info.mHttpOscpu.AppendInt(
ovi.dwBuildNumber >= 22000 ? 11 : int32_t(ovi.dwMajorVersion));
info.mHttpOscpu.AppendLiteral(".");
info.mHttpOscpu.AppendInt(int32_t(ovi.dwMinorVersion));
# if defined(_ARM64_)
info.mHttpOscpu.AppendLiteral(" Arm64");
# endif
info.mHttpOscpu.AppendLiteral("; build=");
info.mHttpOscpu.AppendInt(int32_t(ovi.dwBuildNumber));
} else
#endif
{
Unused << http->GetOscpu(info.mHttpOscpu);
}
// Firefox version is capped to 109.0 in the http "misc" field, so we record
// the real version here instead.
info.mHttpMisc.AssignLiteral("rv:");
info.mHttpMisc.AppendLiteral(MOZILLA_UAVERSION);
}
if (nsCOMPtr<nsIXULRuntime> runtime =
do_GetService("@mozilla.org/xre/runtime;1");
runtime) {
Unused << runtime->GetXPCOMABI(info.mRuntimeABI);
Unused << runtime->GetWidgetToolkit(info.mRuntimeToolkit);
}
if (nsCOMPtr<nsIXULAppInfo> appInfo =
do_GetService("@mozilla.org/xre/app-info;1");
appInfo) {
Unused << appInfo->GetName(info.mAppInfoProduct);
Unused << appInfo->GetAppBuildID(info.mAppInfoAppBuildID);
Unused << appInfo->GetSourceURL(info.mAppInfoSourceURL);
}
ProcessInfo processInfo = {}; // Aggregate-init all fields to false/zeroes.
if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) {
info.mProcessInfoCpuCount = processInfo.cpuCount;
info.mProcessInfoCpuCores = processInfo.cpuCores;
info.mProcessInfoCpuName = processInfo.cpuName;
}
return info;
}
// Implemented in platform-specific cpps, to add object properties describing
// the units of CPU measurements in samples.
static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
SpliceableJSONWriter& aWriter);
static void StreamMetaJSCustomObject(
PSLockRef aLock, SpliceableJSONWriter& aWriter, bool aIsShuttingDown,
const PreRecordedMetaInformation& aPreRecordedMetaInformation) {
MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
aWriter.IntProperty("version", GECKO_PROFILER_FORMAT_VERSION);
// The "startTime" field holds the number of milliseconds since midnight
// January 1, 1970 GMT. This grotty code computes (Now - (Now -
// ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
// Note: This is the only absolute time in the profile! All other timestamps
// are relative to this startTime.
TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
aWriter.DoubleProperty(
"startTime",
static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds()));
aWriter.DoubleProperty("profilingStartTime", (ActivePS::ProfilingStartTime() -
CorePS::ProcessStartTime())
.ToMilliseconds());
if (const TimeStamp contentEarliestTime =
ActivePS::Buffer(aLock)
.UnderlyingChunkedBuffer()
.GetEarliestChunkStartTimeStamp();
!contentEarliestTime.IsNull()) {
aWriter.DoubleProperty(
"contentEarliestTime",
(contentEarliestTime - CorePS::ProcessStartTime()).ToMilliseconds());
} else {
aWriter.NullProperty("contentEarliestTime");
}
const double profilingEndTime = profiler_time();
aWriter.DoubleProperty("profilingEndTime", profilingEndTime);
if (aIsShuttingDown) {
aWriter.DoubleProperty("shutdownTime", profilingEndTime);
} else {
aWriter.NullProperty("shutdownTime");
}
aWriter.StartArrayProperty("categories");
StreamCategories(aWriter);
aWriter.EndArray();
aWriter.StartArrayProperty("markerSchema");
StreamMarkerSchema(aWriter);
aWriter.EndArray();
ActivePS::WriteActiveConfiguration(aLock, aWriter,
MakeStringSpan("configuration"));
aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
#ifdef DEBUG
aWriter.IntProperty("debug", 1);
#else
aWriter.IntProperty("debug", 0);
#endif
aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0);
aWriter.IntProperty("asyncstack", aPreRecordedMetaInformation.mAsyncStacks);
aWriter.IntProperty("processType", XRE_GetProcessType());
aWriter.StringProperty("updateChannel", MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL));
if (!aPreRecordedMetaInformation.mHttpPlatform.IsEmpty()) {
aWriter.StringProperty("platform",
aPreRecordedMetaInformation.mHttpPlatform);
}
if (!aPreRecordedMetaInformation.mHttpOscpu.IsEmpty()) {
aWriter.StringProperty("oscpu", aPreRecordedMetaInformation.mHttpOscpu);
}
if (!aPreRecordedMetaInformation.mHttpMisc.IsEmpty()) {
aWriter.StringProperty("misc", aPreRecordedMetaInformation.mHttpMisc);
}
if (!aPreRecordedMetaInformation.mRuntimeABI.IsEmpty()) {
aWriter.StringProperty("abi", aPreRecordedMetaInformation.mRuntimeABI);
}
if (!aPreRecordedMetaInformation.mRuntimeToolkit.IsEmpty()) {
aWriter.StringProperty("toolkit",
aPreRecordedMetaInformation.mRuntimeToolkit);
}
if (!aPreRecordedMetaInformation.mAppInfoProduct.IsEmpty()) {
aWriter.StringProperty("product",
aPreRecordedMetaInformation.mAppInfoProduct);
}
if (!aPreRecordedMetaInformation.mAppInfoAppBuildID.IsEmpty()) {
aWriter.StringProperty("appBuildID",
aPreRecordedMetaInformation.mAppInfoAppBuildID);
}
if (!aPreRecordedMetaInformation.mAppInfoSourceURL.IsEmpty()) {
aWriter.StringProperty("sourceURL",
aPreRecordedMetaInformation.mAppInfoSourceURL);
}
if (!aPreRecordedMetaInformation.mProcessInfoCpuName.IsEmpty()) {
aWriter.StringProperty("CPUName",
aPreRecordedMetaInformation.mProcessInfoCpuName);
}
if (aPreRecordedMetaInformation.mProcessInfoCpuCores > 0) {
aWriter.IntProperty("physicalCPUs",
aPreRecordedMetaInformation.mProcessInfoCpuCores);
}
if (aPreRecordedMetaInformation.mProcessInfoCpuCount > 0) {
aWriter.IntProperty("logicalCPUs",
aPreRecordedMetaInformation.mProcessInfoCpuCount);
}
#if defined(GP_OS_android)
jni::String::LocalRef deviceInformation =
java::GeckoJavaSampler::GetDeviceInformation();
aWriter.StringProperty("device", deviceInformation->ToCString());
#endif
aWriter.StartObjectProperty("sampleUnits");
{
aWriter.StringProperty("time", "ms");
aWriter.StringProperty("eventDelay", "ms");
StreamMetaPlatformSampleUnits(aLock, aWriter);
}
aWriter.EndObject();
if (!NS_IsMainThread()) {
// Leave the rest of the properties out if we're not on the main thread.
// At the moment, the only case in which this function is called on a
// background thread is if we're in a content process and are going to
// send this profile to the parent process. In that case, the parent
// process profile's "meta" object already has the rest of the properties,
// and the parent process profile is dumped on that process's main thread.
return;
}
// We should avoid collecting extension metadata for the profiler when there
// is no observer service, since an ExtensionPolicyService cannot be created
// then.
if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
aWriter.StartObjectProperty("extensions");
{
{
JSONSchemaWriter schema(aWriter);
schema.WriteField("id");
schema.WriteField("name");
schema.WriteField("baseURL");
}
aWriter.StartArrayProperty("data");
{
nsTArray<RefPtr<WebExtensionPolicy>> exts;
ExtensionPolicyService::GetSingleton().GetAll(exts);
for (auto& ext : exts) {
aWriter.StartArrayElement();
nsAutoString id;
ext->GetId(id);
aWriter.StringElement(NS_ConvertUTF16toUTF8(id));
aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name()));
auto url = ext->GetURL(u""_ns);
if (url.isOk()) {
aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap()));
}
aWriter.EndArray();
}
}
aWriter.EndArray();
}
aWriter.EndObject();
}
}
static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
ActivePS::DiscardExpiredPages(aLock);
for (const auto& page : ActivePS::ProfiledPages(aLock)) {
page->StreamJSON(aWriter);
}
}
#if defined(GP_OS_android)
template <int N>
static bool StartsWith(const nsACString& string, const char (&prefix)[N]) {
if (N - 1 > string.Length()) {
return false;
}
return memcmp(string.Data(), prefix, N - 1) == 0;
}
static JS::ProfilingCategoryPair InferJavaCategory(nsACString& aName) {
if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) {
return JS::ProfilingCategoryPair::IDLE;
}
if (aName.EqualsLiteral("java.lang.Object.wait()")) {
return JS::ProfilingCategoryPair::JAVA_BLOCKED;
}
if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) {
return JS::ProfilingCategoryPair::JAVA_ANDROID;
}
if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) {
return JS::ProfilingCategoryPair::JAVA_MOZILLA;
}
if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") ||
StartsWith(aName, "com.sun.")) {
return JS::ProfilingCategoryPair::JAVA_LANGUAGE;
}
if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) {
return JS::ProfilingCategoryPair::JAVA_KOTLIN;
}
if (StartsWith(aName, "androidx.")) {
return JS::ProfilingCategoryPair::JAVA_ANDROIDX;
}
return JS::ProfilingCategoryPair::OTHER;
}
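// For illustration only (the frame names below are made-up examples, not
// values taken from any real profile), the inference above behaves like:
//
//   nsAutoCString idle("android.os.MessageQueue.nativePollOnce()");
//   MOZ_ASSERT(InferJavaCategory(idle) == JS::ProfilingCategoryPair::IDLE);
//
//   nsAutoCString moz("org.mozilla.gecko.GeckoThread.run()");
//   MOZ_ASSERT(InferJavaCategory(moz) ==
//              JS::ProfilingCategoryPair::JAVA_MOZILLA);
//
//   nsAutoCString other("com.example.app.MainActivity.onCreate()");
//   MOZ_ASSERT(InferJavaCategory(other) == JS::ProfilingCategoryPair::OTHER);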
// Marker type for Java markers without any details.
struct JavaMarker {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("Java");
}
static void StreamJSONMarkerData(
baseprofiler::SpliceableJSONWriter& aWriter) {}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
MS::Location::MarkerTable};
schema.SetAllLabels("{marker.name}");
return schema;
}
};
// Marker type for Java markers with a detail field.
struct JavaMarkerWithDetails {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("JavaWithDetails");
}
static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
const ProfilerString8View& aText) {
// This (currently) needs to be called "name" to be searchable on the
// front-end.
aWriter.StringProperty("name", aText);
}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
MS::Location::MarkerTable};
schema.SetTooltipLabel("{marker.name}");
schema.SetChartLabel("{marker.data.name}");
schema.SetTableLabel("{marker.name} - {marker.data.name}");
schema.AddKeyLabelFormatSearchable("name", "Details", MS::Format::String,
MS::Searchable::Searchable);
return schema;
}
};
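// These marker types are consumed by CollectJavaThreadProfileData() below.
// Roughly, a Java marker that carries a detail string is added like this
// (sketch of the call made further down, with placeholder variable names):
//
//   AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
//                     geckoprofiler::category::JAVA_ANDROID,
//                     {MarkerThreadId(threadId), std::move(timing)},
//                     JavaMarkerWithDetails{}, text->ToCString());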
static void CollectJavaThreadProfileData(
nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef>& javaThreads,
ProfileBuffer& aProfileBuffer) {
// Retrieve metadata about the threads.
const auto threadCount = java::GeckoJavaSampler::GetRegisteredThreadCount();
for (int i = 0; i < threadCount; i++) {
javaThreads.AppendElement(
java::GeckoJavaSampler::GetRegisteredThreadInfo(i));
}
// locked_profiler_start uses a sample count of 1000 for the Java thread.
// This entry size is enough for now, but we might have to estimate it
// if it becomes customizable.
// Pass the samples.
int sampleId = 0;
while (true) {
const auto threadId = java::GeckoJavaSampler::GetThreadId(sampleId);
double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId);
if (threadId == 0 || sampleTime == 0.0) {
break;
}
aProfileBuffer.AddThreadIdEntry(ProfilerThreadId::FromNumber(threadId));
aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime));
int frameId = 0;
while (true) {
jni::String::LocalRef frameName =
java::GeckoJavaSampler::GetFrameName(sampleId, frameId++);
if (!frameName) {
break;
}
nsCString frameNameString = frameName->ToCString();
auto categoryPair = InferJavaCategory(frameNameString);
aProfileBuffer.CollectCodeLocation("", frameNameString.get(), 0, 0,
Nothing(), Nothing(),
Some(categoryPair));
}
sampleId++;
}
// Pass the markers now
while (true) {
// Gets the data from the Android UI thread only.
java::GeckoJavaSampler::Marker::LocalRef marker =
java::GeckoJavaSampler::PollNextMarker();
if (!marker) {
// All markers are transferred.
break;
}
// Get all the marker information from the Java thread using JNI.
const auto threadId = ProfilerThreadId::FromNumber(marker->GetThreadId());
nsCString markerName = marker->GetMarkerName()->ToCString();
jni::String::LocalRef text = marker->GetMarkerText();
TimeStamp startTime =
CorePS::ProcessStartTime() +
TimeDuration::FromMilliseconds(marker->GetStartTime());
double endTimeMs = marker->GetEndTime();
// A marker can be either a duration with start and end, or a point in time
// with only startTime. If endTime is 0, this means it's a point in time.
TimeStamp endTime = endTimeMs == 0
? startTime
: CorePS::ProcessStartTime() +
TimeDuration::FromMilliseconds(endTimeMs);
MarkerTiming timing = endTimeMs == 0
? MarkerTiming::InstantAt(startTime)
: MarkerTiming::Interval(startTime, endTime);
if (!text) {
// This marker doesn't have text.
AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
geckoprofiler::category::JAVA_ANDROID,
{MarkerThreadId(threadId), std::move(timing)},
JavaMarker{});
} else {
// This marker has text.
AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
geckoprofiler::category::JAVA_ANDROID,
{MarkerThreadId(threadId), std::move(timing)},
JavaMarkerWithDetails{}, text->ToCString());
}
}
}
#endif
UniquePtr<ProfilerCodeAddressService>
profiler_code_address_service_for_presymbolication() {
static const bool preSymbolicate = []() {
const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
return symbolicate && symbolicate[0] != '\0';
}();
return preSymbolicate ? MakeUnique<ProfilerCodeAddressService>() : nullptr;
}
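// Usage note: this returns a service only when the MOZ_PROFILER_SYMBOLICATE
// environment variable is set to a non-empty value; otherwise it returns
// nullptr and symbolication is expected to happen later elsewhere. E.g.:
//
//   UniquePtr<ProfilerCodeAddressService> service =
//       profiler_code_address_service_for_presymbolication();
//   if (service) {
//     // Pass the service to the streaming functions below to
//     // pre-symbolicate frames while generating the profile.
//   }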
static ProfilerResult<ProfileGenerationAdditionalInformation>
locked_profiler_stream_json_for_this_process(
PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
const PreRecordedMetaInformation& aPreRecordedMetaInformation,
bool aIsShuttingDown, ProfilerCodeAddressService* aService,
mozilla::ProgressLogger aProgressLogger) {
LOG("locked_profiler_stream_json_for_this_process");
#ifdef DEBUG
PRIntervalTime slowWithSleeps = 0;
if (!XRE_IsParentProcess()) {
for (const auto& filter : ActivePS::Filters(aLock)) {
if (filter == "test-debug-child-slow-json") {
LOG("test-debug-child-slow-json");
// There are 10 slow-downs below, each sleeping 250ms, for a total of
// 2.5s. This should trigger the first progress request after 1s, and the
// next progress request will have advanced further, so this profile
// shouldn't get dropped.
slowWithSleeps = PR_MillisecondsToInterval(250);
} else if (filter == "test-debug-child-very-slow-json") {
LOG("test-debug-child-very-slow-json");
// Wait for more than 2s without any progress, which should get this
// profile discarded.
PR_Sleep(PR_SecondsToInterval(5));
}
}
}
# define SLOW_DOWN_FOR_TESTING() \
if (slowWithSleeps != 0) { \
DEBUG_LOG("progress=%.0f%%, sleep...", \
aProgressLogger.GetGlobalProgress().ToDouble() * 100.0); \
PR_Sleep(slowWithSleeps); \
}
#else // #ifdef DEBUG
# define SLOW_DOWN_FOR_TESTING() /* No slow-downs */
#endif // #ifdef DEBUG #else
MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
AUTO_PROFILER_STATS(locked_profiler_stream_json_for_this_process);
const double collectionStartMs = profiler_time();
ProfileBuffer& buffer = ActivePS::Buffer(aLock);
aProgressLogger.SetLocalProgress(1_pc, "Locked profile buffer");
SLOW_DOWN_FOR_TESTING();
// If there is a set "Window length", discard older data.
Maybe<double> durationS = ActivePS::Duration(aLock);
if (durationS.isSome()) {
const double durationStartMs = collectionStartMs - *durationS * 1000;
buffer.DiscardSamplesBeforeTime(durationStartMs);
}
aProgressLogger.SetLocalProgress(2_pc, "Discarded old data");
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
#if defined(GP_OS_android)
// Java thread profile data should be collected before serializing the meta
// object, because the Java thread adds some markers with marker schema
// objects, and those objects must be added before the serialization of the
// `profile.meta.markerSchema` array so they can be serialized properly.
// That's why Java thread profile data is collected before everything else.
// We are allocating it chunk by chunk, so this will not allocate 64 MiB
// at once. This size should be more than enough for Java threads.
// This buffer is being created for each process, but Android has
// relatively few processes compared to desktop, so it's okay here.
mozilla::ProfileBufferChunkManagerWithLocalLimit javaChunkManager(
64 * 1024 * 1024, 1024 * 1024);
ProfileChunkedBuffer javaBufferManager(
ProfileChunkedBuffer::ThreadSafety::WithoutMutex, javaChunkManager);
ProfileBuffer javaBuffer(javaBufferManager);
nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef> javaThreads;
if (ActivePS::FeatureJava(aLock)) {
CollectJavaThreadProfileData(javaThreads, javaBuffer);
aProgressLogger.SetLocalProgress(3_pc, "Collected Java thread");
}
#endif
// Put shared library info
aWriter.StartArrayProperty("libs");
SharedLibraryInfo sharedLibraryInfo = SharedLibraryInfo::GetInfoForSelf();
sharedLibraryInfo.SortByAddress();
AppendSharedLibraries(aWriter, sharedLibraryInfo);
aWriter.EndArray();
aProgressLogger.SetLocalProgress(4_pc, "Wrote library information");
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Put meta data
aWriter.StartObjectProperty("meta");
{
StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown,
aPreRecordedMetaInformation);
}
aWriter.EndObject();
aProgressLogger.SetLocalProgress(5_pc, "Wrote profile metadata");
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Put page data
aWriter.StartArrayProperty("pages");
{ StreamPages(aLock, aWriter); }
aWriter.EndArray();
aProgressLogger.SetLocalProgress(6_pc, "Wrote pages");
buffer.StreamProfilerOverheadToJSON(
aWriter, CorePS::ProcessStartTime(), aSinceTime,
aProgressLogger.CreateSubLoggerTo(10_pc, "Wrote profiler overheads"));
buffer.StreamCountersToJSON(
aWriter, CorePS::ProcessStartTime(), aSinceTime,
aProgressLogger.CreateSubLoggerTo(14_pc, "Wrote counters"));
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Lists the samples for each thread profile
aWriter.StartArrayProperty("threads");
{
ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
aProgressLogger.SetLocalProgress(15_pc, "Discarded expired profiles");
ThreadRegistry::LockedRegistry lockedRegistry;
ActivePS::ProfiledThreadList threads =
ActivePS::ProfiledThreads(lockedRegistry, aLock);
const uint32_t threadCount = uint32_t(threads.length());
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Prepare the streaming context for each thread.
ProcessStreamingContext processStreamingContext(
threadCount, aWriter.SourceFailureLatch(), CorePS::ProcessStartTime(),
aSinceTime);
for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
20_pc, threadCount, "Preparing thread streaming contexts...")) {
ActivePS::ProfiledThreadListElement& thread = threads[i];
MOZ_RELEASE_ASSERT(thread.mProfiledThreadData);
processStreamingContext.AddThreadStreamingContext(
*thread.mProfiledThreadData, buffer, thread.mJSContext, aService,
std::move(progressLogger));
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
}
SLOW_DOWN_FOR_TESTING();
// Read the buffer once, and extract all samples and markers that the
// context expects.
buffer.StreamSamplesAndMarkersToJSON(
processStreamingContext, aProgressLogger.CreateSubLoggerTo(
"Processing samples and markers...", 80_pc,
"Processed samples and markers"));
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
SLOW_DOWN_FOR_TESTING();
// Stream each thread from the pre-filled context.
ThreadStreamingContext* const contextListBegin =
processStreamingContext.begin();
MOZ_ASSERT(uint32_t(processStreamingContext.end() - contextListBegin) ==
threadCount);
for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
92_pc, threadCount, "Streaming threads...")) {
ThreadStreamingContext& threadStreamingContext = contextListBegin[i];
threadStreamingContext.FinalizeWriter();
threadStreamingContext.mProfiledThreadData.StreamJSON(
std::move(threadStreamingContext), aWriter,
CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock),
CorePS::ProcessStartTime(), aService, std::move(progressLogger));
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
}
aProgressLogger.SetLocalProgress(92_pc, "Wrote samples and markers");
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(aLock)) {
for (java::GeckoJavaSampler::ThreadInfo::LocalRef& threadInfo :
javaThreads) {
ProfiledThreadData threadData(ThreadRegistrationInfo{
threadInfo->GetName()->ToCString().BeginReading(),
ProfilerThreadId::FromNumber(threadInfo->GetId()), false,
CorePS::ProcessStartTime()});
threadData.StreamJSON(
javaBuffer, nullptr, aWriter, CorePS::ProcessName(aLock),
CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
nullptr,
aProgressLogger.CreateSubLoggerTo("Streaming Java thread...", 96_pc,
"Streamed Java thread"));
}
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
} else {
aProgressLogger.SetLocalProgress(96_pc, "No Java thread");
}
#endif
UniquePtr<char[]> baseProfileThreads =
ActivePS::MoveBaseProfileThreads(aLock);
if (baseProfileThreads) {
aWriter.Splice(MakeStringSpan(baseProfileThreads.get()));
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
aProgressLogger.SetLocalProgress(97_pc, "Wrote baseprofiler data");
} else {
aProgressLogger.SetLocalProgress(97_pc, "No baseprofiler data");
}
}
aWriter.EndArray();
SLOW_DOWN_FOR_TESTING();
aWriter.StartArrayProperty("pausedRanges");
{
buffer.StreamPausedRangesToJSON(
aWriter, aSinceTime,
aProgressLogger.CreateSubLoggerTo("Streaming pauses...", 99_pc,
"Streamed pauses"));
}
aWriter.EndArray();
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
ProfilingLog::Access([&](Json::Value& aProfilingLogObject) {
aProfilingLogObject[Json::StaticString{
"profilingLogEnd" TIMESTAMP_JSON_SUFFIX}] = ProfilingLog::Timestamp();
aWriter.StartObjectProperty("profilingLog");
{
nsAutoCString pid;
pid.AppendInt(int64_t(profiler_current_process_id().ToNumber()));
Json::String logString = ToCompactString(aProfilingLogObject);
aWriter.SplicedJSONProperty(pid, logString);
}
aWriter.EndObject();
});
const double collectionEndMs = profiler_time();
// Record timestamps for the collection into the buffer, so that consumers
// know why we didn't collect any samples for its duration.
// We put these entries into the buffer after we've collected the profile,
// so they'll be visible for the *next* profile collection (if they haven't
// been overwritten due to buffer wraparound by then).
buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
#ifdef DEBUG
if (slowWithSleeps != 0) {
LOG("locked_profiler_stream_json_for_this_process done");
}
#endif // DEBUG
return ProfileGenerationAdditionalInformation{std::move(sharedLibraryInfo)};
}
// Keep this internal function non-static, so it may be used by tests.
ProfilerResult<ProfileGenerationAdditionalInformation>
do_profiler_stream_json_for_this_process(
SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
ProfilerCodeAddressService* aService,
mozilla::ProgressLogger aProgressLogger) {
LOG("profiler_stream_json_for_this_process");
MOZ_RELEASE_ASSERT(CorePS::Exists());
const auto preRecordedMetaInformation = PreRecordMetaInformation();
aProgressLogger.SetLocalProgress(2_pc, "PreRecordMetaInformation done");
if (profiler_is_active()) {
invoke_profiler_state_change_callbacks(ProfilingState::GeneratingProfile);
}
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return Err(ProfilerError::IsInactive);
}
ProfileGenerationAdditionalInformation additionalInfo;
MOZ_TRY_VAR(
additionalInfo,
locked_profiler_stream_json_for_this_process(
lock, aWriter, aSinceTime, preRecordedMetaInformation,
aIsShuttingDown, aService,
aProgressLogger.CreateSubLoggerFromTo(
3_pc, "locked_profiler_stream_json_for_this_process started",
100_pc, "locked_profiler_stream_json_for_this_process done")));
if (aWriter.Failed()) {
return Err(ProfilerError::JsonGenerationFailed);
}
return additionalInfo;
}
ProfilerResult<ProfileGenerationAdditionalInformation>
profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
double aSinceTime, bool aIsShuttingDown,
ProfilerCodeAddressService* aService,
mozilla::ProgressLogger aProgressLogger) {
MOZ_RELEASE_ASSERT(
!XRE_IsParentProcess() || NS_IsMainThread(),
"In the parent process, profiles should only be generated from the main "
"thread, otherwise they will be incomplete.");
ProfileGenerationAdditionalInformation additionalInfo;
MOZ_TRY_VAR(additionalInfo, do_profiler_stream_json_for_this_process(
aWriter, aSinceTime, aIsShuttingDown,
aService, std::move(aProgressLogger)));
return additionalInfo;
}
// END saving/streaming code
////////////////////////////////////////////////////////////////////////
static char FeatureCategory(uint32_t aFeature) {
if (aFeature & DefaultFeatures()) {
if (aFeature & AvailableFeatures()) {
return 'D';
}
return 'd';
}
if (aFeature & StartupExtraDefaultFeatures()) {
if (aFeature & AvailableFeatures()) {
return 'S';
}
return 's';
}
if (aFeature & AvailableFeatures()) {
return '-';
}
return 'x';
}
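// In other words, the character encodes both where the feature is a default
// and whether it is available on this platform, matching the legend printed
// by PrintUsage() below:
//
//   FeatureCategory(f) == 'D'  // default and available
//   FeatureCategory(f) == 'd'  // default but unavailable
//   FeatureCategory(f) == 'S'  // MOZ_PROFILER_STARTUP extra default, available
//   FeatureCategory(f) == 's'  // MOZ_PROFILER_STARTUP extra default, unavailable
//   FeatureCategory(f) == '-'  // available, not a default
//   FeatureCategory(f) == 'x'  // unavailable, not a default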
static void PrintUsage() {
MOZ_RELEASE_ASSERT(NS_IsMainThread());
printf(
"\n"
"Profiler environment variable usage:\n"
"\n"
" MOZ_PROFILER_HELP\n"
" If set to any value, prints this message.\n"
" Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n"
"\n"
" MOZ_LOG\n"
" Enables logging. The levels of logging available are\n"
" 'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n"
"\n"
" MOZ_PROFILER_STARTUP\n"
" If set to any value other than '' or '0'/'N'/'n', starts the\n"
" profiler immediately on start-up.\n"
" Useful if you want profile code that runs very early.\n"
"\n"
" MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
" If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n"
" process in the profiler's circular buffer when the profiler is first\n"
" started.\n"
" If unset, the platform default is used:\n"
" %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
" (%u bytes per entry -> %u or %u total bytes per process)\n"
" Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n"
"\n"
" MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
" If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n"
" entries in the the profiler's circular buffer when the profiler is\n"
" first started, in seconds.\n"
" If unset, the life time of the entries will only be restricted by\n"
" MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
" additional time duration restriction will be applied.\n"
"\n"
" MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n"
" If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
" measured in milliseconds, when the profiler is first started.\n"
" If unset, the platform default is used.\n"
"\n"
" MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
" If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
" the integer value of the features bitfield.\n"
" If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
"\n"
" MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
" If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
" a comma-separated list of strings.\n"
" Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
" If unset, the platform default is used.\n"
"\n"
" Features: (x=unavailable, D/d=default/unavailable,\n"
" S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n",
unsigned(scMinimumBufferEntries), unsigned(scMaximumBufferEntries),
unsigned(PROFILER_DEFAULT_ENTRIES.Value()),
unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
unsigned(scBytesPerEntry),
unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry),
PROFILER_MAX_INTERVAL);
#define PRINT_FEATURE(n_, str_, Name_, desc_) \
printf(" %c %7u: \"%s\" (%s)\n", FeatureCategory(ProfilerFeature::Name_), \
ProfilerFeature::Name_, str_, desc_);
PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
#undef PRINT_FEATURE
printf(
" - \"default\" (All above D+S defaults)\n"
"\n"
" MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
" If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n"
" comma-separated list of strings. A given thread will be sampled if\n"
" any of the filters is a case-insensitive substring of the thread\n"
" name. If unset, a default is used.\n"
"\n"
" MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=<Number>\n"
" This variable is used to propagate the activeTabID of\n"
" the profiler init params to subprocesses.\n"
"\n"
" MOZ_PROFILER_SHUTDOWN=<Filename>\n"
" If set, the profiler saves a profile to the named file on shutdown.\n"
" If the Filename contains \"%%p\", this will be replaced with the'\n"
" process id of the parent process.\n"
"\n"
" MOZ_PROFILER_SYMBOLICATE\n"
" If set, the profiler will pre-symbolicate profiles.\n"
" *Note* This will add a significant pause when gathering data, and\n"
" is intended mainly for local development.\n"
"\n"
" MOZ_PROFILER_LUL_TEST\n"
" If set to any value, runs LUL unit tests at startup.\n"
"\n"
" This platform %s native unwinding.\n"
"\n",
#if defined(HAVE_NATIVE_UNWIND)
"supports"
#else
"does not support"
#endif
);
}
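// A minimal startup-profiling example using the variables documented above
// (shell syntax; the binary name, feature list, and filter list are just
// illustrative choices -- the authoritative lists are printed by this
// function):
//
//   MOZ_PROFILER_STARTUP=1 \
//   MOZ_PROFILER_STARTUP_FEATURES=js,stackwalk \
//   MOZ_PROFILER_STARTUP_FILTERS=GeckoMain,Compositor \
//   MOZ_PROFILER_SHUTDOWN=/tmp/profile_%p.json \
//   ./firefox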
////////////////////////////////////////////////////////////////////////
// BEGIN Sampler
#if defined(GP_OS_linux) || defined(GP_OS_android)
struct SigHandlerCoordinator;
#endif
// Sampler performs setup and teardown of the state required to sample with the
// profiler. Sampler may exist when ActivePS is not present.
//
// SuspendAndSampleAndResumeThread must only be called from a single thread,
// and must not sample the thread it is being called from. A separate Sampler
// instance must be used for each thread which wants to capture samples.
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
//
// With the exception of SamplerThread, all Sampler objects must be Disable-d
// before releasing the lock which was used to create them. This avoids races
// on Linux with the SIGPROF signal handler.
class Sampler {
public:
// Sets up the profiler such that it can begin sampling.
explicit Sampler(PSLockRef aLock);
// Disable the sampler, restoring it to its previous state. This must be
// called once, and only once, before the Sampler is destroyed.
void Disable(PSLockRef aLock);
// This method suspends and resumes the samplee thread. It calls the passed-in
// function-like object aProcessRegs (passing it a populated |const
// Registers&| arg) while the samplee thread is suspended. Note that
// the aProcessRegs function must be very careful not to do anything that
// requires a lock, since we may have interrupted the thread at any point.
// As an example, you can't call TimeStamp::Now() since on Windows it
// takes a lock on the performance counter.
//
// Func must be a function-like object of type
// `void(const Registers&, const TimeStamp&)`.
template <typename Func>
void SuspendAndSampleAndResumeThread(
PSLockRef aLock,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
const TimeStamp& aNow, const Func& aProcessRegs);
private:
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
// Used to restore the SIGPROF handler when ours is removed.
struct sigaction mOldSigprofHandler;
// This process' ID. Needed as an argument for tgkill in
// SuspendAndSampleAndResumeThread.
ProfilerProcessId mMyPid;
// The sampler thread's ID. Used to assert that it is not sampling itself,
// which would lead to deadlock.
ProfilerThreadId mSamplerTid;
public:
// This is the one-and-only variable used to communicate between the sampler
// thread and the samplee thread's signal handler. It's static because the
// samplee thread's signal handler is static.
static struct SigHandlerCoordinator* sSigHandlerCoordinator;
#endif
};
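// Typical use, sketched from the call in SamplerThread::Run() further below
// (the variable names here are placeholders):
//
//   sampler.SuspendAndSampleAndResumeThread(
//       lock, threadData, TimeStamp::Now(),
//       [&](const Registers& aRegs, const TimeStamp& aNow) {
//         // Runs while the samplee is suspended: must not do anything that
//         // takes a lock (see the comment above); typically it only records
//         // aRegs and the stack into a pre-allocated local buffer.
//       });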
// END Sampler
////////////////////////////////////////////////////////////////////////
// Platform-specific function that retrieves per-thread CPU measurements.
static RunningTimes GetThreadRunningTimesDiff(
PSLockRef aLock,
ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
// Platform-specific function that *may* discard CPU measurements since the
// previous call to GetThreadRunningTimesDiff, if the way to suspend threads on
// this platform may add running times to that thread.
// No-op otherwise, i.e. if suspending a thread doesn't add running time to it.
static void DiscardSuspendedThreadRunningTimes(
PSLockRef aLock,
ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
// Platform-specific function that retrieves process CPU measurements.
static RunningTimes GetProcessRunningTimesDiff(
PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated);
// Template function to be used by `GetThreadRunningTimesDiff()` (unless some
// platform has a better way to achieve this).
// It helps perform CPU measurements and tie them to a timestamp, such that
// the measurements and timestamp are very close together.
// This is necessary, because the relative CPU usage is computed by dividing
// consecutive CPU measurements by their timestamp difference; if there was an
// unexpected big gap, it could skew this computation and produce impossible
// spikes.
// Note that this may call the measurement function more than once; it is
// assumed to normally be fast.
// This was verified experimentally, but there is currently no regression
// test covering it.
template <typename GetCPURunningTimesFunction>
RunningTimes GetRunningTimesWithTightTimestamp(
GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) {
// Once per process, compute a threshold over which running times and their
// timestamp are considered too far apart.
static const TimeDuration scMaxRunningTimesReadDuration = [&]() {
// Run the main CPU measurements + timestamp a number of times and capture
// their durations.
constexpr int loops = 128;
TimeDuration durations[loops];
RunningTimes runningTimes;
TimeStamp before = TimeStamp::Now();
for (int i = 0; i < loops; ++i) {
AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration);
aGetCPURunningTimesFunction(runningTimes);
const TimeStamp after = TimeStamp::Now();
durations[i] = after - before;
before = after;
}
// Move median duration to the middle.
std::nth_element(&durations[0], &durations[loops / 2], &durations[loops]);
// Use median*8 as cut-off point.
// Typical durations should be around a microsecond, so the cut-off should
// then be around 10 microseconds, well below the expected minimum
// inter-sample interval (observed as a few milliseconds); overall this
// should keep CPU/interval spikes rare.
return durations[loops / 2] * 8;
}();
// Record CPU measurements between two timestamps.
RunningTimes runningTimes;
TimeStamp before = TimeStamp::Now();
aGetCPURunningTimesFunction(runningTimes);
TimeStamp after = TimeStamp::Now();
const TimeDuration duration = after - before;
// In most cases, the above should be quick enough. But if not (e.g., because
// of an OS context switch), repeat once:
if (MOZ_UNLIKELY(duration > scMaxRunningTimesReadDuration)) {
AUTO_PROFILER_STATS(GetRunningTimes_REDO);
RunningTimes runningTimes2;
aGetCPURunningTimesFunction(runningTimes2);
TimeStamp after2 = TimeStamp::Now();
const TimeDuration duration2 = after2 - after;
if (duration2 < duration) {
// We did it faster, use the new results. (But it could still be slower
// than expected, see note below for why it's acceptable.)
// This must stay *after* the CPU measurements.
runningTimes2.SetPostMeasurementTimeStamp(after2);
return runningTimes2;
}
// Otherwise use the initial results; they were slow, but still faster than
// the second attempt.
// This means that something bad happened twice in a row on the same thread!
// So trying more times would be unlikely to get much better, and would be
// more expensive than the precision is worth.
// At worst, it means that a spike of activity may be reported in the next
// time slice. But in the end, the cumulative work is conserved, so it
// should still be visible at about the correct time in the graph.
AUTO_PROFILER_STATS(GetRunningTimes_RedoWasWorse);
}
// This must stay *after* the CPU measurements.
runningTimes.SetPostMeasurementTimeStamp(after);
return runningTimes;
}
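// Sketch of how a platform-specific GetThreadRunningTimesDiff() might use the
// helper above to tie its measurement to a tight timestamp (ReadThreadCPUTime
// is a hypothetical platform helper, and the diff-from-previous bookkeeping
// that real implementations also do is omitted):
//
//   static RunningTimes GetThreadRunningTimesDiff(
//       PSLockRef aLock,
//       ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
//     return GetRunningTimesWithTightTimestamp([&](RunningTimes& aTimes) {
//       ReadThreadCPUTime(aThreadData, aTimes);  // hypothetical helper
//     });
//   }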
////////////////////////////////////////////////////////////////////////
// BEGIN SamplerThread
// The sampler thread controls sampling and runs whenever the profiler is
// active. It periodically runs through all registered threads, finds those
// that should be sampled, then pauses and samples them.
class SamplerThread {
public:
// Creates a sampler thread, but doesn't start it.
SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
double aIntervalMilliseconds, uint32_t aFeatures);
~SamplerThread();
// This runs on (is!) the sampler thread.
void Run();
#if defined(GP_OS_windows)
// This runs on (is!) the thread to spy on unregistered threads.
void RunUnregisteredThreadSpy();
#endif
// This runs on the main thread.
void Stop(PSLockRef aLock);
void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) {
// We are under lock, so it's safe to just modify the list pointer.
// Also this means the sampler has not started its run yet, so any callback
// added now will be invoked at the end of the next loop; this guarantees
// that the callback will be invoked after at least one full sampling loop.
mPostSamplingCallbackList = MakeUnique<PostSamplingCallbackListItem>(
std::move(mPostSamplingCallbackList), std::move(aCallback));
}
private:
void SpyOnUnregisteredThreads();
// Item containing a post-sampling callback, and a tail-list of more items.
// Using a linked list means no need to move items when adding more, and
// "stealing" the whole list is one pointer move.
struct PostSamplingCallbackListItem {
UniquePtr<PostSamplingCallbackListItem> mPrev;
PostSamplingCallback mCallback;
PostSamplingCallbackListItem(UniquePtr<PostSamplingCallbackListItem> aPrev,
PostSamplingCallback&& aCallback)
: mPrev(std::move(aPrev)), mCallback(std::move(aCallback)) {}
};
[[nodiscard]] UniquePtr<PostSamplingCallbackListItem>
TakePostSamplingCallbacks(PSLockRef) {
return std::move(mPostSamplingCallbackList);
}
static void InvokePostSamplingCallbacks(
UniquePtr<PostSamplingCallbackListItem> aCallbacks,
SamplingState aSamplingState) {
if (!aCallbacks) {
return;
}
// We want to drill down to the last element in this list, which is the
// oldest one, so that we invoke them in FIFO order.
// We don't expect many callbacks, so it's safe to recurse. Note that we're
// moving-from the UniquePtr, so the tail will implicitly get destroyed.
InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState);
// We are going to destroy this item, so we can safely move-from the
// callback before calling it (in case it has an rvalue-ref-qualified call
// operator).
std::move(aCallbacks->mCallback)(aSamplingState);
// It may be tempting for a future maintainer to change aCallbacks into an
// rvalue reference; this will remind them not to do that!
static_assert(
std::is_same_v<decltype(aCallbacks),
UniquePtr<PostSamplingCallbackListItem>>,
"We need to capture the list by-value, to implicitly destroy it");
}
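// Worked example: if callback A is appended first and B second, the list head
// is B with B.mPrev == A; the recursion above reaches A first, so the calls
// happen as A(aSamplingState) then B(aSamplingState), i.e. FIFO order.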
// This suspends the calling thread for the given number of microseconds.
// Best effort timing.
void SleepMicro(uint32_t aMicroseconds);
// The sampler used to suspend and sample threads.
Sampler mSampler;
// The activity generation, for detecting when the sampler thread must stop.
const uint32_t mActivityGeneration;
// The interval between samples, measured in microseconds.
const int mIntervalMicroseconds;
// The OS-specific handle for the sampler thread.
#if defined(GP_OS_windows)
HANDLE mThread;
HANDLE mUnregisteredThreadSpyThread = nullptr;
enum class SpyingState {
NoSpying,
Spy_Initializing,
// Spy is waiting for SamplerToSpy_Start or MainToSpy_Shutdown.
Spy_Waiting,
// Sampler requests spy to start working. May be pre-empted by
// MainToSpy_Shutdown.
SamplerToSpy_Start,
// Spy is currently working, cannot be interrupted, only the spy is allowed
// to change the state again.
Spy_Working,
// Main control requests spy to shut down.
MainToSpy_Shutdown,
// Spy notified main control that it's out of the loop, about to exit.
SpyToMain_ShuttingDown
};
SpyingState mSpyingState = SpyingState::NoSpying;
// The sampler will increment this while the spy is working, then while the
// spy is waiting the sampler will decrement it until <=0 before starting the
// spy. This will ensure that the work doesn't take more than 50% of a CPU
// core.
int mDelaySpyStart = 0;
Monitor mSpyingStateMonitor MOZ_UNANNOTATED{
"SamplerThread::mSpyingStateMonitor"};
#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
defined(GP_OS_android) || defined(GP_OS_freebsd)
pthread_t mThread;
#endif
// Post-sampling callbacks are kept in a simple linked list, which will be
// stolen by the sampler thread at the end of its next run.
UniquePtr<PostSamplingCallbackListItem> mPostSamplingCallbackList;
#if defined(GP_OS_windows)
bool mNoTimerResolutionChange = true;
#endif
struct SpiedThread {
base::ProcessId mThreadId;
nsCString mName;
uint64_t mCPUTimeNs;
SpiedThread(base::ProcessId aThreadId, const nsACString& aName,
uint64_t aCPUTimeNs)
: mThreadId(aThreadId), mName(aName), mCPUTimeNs(aCPUTimeNs) {}
// Comparisons with just a thread id, for easy searching in an array.
friend bool operator==(const SpiedThread& aSpiedThread,
base::ProcessId aThreadId) {
return aSpiedThread.mThreadId == aThreadId;
}
friend bool operator==(base::ProcessId aThreadId,
const SpiedThread& aSpiedThread) {
return aThreadId == aSpiedThread.mThreadId;
}
};
// Time at which mSpiedThreads was previously updated. Null before 1st update.
TimeStamp mLastSpying;
// Unregistered threads that have been found, and are being spied on.
using SpiedThreads = AutoTArray<SpiedThread, 128>;
SpiedThreads mSpiedThreads;
SamplerThread(const SamplerThread&) = delete;
void operator=(const SamplerThread&) = delete;
};
namespace geckoprofiler::markers {
struct CPUSpeedMarker {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("CPUSpeed");
}
static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
uint32_t aCPUSpeedMHz) {
aWriter.DoubleProperty("speed", double(aCPUSpeedMHz) / 1000);
}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
schema.SetTableLabel("{marker.name} Speed = {marker.data.speed}GHz");
schema.AddKeyLabelFormat("speed", "CPU Speed (GHz)", MS::Format::String);
schema.AddChartColor("speed", MS::GraphType::Bar, MS::GraphColor::Ink);
return schema;
}
};
} // namespace geckoprofiler::markers
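// CPUSpeedMarker is emitted per CPU core from SamplerThread::Run() below,
// roughly as follows (sketch; `name` and `speedMHz` are placeholders for the
// values computed there):
//
//   PROFILER_MARKER(name, OTHER,
//                   MarkerOptions(MarkerThreadId::MainThread(),
//                                 MarkerTiming::IntervalStart(now)),
//                   CPUSpeedMarker, speedMHz);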
// [[nodiscard]] static
bool ActivePS::AppendPostSamplingCallback(PSLockRef aLock,
PostSamplingCallback&& aCallback) {
if (!sInstance || !sInstance->mSamplerThread) {
return false;
}
sInstance->mSamplerThread->AppendPostSamplingCallback(aLock,
std::move(aCallback));
return true;
}
// This function is required because we need to create a SamplerThread within
// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
// could probably be removed by moving some code around.
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
double aInterval, uint32_t aFeatures) {
return new SamplerThread(aLock, aGeneration, aInterval, aFeatures);
}
// This function is the sampler thread. This implementation is used for all
// targets.
void SamplerThread::Run() {
NS_SetCurrentThreadName("SamplerThread");
// Features won't change during this SamplerThread's lifetime, so we can read
// them once and store them locally.
const uint32_t features = []() -> uint32_t {
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
// If there is no active profiler, it doesn't matter what we return,
// because this thread will exit before any feature is used.
return 0;
}
return ActivePS::Features(lock);
}();
// Not *no*-stack-sampling means we do want stack sampling.
const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
const bool cpuUtilization = ProfilerFeature::HasCPUUtilization(features);
// Use local ProfileBuffer and underlying buffer to capture the stack.
// (This is to avoid touching the core buffer lock while a thread is
// suspended, because that thread could be working with the core buffer as
// well.)
mozilla::ProfileBufferChunkManagerSingle localChunkManager(
ProfileBufferChunkManager::scExpectedMaximumStackSize);
ProfileChunkedBuffer localBuffer(
ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
ProfileBuffer localProfileBuffer(localBuffer);
// Will be kept between collections, to know what each collection does.
auto previousState = localBuffer.GetState();
// This will be filled at every loop, to be used by the next loop to compute
// the CPU utilization between samples.
RunningTimes processRunningTimes;
// This will be set inside the loop, from inside the lock scope, to capture
// all callbacks added before that, but none after the lock is released.
UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks;
// This will be set inside the loop, before invoking callbacks outside.
SamplingState samplingState{};
const TimeDuration sampleInterval =
TimeDuration::FromMicroseconds(mIntervalMicroseconds);
const uint32_t minimumIntervalSleepUs =
static_cast<uint32_t>(mIntervalMicroseconds / 4);
// This is the scheduled time at which each sampling loop should start.
// It will determine the ideal next sampling start by adding the expected
// interval, unless sampling runs late -- see the end of the while() loop.
TimeStamp scheduledSampleStart = TimeStamp::Now();
#if defined(HAVE_CPU_FREQ_SUPPORT)
// Used to collect CPU core frequencies, if the cpufreq feature is on.
Vector<uint32_t> CPUSpeeds;
if (XRE_IsParentProcess() && ProfilerFeature::HasCPUFrequency(features) &&
CPUSpeeds.resize(GetNumberOfProcessors())) {
{
PSAutoLock lock;
if (ProfilerCPUFreq* cpuFreq = ActivePS::MaybeCPUFreq(lock); cpuFreq) {
cpuFreq->Sample();
for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
CPUSpeeds[i] = cpuFreq->GetCPUSpeedMHz(i);
}
}
}
TimeStamp now = TimeStamp::Now();
for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
nsAutoCString name;
name.AssignLiteral("CPU ");
name.AppendInt(i);
PROFILER_MARKER(name, OTHER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalStart(now)),
CPUSpeedMarker, CPUSpeeds[i]);
}
}
#endif
while (true) {
const TimeStamp sampleStart = TimeStamp::Now();
// This scope is for |lock|. It ends before we sleep below.
{
// There should be no local callbacks left from a previous loop.
MOZ_ASSERT(!postSamplingCallbacks);
PSAutoLock lock;
TimeStamp lockAcquired = TimeStamp::Now();
// Move all the post-sampling callbacks locally, so that new ones cannot
// sneak in between the end of the lock scope and the invocation after it.
postSamplingCallbacks = TakePostSamplingCallbacks(lock);
if (!ActivePS::Exists(lock)) {
// Exit the `while` loop, including the lock scope, before invoking
// callbacks and returning.
samplingState = SamplingState::JustStopped;
break;
}
// At this point profiler_stop() might have been called, and
// profiler_start() might have been called on another thread. If this
// happens the generation won't match.
if (ActivePS::Generation(lock) != mActivityGeneration) {
samplingState = SamplingState::JustStopped;
// Exit the `while` loop, including the lock scope, before invoking
// callbacks and returning.
break;
}
ActivePS::ClearExpiredExitProfiles(lock);
TimeStamp expiredMarkersCleaned = TimeStamp::Now();
if (int(gSkipSampling) <= 0 && !ActivePS::IsSamplingPaused(lock)) {
double sampleStartDeltaMs =
(sampleStart - CorePS::ProcessStartTime()).ToMilliseconds();
ProfileBuffer& buffer = ActivePS::Buffer(lock);
// Before sampling counters, update the process CPU counter if active.
if (ActivePS::ProcessCPUCounter* processCPUCounter =
ActivePS::MaybeProcessCPUCounter(lock);
processCPUCounter) {
RunningTimes processRunningTimesDiff =
GetProcessRunningTimesDiff(lock, processRunningTimes);
Maybe<uint64_t> cpu = processRunningTimesDiff.GetJsonThreadCPUDelta();
if (cpu) {
processCPUCounter->Add(static_cast<int64_t>(*cpu));
}
}
#if defined(HAVE_CPU_FREQ_SUPPORT)
if (XRE_IsParentProcess() && CPUSpeeds.length() > 0) {
unsigned newSpeed[CPUSpeeds.length()];
if (ProfilerCPUFreq* cpuFreq = ActivePS::MaybeCPUFreq(lock);
cpuFreq) {
cpuFreq->Sample();
for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
newSpeed[i] = cpuFreq->GetCPUSpeedMHz(i);
}
}
TimeStamp now = TimeStamp::Now();
for (size_t i = 0; i < CPUSpeeds.length(); ++i) {
if (newSpeed[i] == CPUSpeeds[i]) {
continue;
}
nsAutoCString name;
name.AssignLiteral("CPU ");
name.AppendInt(i);
PROFILER_MARKER_UNTYPED(
name, OTHER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalEnd(now)));
PROFILER_MARKER(name, OTHER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalStart(now)),
CPUSpeedMarker, newSpeed[i]);
CPUSpeeds[i] = newSpeed[i];
}
}
#endif
if (PowerCounters* powerCounters = ActivePS::MaybePowerCounters(lock);
powerCounters) {
powerCounters->Sample();
}
// handle per-process generic counters
double counterSampleStartDeltaMs =
(TimeStamp::Now() - CorePS::ProcessStartTime()).ToMilliseconds();
const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
for (auto& counter : counters) {
if (auto sample = counter->Sample(); sample.isSampleNew) {
// create Buffer entries for each counter
buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
buffer.AddEntry(
ProfileBufferEntry::Time(counterSampleStartDeltaMs));
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (ActivePS::IsMemoryCounter(counter)) {
// For the memory counter, subtract the size of our buffer to
// avoid giving the misleading impression that the memory use
// keeps on growing when it's just the profiler session that's
// using a larger buffer as it gets longer.
sample.count -= static_cast<int64_t>(
ActivePS::ControlledChunkManager(lock).TotalSize());
}
#endif
buffer.AddEntry(ProfileBufferEntry::Count(sample.count));
if (sample.number) {
buffer.AddEntry(ProfileBufferEntry::Number(sample.number));
}
}
}
TimeStamp countersSampled = TimeStamp::Now();
if (stackSampling || cpuUtilization) {
samplingState = SamplingState::SamplingCompleted;
// Prevent threads from ending (or starting) and allow access to all
// OffThreadRef's.
ThreadRegistry::LockedRegistry lockedRegistry;
for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
ThreadRegistration::UnlockedRWForLockedProfiler&
unlockedThreadData =
offThreadRef.UnlockedRWForLockedProfilerRef();
ProfiledThreadData* profiledThreadData =
unlockedThreadData.GetProfiledThreadData(lock);
if (!profiledThreadData) {
// This thread is not being profiled, continue with the next one.
continue;
}
const ThreadProfilingFeatures whatToProfile =
unlockedThreadData.ProfilingFeatures();
const bool threadCPUUtilization =
cpuUtilization &&
DoFeaturesIntersect(whatToProfile,
ThreadProfilingFeatures::CPUUtilization);
const bool threadStackSampling =
stackSampling &&
DoFeaturesIntersect(whatToProfile,
ThreadProfilingFeatures::Sampling);
if (!threadCPUUtilization && !threadStackSampling) {
// Nothing to profile on this thread, continue with the next one.
continue;
}
const ProfilerThreadId threadId =
unlockedThreadData.Info().ThreadId();
const RunningTimes runningTimesDiff = [&]() {
if (!threadCPUUtilization) {
// If we don't need CPU measurements, we only need a timestamp.
return RunningTimes(TimeStamp::Now());
}
return GetThreadRunningTimesDiff(lock, unlockedThreadData);
}();
const TimeStamp& now = runningTimesDiff.PostMeasurementTimeStamp();
double threadSampleDeltaMs =
(now - CorePS::ProcessStartTime()).ToMilliseconds();
// If the thread is asleep and has been sampled before in the same
// sleep episode, or otherwise(*) if there was zero CPU activity
// since the previous sampling, find and copy the previous sample,
// as that's cheaper than taking a new sample.
// (*) Tech note: The asleep check is done first and always, because
// it is more reliable, and knows if it's the first asleep
// sample, which cannot be duplicated; if the test was the other
// way around, it could find zero CPU and then short-circuit
// that state-changing second-asleep-check operation, which
// could result in an unneeded sample.
// However we're using current running times (instead of copying the
// old ones) because some work could have happened.
if (threadStackSampling &&
(unlockedThreadData.CanDuplicateLastSampleDueToSleep() ||
runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0)))) {
const bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
threadId, threadSampleDeltaMs,
profiledThreadData->LastSample(), runningTimesDiff);
if (dup_ok) {
continue;
}
}
AUTO_PROFILER_STATS(gecko_SamplerThread_Run_DoPeriodicSample);
// Record the global profiler buffer's range start now, before
// adding the first entry for this thread's sample.
const uint64_t bufferRangeStart = buffer.BufferRangeStart();
// Add the thread ID now, so we know its position in the main
// buffer, which is used by some JS data.
// (DoPeriodicSample only knows about the temporary local buffer.)
const uint64_t samplePos = buffer.AddThreadIdEntry(threadId);
profiledThreadData->LastSample() = Some(samplePos);
// Also add the time, so it's always there after the thread ID, as
// expected by the parser. (Other stack data is optional.)
buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack(
threadSampleDeltaMs));
Maybe<double> unresponsiveDuration_ms;
// If we have RunningTimes data, store it before the CompactStack.
// Note: It is not stored inside the CompactStack so that it doesn't
// get incorrectly duplicated when the thread is sleeping.
if (!runningTimesDiff.IsEmpty()) {
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff);
}
if (threadStackSampling) {
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock
lockedThreadData = offThreadRef.GetLockedRWFromAnyThread();
// Suspend the thread and collect its stack data in the local
// buffer.
mSampler.SuspendAndSampleAndResumeThread(
lock, lockedThreadData.DataCRef(), now,
[&](const Registers& aRegs, const TimeStamp& aNow) {
DoPeriodicSample(lock, lockedThreadData.DataCRef(), aRegs,
samplePos, bufferRangeStart,
localProfileBuffer);
// For "eventDelay", we want the input delay - but if
// there are no events in the input queue (or even if there
// are), we're interested in how long the delay *would* be
// for an input event now, which would be the time to finish
// the current event + the delay caused by any events
// already in the input queue (plus any High priority
// events). Events at lower priorities (in a
// PrioritizedEventQueue) than Input count for input delay
// only for the duration that they're running, since when
// they finish, any queued input event would run.
//
// Unless we record the time state of all events and queue
// states at all times, this is hard to precisely calculate,
// but we can approximate it well in post-processing with
// RunningEventDelay and RunningEventStart.
//
// RunningEventDelay is the time duration the event was
// queued before starting execution. RunningEventStart is
// the time the event started. (Note: since we care about
// Input event delays on MainThread, for
// PrioritizedEventQueues we return 0 for RunningEventDelay
// if the currently running event has a lower priority than
// Input, since Input events won't queue behind them.)
//
// To directly measure this we would need to record the time
// at which the newest event currently in each queue at time
// X (the sample time) finishes running. This of course
// would require looking into the future, or recording all
// this state and then post-processing it later. If we were
// to trace every event start and end we could do this, but
// it would have significant overhead to do so (and buffer
// usage). From a recording of RunningEventDelays and
// RunningEventStarts we can infer the actual delay:
//
// clang-format off
// Event queue: <tail> D : C : B : A <head>
// Time inserted (ms): 40 : 20 : 10 : 0
// Run Time (ms): 30 : 100 : 40 : 30
//
// 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170
// [A||||||||||||]
// ----------[B|||||||||||||||||]
// -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
// -----------------------------------------------------------------[D|||||||||...]
//
// Calculate the delay of a new event added at time t: (run every sample)
// TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
// effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
// delta = (now - last_sample_time);
// last_sample_time = now;
// for (t=effective_submission to now) {
// delay[t] += delta;
// }
//
// Can be reduced in overhead by:
// TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
// effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
// if (effective_submission != last_submission) {
// delta = (now - last_submission);
// // this loop should be made to match each sample point in the range
// // instead of assuming 1ms sampling as this pseudocode does
// for (t=last_submission to effective_submission-1) {
// delay[t] += delta;
// delta -= 1; // assumes 1ms; adjust as needed to match for()
// }
// last_submission = effective_submission;
// }
//
// Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final
// hypothetical Submission Running @ result
// event E
// 0 Empty A 0 30 0 0 @0=10 30
// 10 B A 0 60 0 0 @0=20, @10=10 60
// 20 B A 0 150 0 0 @0=30, @10=20, @20=10 150
// 30 C B 20 140 10 30 @10=20, @20=10, @30=0 140
// 40 C B 20 160 @10=30, @20=20... 160
// 50 C B 20 150 150
// 60 C B 20 140 @10=50, @20=40... 140
// 70 D C 50 130 20 70 @20=50, @30=40... 130
// ...
// 160 D C 50 40 @20=140, @30=130... 40
// 170 <empty> D 140 30 40 @40=140, @50=130... (rounding) 30
// 180 <empty> D 140 20 40 @40=150 20
// 190 <empty> D 140 10 40 @40=160 10
// 200 <empty> <empty> 0 0 NA 0
//
// Function Delay(t) = the time between t and the time at which a hypothetical
// event e would start executing, if e was enqueued at time t.
//
// Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
// // instantly.
// Delay(0) = 30 // The hypothetical event e got enqueued just after A got
// // enqueued. It can start running at 30, when A is done.
// Delay(5) = 25
// Delay(10) = 60 // Can start running at 70, after both A and B are done.
// Delay(19) = 51
// Delay(20) = 150 // Can start running at 170, after A, B & C.
// Delay(25) = 145
// Delay(30) = 170 // Can start running at 200, after A, B, C & D.
// Delay(120) = 80
// Delay(200) = 0 // (assuming nothing was enqueued after D)
//
// For every event that gets enqueued, the Delay time will go up by the
// event's running time at the time at which the event is enqueued.
// The Delay function will be a sawtooth of the following shape:
//
// |\ |...
// | \ |
// |\ | \ |
// | \ | \ |
// |\ | \ | \ |
// |\ | \| \| \ |
// | \| \ |
// _| \____|
//
//
// A more complex example with a PrioritizedEventQueue:
//
// Event queue: <tail> D : C : B : A <head>
// Time inserted (ms): 40 : 20 : 10 : 0
// Run Time (ms): 30 : 100 : 40 : 30
// Priority: Input: Norm: Norm: Norm
//
// 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170
// [A||||||||||||]
// ----------[B|||||||||||||||||]
// ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
// ---------------[D||||||||||||]
//
//
// Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final
// hypothetical Submission Running @ result
// event
// 0 Empty A 0 30 0 0 @0=10 30
// 10 B A 0 20 0 0 @0=20, @10=10 20
// 20 B A 0 10 0 0 @0=30, @10=20, @20=10 10
// 30 C B 0 40 30 30 @30=10 40
// 40 C B 0 60 30 @40=10, @30=20 60
// 50 C B 0 50 30 @50=10, @40=20, @30=30 50
// 60 C B 0 40 30 @60=10, @50=20, @40=30, @30=40 40
// 70 C D 30 30 40 70 @60=20, @50=30, @40=40 30
// 80 C D 30 20 40 70 ...@50=40, @40=50 20
// 90 C D 30 10 40 70 ...@60=40, @50=50, @40=60 10
// 100 <empty> C 0 100 100 100 @100=10 100
// 110 <empty> C 0 90 100 100 @110=10, @100=20 90
//
// For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority.
// Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
// // instantly.
// Delay(0) = 30 // The hypothetical input event e got enqueued just after A got
// // enqueued. It can start running at 30, when A is done.
// Delay(5) = 25
// Delay(10) = 20
// Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not.
// // So e can start running at 30, when A is done.
// Delay(30) = 40 // Can start running at 70, after B is done.
// Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority)
// Delay(80) = 20
// Delay(100) = 100 // Wait for C to finish
// clang-format on
//
// Alternatively we could insert (recycled instead of
// allocated/freed) input events at every sample period
// (1ms...), and use them to back-calculate the delay. This
// might also be somewhat expensive, and would require
// guessing at the maximum delay, which would likely be in
// the seconds, and so you'd need 1000's of pre-allocated
// events per queue per thread - so there would be a memory
// impact as well.
TimeDuration currentEventDelay;
TimeDuration currentEventRunning;
lockedThreadData->GetRunningEventDelay(
aNow, currentEventDelay, currentEventRunning);
// Note: eventDelay is a different definition of
// responsiveness than the 16ms event injection.
// Don't suppress 0's for now; that can be a future
// optimization. We probably want one zero to be stored
// before we start suppressing, which would be more
// complex.
unresponsiveDuration_ms =
Some(currentEventDelay.ToMilliseconds() +
currentEventRunning.ToMilliseconds());
});
if (cpuUtilization) {
// Suspending the thread for sampling could have added some
// running time to it; discard any that accumulated since the
// call to GetThreadRunningTimesDiff above.
DiscardSuspendedThreadRunningTimes(lock, unlockedThreadData);
}
// If we got eventDelay data, store it before the CompactStack.
// Note: It is not stored inside the CompactStack so that it
// doesn't get incorrectly duplicated when the thread is sleeping.
if (unresponsiveDuration_ms.isSome()) {
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::UnresponsiveDurationMs,
*unresponsiveDuration_ms);
}
}
// There *must* be a CompactStack after a TimeBeforeCompactStack;
// but note that other entries may have been concurrently inserted
// between the TimeBeforeCompactStack above and now. If the captured
// sample from `DoPeriodicSample` is complete, copy it into the
// global buffer, otherwise add an empty one to satisfy the parser
// that expects one.
auto state = localBuffer.GetState();
if (NS_WARN_IF(state.mFailedPutBytes !=
previousState.mFailedPutBytes)) {
LOG("Stack sample too big for local storage, failed to store %u "
"bytes",
unsigned(state.mFailedPutBytes -
previousState.mFailedPutBytes));
// There *must* be a CompactStack after a TimeBeforeCompactStack,
// even an empty one.
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::CompactStack,
UniquePtr<ProfileChunkedBuffer>(nullptr));
} else if (state.mRangeEnd - previousState.mRangeEnd >=
*profiler_get_core_buffer().BufferLength()) {
LOG("Stack sample too big for profiler storage, needed %u bytes",
unsigned(state.mRangeEnd - previousState.mRangeEnd));
// There *must* be a CompactStack after a TimeBeforeCompactStack,
// even an empty one.
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::CompactStack,
UniquePtr<ProfileChunkedBuffer>(nullptr));
} else {
profiler_get_core_buffer().PutObjects(
ProfileBufferEntry::Kind::CompactStack, localBuffer);
}
// Clean up for the next run.
localBuffer.Clear();
previousState = localBuffer.GetState();
}
} else {
samplingState = SamplingState::NoStackSamplingCompleted;
}
#if defined(USE_LUL_STACKWALK)
// The LUL unwind object accumulates frame statistics. Periodically we
// should poke it to give it a chance to print those statistics. This
// involves doing I/O (fprintf, __android_log_print, etc.) and so
// can't safely be done from the critical section inside
// SuspendAndSampleAndResumeThread, which is why it is done here.
lul::LUL* lul = CorePS::Lul();
if (lul) {
lul->MaybeShowStats();
}
#endif
TimeStamp threadsSampled = TimeStamp::Now();
{
AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
ActivePS::FulfillChunkRequests(lock);
}
buffer.CollectOverheadStats(sampleStartDeltaMs,
lockAcquired - sampleStart,
expiredMarkersCleaned - lockAcquired,
countersSampled - expiredMarkersCleaned,
threadsSampled - countersSampled);
} else {
samplingState = SamplingState::SamplingPaused;
}
}
// gPSMutex is not held after this point.
// Invoke end-of-sampling callbacks outside of the locked scope.
InvokePostSamplingCallbacks(std::move(postSamplingCallbacks),
samplingState);
ProfilerChild::ProcessPendingUpdate();
if (ProfilerFeature::HasUnregisteredThreads(features)) {
#if defined(GP_OS_windows)
{
MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
switch (mSpyingState) {
case SpyingState::SamplerToSpy_Start:
case SpyingState::Spy_Working:
// If the spy is working (or about to work), record this loop
// iteration to delay the next start.
++mDelaySpyStart;
break;
case SpyingState::Spy_Waiting:
// The Spy is idle, waiting for instructions. Should we delay?
if (--mDelaySpyStart <= 0) {
mDelaySpyStart = 0;
mSpyingState = SpyingState::SamplerToSpy_Start;
mSpyingStateMonitor.NotifyAll();
}
break;
default:
// Otherwise the spy should be initializing or shutting down.
MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing ||
mSpyingState == SpyingState::MainToSpy_Shutdown ||
mSpyingState == SpyingState::SpyToMain_ShuttingDown);
break;
}
}
#else
// On non-Windows platforms, this is fast enough to run on this thread
// in each sampling loop iteration.
SpyOnUnregisteredThreads();
#endif
}
// We expect the next sampling loop to start `sampleInterval` after this
// loop here was scheduled to start.
scheduledSampleStart += sampleInterval;
// Try to sleep until we reach that next scheduled time.
const TimeStamp beforeSleep = TimeStamp::Now();
if (scheduledSampleStart >= beforeSleep) {
// There is still time before the next scheduled sample time.
const uint32_t sleepTimeUs = static_cast<uint32_t>(
(scheduledSampleStart - beforeSleep).ToMicroseconds());
if (sleepTimeUs >= minimumIntervalSleepUs) {
SleepMicro(sleepTimeUs);
} else {
// If we're too close to that time, sleep the minimum amount of time.
// Note that the next scheduled start is not shifted, so at the end of
// the next loop, sleep may again be adjusted to get closer to schedule.
SleepMicro(minimumIntervalSleepUs);
}
} else {
// This sampling loop ended after the next sampling should have started!
// There is little point in trying to keep to the schedule now: it would
// require more work, and we're likely late because the system is already
// busy. Try to restart a normal schedule from now.
scheduledSampleStart = beforeSleep + sampleInterval;
SleepMicro(static_cast<uint32_t>(sampleInterval.ToMicroseconds()));
}
}
// End of `while` loop. We can only be here from a `break` inside the loop.
InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState);
}
namespace geckoprofiler::markers {
struct UnregisteredThreadLifetimeMarker {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("UnregisteredThreadLifetime");
}
static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
base::ProcessId aThreadId,
const ProfilerString8View& aName,
const ProfilerString8View& aEndEvent) {
aWriter.IntProperty("Thread Id", aThreadId);
aWriter.StringProperty("Thread Name", aName.Length() != 0
? aName.AsSpan()
: MakeStringSpan("~Unnamed~"));
if (aEndEvent.Length() != 0) {
aWriter.StringProperty("End Event", aEndEvent);
}
}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer,
MS::Searchable::Searchable);
schema.AddKeyFormatSearchable("Thread Name", MS::Format::String,
MS::Searchable::Searchable);
schema.AddKeyFormat("End Event", MS::Format::String);
schema.AddStaticLabelValue(
"Note",
"Start and end are approximate, based on first and last appearances.");
schema.SetChartLabel(
"{marker.data.Thread Name} (tid {marker.data.Thread Id})");
schema.SetTableLabel("{marker.name} lifetime");
return schema;
}
};
struct UnregisteredThreadCPUMarker {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("UnregisteredThreadCPU");
}
static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
base::ProcessId aThreadId,
int64_t aCPUDiffNs, const TimeStamp& aStart,
const TimeStamp& aEnd) {
aWriter.IntProperty("Thread Id", aThreadId);
aWriter.IntProperty("CPU Time", aCPUDiffNs);
aWriter.DoubleProperty(
"CPU Utilization",
double(aCPUDiffNs) / ((aEnd - aStart).ToMicroseconds() * 1000.0));
}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer,
MS::Searchable::Searchable);
schema.AddKeyFormat("CPU Time", MS::Format::Nanoseconds);
schema.AddKeyFormat("CPU Utilization", MS::Format::Percentage);
schema.SetChartLabel("{marker.data.CPU Utilization}");
schema.SetTableLabel(
"{marker.name} - Activity: {marker.data.CPU Utilization}");
return schema;
}
};
} // namespace geckoprofiler::markers
static bool IsThreadIdRegistered(ProfilerThreadId aThreadId) {
ThreadRegistry::LockedRegistry lockedRegistry;
const auto registryEnd = lockedRegistry.end();
return std::find_if(
lockedRegistry.begin(), registryEnd,
[aThreadId](const ThreadRegistry::OffThreadRef& aOffThreadRef) {
return aOffThreadRef.UnlockedConstReaderCRef()
.Info()
.ThreadId() == aThreadId;
}) != registryEnd;
}
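// Builds a human-readable marker name for a (possibly unnamed) thread, e.g.
// MakeThreadInfoMarkerName(1234, "Compositor"_ns) produces
// "tid 1234 Compositor", and an empty name produces just "tid 1234".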
static nsAutoCString MakeThreadInfoMarkerName(base::ProcessId aThreadId,
const nsACString& aName) {
nsAutoCString markerName{"tid "};
markerName.AppendInt(int64_t(aThreadId));
if (!aName.IsEmpty()) {
markerName.AppendLiteral(" ");
markerName.Append(aName);
}
return markerName;
}
void SamplerThread::SpyOnUnregisteredThreads() {
const TimeStamp unregisteredThreadSearchStart = TimeStamp::Now();
const base::ProcessId currentProcessId =
base::ProcessId(profiler_current_process_id().ToNumber());
nsTArray<ProcInfoRequest> request(1);
request.EmplaceBack(
/* aPid = */ currentProcessId,
/* aProcessType = */ ProcType::Unknown,
/* aOrigin = */ ""_ns,
/* aWindowInfo = */ nsTArray<WindowInfo>{},
/* aUtilityInfo = */ nsTArray<UtilityInfo>{},
/* aChild = */ 0
#ifdef XP_DARWIN
,
/* aChildTask = */ MACH_PORT_NULL
#endif // XP_DARWIN
);
const ProcInfoPromise::ResolveOrRejectValue procInfoOrError =
GetProcInfoSync(std::move(request));
if (!procInfoOrError.IsResolve()) {
PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)),
"Could not retrieve any process information");
return;
}
const auto& procInfoHashMap = procInfoOrError.ResolveValue();
// Expecting the requested (current) process information to be present in the
// hashmap.
const auto& procInfoPtr =
procInfoHashMap.readonlyThreadsafeLookup(currentProcessId);
if (!procInfoPtr) {
PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)),
"Could not retrieve information about this process");
return;
}
// Record the time spent so far, which is OS-bound...
PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)),
"Work to discover threads");
// ... and record the time needed to process the data, which we can control.
AUTO_PROFILER_MARKER_TEXT(
"Unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread()),
"Work to process discovered threads and record unregistered ones"_ns);
const Span<const mozilla::ThreadInfo> threads = procInfoPtr->value().threads;
// mLastSpying timestamp should be null only at the beginning of a session,
// when mSpiedThreads is still empty.
MOZ_ASSERT_IF(mLastSpying.IsNull(), mSpiedThreads.IsEmpty());
const TimeStamp previousSpying = std::exchange(mLastSpying, TimeStamp::Now());
// Find threads that were spied on but are not present anymore.
const auto threadsBegin = threads.begin();
const auto threadsEnd = threads.end();
for (size_t spiedThreadIndexPlus1 = mSpiedThreads.Length();
spiedThreadIndexPlus1 != 0; --spiedThreadIndexPlus1) {
const SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndexPlus1 - 1];
if (std::find_if(threadsBegin, threadsEnd,
[spiedTid = spiedThread.mThreadId](
const mozilla::ThreadInfo& aThreadInfo) {
return aThreadInfo.tid == spiedTid;
}) == threadsEnd) {
// This spied thread is gone.
PROFILER_MARKER(
MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName),
PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(),
// Place the end between this update and the previous one.
MarkerTiming::IntervalEnd(previousSpying +
(mLastSpying - previousSpying) /
int64_t(2))),
UnregisteredThreadLifetimeMarker, spiedThread.mThreadId,
spiedThread.mName, "Thread disappeared");
// Don't spy on it anymore, assuming it won't come back.
mSpiedThreads.RemoveElementAt(spiedThreadIndexPlus1 - 1);
}
}
for (const mozilla::ThreadInfo& threadInfo : threads) {
// Index of this encountered thread in mSpiedThreads, or NoIndex.
size_t spiedThreadIndex = mSpiedThreads.IndexOf(threadInfo.tid);
if (IsThreadIdRegistered(ProfilerThreadId::FromNumber(threadInfo.tid))) {
// This thread id is already officially registered.
if (spiedThreadIndex != SpiedThreads::NoIndex) {
// This now-registered thread was previously being spied.
SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex];
PROFILER_MARKER(
MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName),
PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(),
// Place the end between this update and the previous one.
// TODO: Find the real time from the thread registration?
MarkerTiming::IntervalEnd(previousSpying +
(mLastSpying - previousSpying) /
int64_t(2))),
UnregisteredThreadLifetimeMarker, spiedThread.mThreadId,
spiedThread.mName, "Thread registered itself");
// Remove from mSpiedThreads, since it can be profiled normally.
mSpiedThreads.RemoveElement(threadInfo.tid);
}
} else {
// This thread id is not registered.
if (spiedThreadIndex == SpiedThreads::NoIndex) {
// This unregistered thread has not been spied yet, store it now.
NS_ConvertUTF16toUTF8 name(threadInfo.name);
mSpiedThreads.EmplaceBack(threadInfo.tid, name, threadInfo.cpuTime);
PROFILER_MARKER(
MakeThreadInfoMarkerName(threadInfo.tid, name), PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(),
// Place the start between this update and the previous one (or
// the start of this search if it's the first one).
MarkerTiming::IntervalStart(
mLastSpying -
(mLastSpying - (previousSpying.IsNull()
? unregisteredThreadSearchStart
: previousSpying)) /
int64_t(2))),
UnregisteredThreadLifetimeMarker, threadInfo.tid, name,
/* aEndEvent */ "");
} else {
// This unregistered thread was already being spied, record its work.
SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex];
int64_t diffCPUTimeNs =
int64_t(threadInfo.cpuTime) - int64_t(spiedThread.mCPUTimeNs);
spiedThread.mCPUTimeNs = threadInfo.cpuTime;
if (diffCPUTimeNs != 0) {
PROFILER_MARKER(
MakeThreadInfoMarkerName(threadInfo.tid, spiedThread.mName),
PROFILER,
MarkerOptions(
MarkerThreadId::MainThread(),
MarkerTiming::Interval(previousSpying, mLastSpying)),
UnregisteredThreadCPUMarker, threadInfo.tid, diffCPUTimeNs,
previousSpying, mLastSpying);
}
}
}
}
PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER,
MarkerOptions(MarkerThreadId::MainThread(),
MarkerTiming::IntervalUntilNowFrom(
unregisteredThreadSearchStart)),
"Work to discover and record unregistered threads");
}
// We #include these files directly because it means those files can use
// declarations from this file trivially. These provide target-specific
// implementations of all SamplerThread methods except Run().
#if defined(GP_OS_windows)
# include "platform-win32.cpp"
#elif defined(GP_OS_darwin)
# include "platform-macos.cpp"
#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
# include "platform-linux-android.cpp"
#else
# error "bad platform"
#endif
// END SamplerThread
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// BEGIN externally visible functions
MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)
NS_IMETHODIMP GeckoProfilerReporter::CollectReports(
nsIHandleReportCallback* aHandleReport, nsISupports* aData,
bool aAnonymize) {
MOZ_RELEASE_ASSERT(NS_IsMainThread());
size_t profSize = 0;
size_t lulSize = 0;
{
PSAutoLock lock;
if (CorePS::Exists()) {
CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
}
if (ActivePS::Exists(lock)) {
profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
}
}
MOZ_COLLECT_REPORT(
"explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize,
"Memory used by the Gecko Profiler's global state (excluding memory used "
"by LUL).");
#if defined(USE_LUL_STACKWALK)
MOZ_COLLECT_REPORT(
"explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize,
"Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
#endif
return NS_OK;
}
NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)
static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
if (strcmp(aFeature, "default") == 0) {
return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures())
: DefaultFeatures()) &
AvailableFeatures();
}
#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
if (strcmp(aFeature, str_) == 0) { \
return ProfilerFeature::Name_; \
}
PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)
#undef PARSE_FEATURE_BIT
printf("\nUnrecognized feature \"%s\".\n\n", aFeature);
// Since we may have an old feature we don't implement anymore, don't exit.
PrintUsage();
return 0;
}
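// For example (assuming the usual feature strings), passing {"js",
// "stackwalk"} to ParseFeaturesFromStringArray below yields
// ProfilerFeature::JS | ProfilerFeature::StackWalk; an unrecognized string
// contributes 0, and ParseFeature only prints a warning for it.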
uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
uint32_t aFeatureCount,
bool aIsStartup /* = false */) {
uint32_t features = 0;
for (size_t i = 0; i < aFeatureCount; i++) {
features |= ParseFeature(aFeatures[i], aIsStartup);
}
return features;
}
static ProfilingStack* locked_register_thread(
PSLockRef aLock, ThreadRegistry::OffThreadRef aOffThreadRef) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
VTUNE_REGISTER_THREAD(aOffThreadRef.UnlockedConstReaderCRef().Info().Name());
if (ActivePS::Exists(aLock)) {
ThreadProfilingFeatures threadProfilingFeatures =
ActivePS::ProfilingFeaturesForThread(
aLock, aOffThreadRef.UnlockedConstReaderCRef().Info());
if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) {
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock
lockedRWFromAnyThread = aOffThreadRef.GetLockedRWFromAnyThread();
ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
aLock, MakeUnique<ProfiledThreadData>(
aOffThreadRef.UnlockedConstReaderCRef().Info()));
lockedRWFromAnyThread->SetProfilingFeaturesAndData(
threadProfilingFeatures, profiledThreadData, aLock);
if (ActivePS::FeatureJS(aLock)) {
lockedRWFromAnyThread->StartJSSampling(ActivePS::JSFlags(aLock));
if (lockedRWFromAnyThread->GetJSContext()) {
profiledThreadData->NotifyReceivedJSContext(
ActivePS::Buffer(aLock).BufferRangeEnd());
if (ActivePS::FeatureTracing(aLock)) {
CycleCollectedJSContext* ctx =
lockedRWFromAnyThread->GetCycleCollectedJSContext();
ctx->BeginExecutionTracingAsync();
}
}
}
}
}
return &aOffThreadRef.UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef();
}
static void NotifyObservers(const char* aTopic,
nsISupports* aSubject = nullptr) {
if (!NS_IsMainThread()) {
// Dispatch a task to the main thread that notifies observers.
// If NotifyObservers is called both on and off the main thread within a
// short time, the order of the notifications can be different from the
// order of the calls to NotifyObservers.
// Getting the order 100% right isn't that important at the moment, because
// these notifications are only observed in the parent process, where the
// profiler_* functions are currently only called on the main thread.
nsCOMPtr<nsISupports> subject = aSubject;
NS_DispatchToMainThread(NS_NewRunnableFunction(
"NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
return;
}
if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
os->NotifyObservers(aSubject, aTopic, nullptr);
}
}
[[nodiscard]] static RefPtr<GenericPromise> NotifyProfilerStarted(
const PowerOfTwo32& aCapacity, const Maybe<double>& aDuration,
double aInterval, uint32_t aFeatures, const char** aFilters,
uint32_t aFilterCount, uint64_t aActiveTabID) {
nsTArray<nsCString> filtersArray;
for (size_t i = 0; i < aFilterCount; ++i) {
filtersArray.AppendElement(aFilters[i]);
}
nsCOMPtr<nsIProfilerStartParams> params = new nsProfilerStartParams(
aCapacity.Value(), aDuration, aInterval, aFeatures,
std::move(filtersArray), aActiveTabID);
RefPtr<GenericPromise> startPromise = ProfilerParent::ProfilerStarted(params);
NotifyObservers("profiler-started", params);
return startPromise;
}
static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
double aInterval, uint32_t aFeatures,
const char** aFilters, uint32_t aFilterCount,
uint64_t aActiveTabID,
const Maybe<double>& aDuration);
// This basically duplicates AutoProfilerLabel's constructor.
static void* MozGlueLabelEnter(const char* aLabel, const char* aDynamicString,
void* aSp) {
ThreadRegistration::OnThreadPtr onThreadPtr =
ThreadRegistration::GetOnThreadPtr();
if (!onThreadPtr) {
return nullptr;
}
ProfilingStack& profilingStack =
onThreadPtr->UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef();
profilingStack.pushLabelFrame(aLabel, aDynamicString, aSp,
JS::ProfilingCategoryPair::OTHER);
return &profilingStack;
}
// This basically duplicates AutoProfilerLabel's destructor.
static void MozGlueLabelExit(void* aProfilingStack) {
if (aProfilingStack) {
reinterpret_cast<ProfilingStack*>(aProfilingStack)->pop();
}
}
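// These two callbacks are handed to mozglue via
// RegisterProfilerLabelEnterExit() in locked_profiler_start() below.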
static Vector<const char*> SplitAtCommas(const char* aString,
UniquePtr<char[]>& aStorage) {
size_t len = strlen(aString);
aStorage = MakeUnique<char[]>(len + 1);
PodCopy(aStorage.get(), aString, len + 1);
// Iterate over all characters in aStorage and split at commas, by
// overwriting commas with the null char.
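// For example, "js,,stackwalk" yields {"js", "stackwalk"}: empty elements
// are skipped below.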
Vector<const char*> array;
size_t currentElementStart = 0;
for (size_t i = 0; i <= len; i++) {
if (aStorage[i] == ',') {
aStorage[i] = '\0';
}
if (aStorage[i] == '\0') {
// Only add non-empty elements, otherwise ParseFeatures would later
// complain about unrecognized features.
if (currentElementStart != i) {
MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart]));
}
currentElementStart = i + 1;
}
}
return array;
}
void profiler_init_threadmanager() {
LOG("profiler_init_threadmanager");
ThreadRegistration::WithOnThreadRef(
[](ThreadRegistration::OnThreadRef aOnThreadRef) {
aOnThreadRef.WithLockedRWOnThread(
[](ThreadRegistration::LockedRWOnThread& aThreadData) {
if (!aThreadData.GetEventTarget()) {
aThreadData.ResetMainThread(NS_GetCurrentThreadNoCreate());
}
});
});
}
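// Helper for parsing MOZ_PROFILER_STARTUP_ENTRIES: returns a pointer to the
// first non-digit character, e.g. get_size_suffix("128KiB") points at "KiB"
// and get_size_suffix("4096") points at the terminating '\0'.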
static const char* get_size_suffix(const char* str) {
const char* ptr = str;
while (isdigit(*ptr)) {
ptr++;
}
return ptr;
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
static void profiler_start_signal_handler(int signal, siginfo_t* info,
void* context) {
// Starting the profiler from a signal handler is a risky business: Both of
// the main tasks that we would like to accomplish (allocating memory, and
// starting a thread) are illegal within a UNIX signal handler. Conversely,
// we cannot dispatch to the main thread, as this may be "stuck" (why else
// would we be using a signal handler to start the profiler?).
// Instead, we have a background thread running that watches a pipe for a
// given "control" character. In this handler, we can simply write to that
// pipe to get the background thread to start the profiler for us!
// Note that `write` is async-signal safe (see signal-safety(7)):
// This means that it's safe for us to call within a signal handler.
if (sAsyncSignalControlWriteFd != -1) {
char signalControlCharacter = sAsyncSignalControlCharStart;
Unused << write(sAsyncSignalControlWriteFd, &signalControlCharacter,
sizeof(signalControlCharacter));
}
}
static void profiler_stop_signal_handler(int signal, siginfo_t* info,
void* context) {
// Signal handlers are limited in what functions they can call; see
// signal-safety(7) for details.
// As we have a background thread already running for checking whether or
// not we want to start the profiler, we can re-use the same machinery to
// stop the profiler. We use the same mechanism of writing to a pipe/file
// descriptor, but with a different control character. Note that `write` is
// signal safe.
if (sAsyncSignalControlWriteFd != -1) {
char signalControlCharacter = sAsyncSignalControlCharStop;
Unused << write(sAsyncSignalControlWriteFd, &signalControlCharacter,
sizeof(signalControlCharacter));
}
}
#endif
// This may fail if we have previously had an issue finding the download
// directory, or if the directory has moved since we cached the path.
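// On success the returned path has the form
// "<dump directory>/profile_<process type>_<pid>.json", e.g. a parent
// (default) process with pid 1234 would typically get a path ending in
// "profile_0_1234.json".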
Maybe<nsAutoCString> profiler_find_dump_path() {
// Note, this is currently a posix-only implementation, as we currently only
// use it from the posix signal-handling control path.
#if defined(XP_WIN)
return Nothing();
#else
Maybe<nsCOMPtr<nsIFile>> directory = Nothing();
nsAutoCString path;
{
// Acquire the lock so that we can get things from CorePS
PSAutoLock lock;
Maybe<nsCOMPtr<nsIFile>> downloadDir = Nothing();
downloadDir = CorePS::AsyncSignalDumpDirectory(lock);
// This needs to be done within the context of the lock, as otherwise
// another thread might modify CorePS::mAsyncSignalDumpDirectory while we're
// cloning the pointer.
if (downloadDir) {
nsCOMPtr<nsIFile> d;
downloadDir.value()->Clone(getter_AddRefs(d));
directory = Some(d);
} else {
return Nothing();
}
}
// Now, we can check to see if we have a directory, and use it to construct
// the output file
if (directory) {
// Set up the name of our profile file
path.AppendPrintf("profile_%i_%i.json", XRE_GetProcessType(), getpid());
// Append it to the directory we found
nsresult rv = directory.value()->AppendNative(path);
if (NS_FAILED(rv)) {
LOG("Failed to append path to profile file");
return Nothing();
}
// Write the result *back* to the original path
rv = directory.value()->GetNativePath(path);
if (NS_FAILED(rv)) {
LOG("Failed to get native path for temp path");
return Nothing();
}
return Some(path);
}
return Nothing();
#endif
}
void profiler_start_from_signal() {
// Do nothing unless we're the parent process, as we're sandboxed and can't
// write any data that we gather anyway.
if (XRE_IsParentProcess()) {
// Start the profiler here directly, as we're on a background thread.
// Enabling the JS feature is known to leak an 8-byte object during testing
// (see the associated bug for more details), but we enable it here anyway.
uint32_t features = ProfilerFeature::JS | ProfilerFeature::StackWalk |
ProfilerFeature::CPUUtilization;
// As we often don't know which threads we'll care about, tell the
// profiler to profile all threads.
const char* filters[] = {"*"};
if (MOZ_UNLIKELY(NS_IsMainThread())) {
// We are on the main thread here, so `NotifyProfilerStarted` will
// start the profiler in content/child processes.
profiler_start(PROFILER_DEFAULT_SIGHANDLE_ENTRIES,
PROFILER_DEFAULT_INTERVAL, features, filters,
std::size(filters), 0);
} else {
// Directly start the profiler on this thread. We know we're not the main
// thread here, so this will not start the profiler in child processes,
// but we want to make sure that we do it here in case the main thread is
// stuck.
profiler_start(PROFILER_DEFAULT_SIGHANDLE_ENTRIES,
PROFILER_DEFAULT_INTERVAL, features, filters,
std::size(filters), 0);
// Now also try to start the profiler from the main thread, so that
// ProfilerParent will start the profiler in child processes.
NS_DispatchToMainThread(
NS_NewRunnableFunction("StartProfilerInChildProcesses", [=] {
Unused << NotifyProfilerStarted(
PROFILER_DEFAULT_SIGHANDLE_ENTRIES, Nothing(),
PROFILER_DEFAULT_INTERVAL, features,
const_cast<const char**>(filters), std::size(filters), 0);
}));
}
}
}
void profiler_dump_and_stop() {
// Do nothing unless we're the parent process, as we're sandboxed and can't
// open a file handle anyway.
if (!XRE_IsParentProcess()) {
return;
}
// Pause the profiler until we are done dumping.
profiler_pause();
// Try to save the profile to a file
auto path = profiler_find_dump_path();
// Exit quickly if we can't find the path, while stopping the profiler
if (!path) {
LOG("Failed to find a valid dump path to write profile to disk");
profiler_stop();
return;
}
// Dump the profile of this process first, in case the multi-process
// gathering is unsuccessful (e.g. due to a blocked main thread).
profiler_save_profile_to_file(path.value().get());
// We are probably not the main thread, but check anyway, and dispatch
// directly.
if (NS_IsMainThread()) {
nsCOMPtr<nsIProfiler> nsProfiler(
do_GetService("@mozilla.org/tools/profiler;1"));
nsProfiler->DumpProfileToFileAsyncNoJs(path.value(), 0)
->Then(
GetMainThreadSerialEventTarget(), __func__,
[](void_t ok) {
LOG("Stopping profiler after dumping profile to disk");
profiler_stop();
},
[](nsresult aRv) {
LOG("Dumping to disk failed with error \"%s\", stopping "
"profiler.",
GetStaticErrorName(aRv));
profiler_stop();
});
} else {
// Dispatch a runnable, as nsProfiler classes are currently main-thread
// only. We also stop the profiler within the runnable, as otherwise we
// may find ourselves stopping the profiler before the runnable has
// gathered all the profile data.
NS_DispatchToMainThread(
NS_NewRunnableFunction("WriteProfileDataToFile", [=] {
nsCOMPtr<nsIProfiler> nsProfiler(
do_GetService("@mozilla.org/tools/profiler;1"));
nsProfiler->DumpProfileToFileAsyncNoJs(path.value(), 0)
->Then(
GetMainThreadSerialEventTarget(), __func__,
[](void_t ok) {
LOG("Stopping profiler after dumping profile to disk");
profiler_stop();
},
[](nsresult aRv) {
LOG("Dumping to disk failed with error \"%s\", stopping "
"profiler.",
GetStaticErrorName(aRv));
profiler_stop();
});
}));
}
}
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
void profiler_init_signal_handlers() {
// Set a handler to start the profiler
struct sigaction prof_start_sa {};
memset(&prof_start_sa, 0, sizeof(struct sigaction));
prof_start_sa.sa_sigaction = profiler_start_signal_handler;
prof_start_sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigemptyset(&prof_start_sa.sa_mask);
DebugOnly<int> rstart = sigaction(SIGUSR1, &prof_start_sa, nullptr);
MOZ_ASSERT(rstart == 0, "Failed to install Profiler SIGUSR1 handler");
// Set a handler to stop the profiler
struct sigaction prof_stop_sa {};
memset(&prof_stop_sa, 0, sizeof(struct sigaction));
prof_stop_sa.sa_sigaction = profiler_stop_signal_handler;
prof_stop_sa.sa_flags = SA_RESTART | SA_SIGINFO;
sigemptyset(&prof_stop_sa.sa_mask);
DebugOnly<int> rstop = sigaction(SIGUSR2, &prof_stop_sa, nullptr);
MOZ_ASSERT(rstop == 0, "Failed to install Profiler SIGUSR2 handler");
}
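// Usage sketch for the handlers installed above: sending SIGUSR1 to this
// process (e.g. `kill -USR1 <pid>`) asks it to start the profiler, and
// SIGUSR2 (`kill -USR2 <pid>`) asks it to stop.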
#endif
static void PollJSSamplingForCurrentThread() {
// Don't call into the JS engine with the global profiler mutex held as this
// can deadlock.
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ThreadRegistration::WithOnThreadRef(
[](ThreadRegistration::OnThreadRef aOnThreadRef) {
aOnThreadRef.WithLockedRWOnThread(
[](ThreadRegistration::LockedRWOnThread& aThreadData) {
aThreadData.PollJSSampling();
});
});
}
void profiler_init(void* aStackTop) {
LOG("profiler_init");
profiler_init_main_thread_id();
VTUNE_INIT();
ETW::Init();
InitPerfetto();
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
mozilla::profiler::memory_hooks_tls_init();
#endif
MOZ_RELEASE_ASSERT(!CorePS::Exists());
if (getenv("MOZ_PROFILER_HELP")) {
PrintUsage();
exit(0);
}
SharedLibraryInfo::Initialize();
// We initialize here as well as in baseprofiler because
// baseprofiler init doesn't happen in child processes.
Flow::Init();
uint32_t features = DefaultFeatures() & AvailableFeatures();
UniquePtr<char[]> filterStorage;
Vector<const char*> filters;
MOZ_RELEASE_ASSERT(filters.append("GeckoMain"));
MOZ_RELEASE_ASSERT(filters.append("Compositor"));
MOZ_RELEASE_ASSERT(filters.append("Renderer"));
MOZ_RELEASE_ASSERT(filters.append("DOM Worker"));
PowerOfTwo32 capacity = PROFILER_DEFAULT_ENTRIES;
Maybe<double> duration = Nothing();
double interval = PROFILER_DEFAULT_INTERVAL;
uint64_t activeTabID = PROFILER_DEFAULT_ACTIVE_TAB_ID;
ThreadRegistration::RegisterThread(kMainThreadName, aStackTop);
{
PSAutoLock lock;
// We've passed the possible failure point. Instantiate CorePS, which
// indicates that the profiler has initialized successfully.
CorePS::Create(lock);
// Make sure threads already in the ThreadRegistry (like the main thread)
// get registered in CorePS as well.
{
ThreadRegistry::LockedRegistry lockedRegistry;
for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
locked_register_thread(lock, offThreadRef);
}
}
// Platform-specific initialization.
PlatformInit(lock);
#if defined(GECKO_PROFILER_ASYNC_POSIX_SIGNAL_CONTROL)
// Initialise the background thread to listen for signal handler
// communication
CorePS::SetAsyncSignalControlThread(new AsyncSignalControlThread);
// Initialise the signal handlers needed to start/stop the profiler
profiler_init_signal_handlers();
#endif
#if defined(GP_OS_android)
if (jni::IsAvailable()) {
GeckoJavaSampler::Init();
}
#endif
// (Linux-only) We could create CorePS::mLul and read unwind info into it
// at this point. That would match the lifetime implied by destruction of
// it in profiler_shutdown() just below. However, that gives a big delay on
// startup, even if no profiling is actually to be done. So, instead, it is
// created on demand at the first call to PlatformStart().
const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
if (!startupEnv || startupEnv[0] == '\0' ||
((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
startupEnv[0] == 'n') &&
startupEnv[1] == '\0')) {
return;
}
LOG("- MOZ_PROFILER_STARTUP is set");
// Startup default capacity may be different.
capacity = PROFILER_DEFAULT_STARTUP_ENTRIES;
const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
if (startupCapacity && startupCapacity[0] != '\0') {
errno = 0;
long capacityLong = strtol(startupCapacity, nullptr, 10);
std::string_view sizeSuffix = get_size_suffix(startupCapacity);
if (sizeSuffix == "KB") {
capacityLong *= 1000 / scBytesPerEntry;
} else if (sizeSuffix == "KiB") {
capacityLong *= 1024 / scBytesPerEntry;
} else if (sizeSuffix == "MB") {
capacityLong *= (1000 * 1000) / scBytesPerEntry;
} else if (sizeSuffix == "MiB") {
capacityLong *= (1024 * 1024) / scBytesPerEntry;
} else if (sizeSuffix == "GB") {
capacityLong *= (1000 * 1000 * 1000) / scBytesPerEntry;
} else if (sizeSuffix == "GiB") {
capacityLong *= (1024 * 1024 * 1024) / scBytesPerEntry;
} else if (!sizeSuffix.empty()) {
LOG("- MOZ_PROFILER_STARTUP_ENTRIES unit must be one of the "
"following: KB, KiB, MB, MiB, GB, GiB");
PrintUsage();
exit(1);
}
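// For example, "8MiB" parses as capacityLong = 8 and is then scaled by
// (1024 * 1024) / scBytesPerEntry, i.e. it requests enough entries to
// cover roughly 8MiB of buffer space.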
// `long` could be 32 or 64 bits, so we force a 64-bit comparison with
// the maximum 32-bit signed number (as more than that is clamped down to
// 2^31 anyway).
if (errno == 0 && capacityLong > 0 &&
static_cast<uint64_t>(capacityLong) <=
static_cast<uint64_t>(INT32_MAX)) {
capacity = PowerOfTwo32(
ClampToAllowedEntries(static_cast<uint32_t>(capacityLong)));
LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
} else {
LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
startupCapacity);
PrintUsage();
exit(1);
}
}
const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
if (startupDuration && startupDuration[0] != '\0') {
errno = 0;
double durationVal = PR_strtod(startupDuration, nullptr);
if (errno == 0 && durationVal >= 0.0) {
if (durationVal > 0.0) {
duration = Some(durationVal);
}
LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal);
} else {
LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
startupDuration);
PrintUsage();
exit(1);
}
}
const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
if (startupInterval && startupInterval[0] != '\0') {
errno = 0;
interval = PR_strtod(startupInterval, nullptr);
if (errno == 0 && interval > 0.0 && interval <= PROFILER_MAX_INTERVAL) {
LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
} else {
LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
startupInterval);
PrintUsage();
exit(1);
}
}
features |= StartupExtraDefaultFeatures() & AvailableFeatures();
const char* startupFeaturesBitfield =
getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
errno = 0;
features = strtol(startupFeaturesBitfield, nullptr, 10);
if (errno == 0) {
LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
} else {
LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
startupFeaturesBitfield);
PrintUsage();
exit(1);
}
} else {
const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
if (startupFeatures) {
// Interpret startupFeatures as a list of feature strings, separated by
// commas.
UniquePtr<char[]> featureStringStorage;
Vector<const char*> featureStringArray =
SplitAtCommas(startupFeatures, featureStringStorage);
features = ParseFeaturesFromStringArray(featureStringArray.begin(),
featureStringArray.length(),
/* aIsStartup */ true);
LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
}
}
const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
if (startupFilters && startupFilters[0] != '\0') {
filters = SplitAtCommas(startupFilters, filterStorage);
LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
if (mozilla::profiler::detail::FiltersExcludePid(filters)) {
LOG(" -> This process is excluded and won't be profiled");
return;
}
}
const char* startupActiveTabID =
getenv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID");
if (startupActiveTabID && startupActiveTabID[0] != '\0') {
std::istringstream iss(startupActiveTabID);
iss >> activeTabID;
if (!iss.fail()) {
LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID = %" PRIu64, activeTabID);
} else {
LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID not a valid "
"uint64_t: %s",
startupActiveTabID);
PrintUsage();
exit(1);
}
}
locked_profiler_start(lock, capacity, interval, features, filters.begin(),
filters.length(), activeTabID, duration);
}
PollJSSamplingForCurrentThread();
// The GeckoMain thread registration happened too early to record a marker,
// so let's record it again now.
profiler_mark_thread_awake();
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (ProfilerFeature::ShouldInstallMemoryHooks(features)) {
// Start counting memory allocations (outside of lock because this may call
// profiler_add_sampled_counter which would attempt to take the lock.)
ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
} else {
// Unregister the memory counter in case it was registered before. This will
// make sure that the empty memory counter from the previous profiler run is
// removed completely and we don't serialize the memory counters.
mozilla::profiler::unregister_memory_counter();
}
#endif
invoke_profiler_state_change_callbacks(ProfilingState::Started);
// We do this with gPSMutex unlocked. The comment in profiler_stop() explains
// why.
Unused << NotifyProfilerStarted(capacity, duration, interval, features,
filters.begin(), filters.length(), 0);
}
static void locked_profiler_save_profile_to_file(
PSLockRef aLock, const char* aFilename,
const PreRecordedMetaInformation& aPreRecordedMetaInformation,
bool aIsShuttingDown);
static SamplerThread* locked_profiler_stop(PSLockRef aLock);
void profiler_shutdown(IsFastShutdown aIsFastShutdown) {
LOG("profiler_shutdown");
VTUNE_SHUTDOWN();
ETW::Shutdown();
MOZ_RELEASE_ASSERT(NS_IsMainThread());
MOZ_RELEASE_ASSERT(CorePS::Exists());
if (profiler_is_active()) {
invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
}
invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown);
const auto preRecordedMetaInformation =
PreRecordMetaInformation(/* aShutdown = */ true);
ProfilerParent::ProfilerWillStopIfStarted();
// If the profiler is active we must get a handle to the SamplerThread before
// ActivePS is destroyed, in order to delete it.
SamplerThread* samplerThread = nullptr;
{
PSAutoLock lock;
// Save the profile on shutdown if requested.
if (ActivePS::Exists(lock)) {
const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
if (filename && filename[0] != '\0') {
locked_profiler_save_profile_to_file(lock, filename,
preRecordedMetaInformation,
/* aIsShuttingDown */ true);
}
if (aIsFastShutdown == IsFastShutdown::Yes) {
return;
}
samplerThread = locked_profiler_stop(lock);
} else if (aIsFastShutdown == IsFastShutdown::Yes) {
return;
}
CorePS::Destroy(lock);
}
PollJSSamplingForCurrentThread();
// We do these operations with gPSMutex unlocked. The comments in
// profiler_stop() explain why.
if (samplerThread) {
Unused << ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped");
delete samplerThread;
}
// Reverse the registration done in profiler_init.
ThreadRegistration::UnregisterThread();
}
static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
double aSinceTime, bool aIsShuttingDown,
ProfilerCodeAddressService* aService,
mozilla::ProgressLogger aProgressLogger) {
LOG("WriteProfileToJSONWriter");
MOZ_RELEASE_ASSERT(CorePS::Exists());
aWriter.Start();
{
auto rv = profiler_stream_json_for_this_process(
aWriter, aSinceTime, aIsShuttingDown, aService,
aProgressLogger.CreateSubLoggerFromTo(
0_pc,
"WriteProfileToJSONWriter: "
"profiler_stream_json_for_this_process started",
100_pc,
"WriteProfileToJSONWriter: "
"profiler_stream_json_for_this_process done"));
if (rv.isErr()) {
return false;
}
// Don't include profiles from other processes because this is a
// synchronous function.
aWriter.StartArrayProperty("processes");
aWriter.EndArray();
}
aWriter.End();
return !aWriter.Failed();
}
void profiler_set_process_name(const nsACString& aProcessName,
const nsACString* aETLDplus1) {
LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.Data(),
aETLDplus1 ? aETLDplus1->Data() : "<none>");
PSAutoLock lock;
CorePS::SetProcessName(lock, aProcessName);
if (aETLDplus1) {
CorePS::SetETLDplus1(lock, *aETLDplus1);
}
}
UniquePtr<char[]> profiler_get_profile(double aSinceTime,
bool aIsShuttingDown) {
LOG("profiler_get_profile");
UniquePtr<ProfilerCodeAddressService> service =
profiler_code_address_service_for_presymbolication();
FailureLatchSource failureLatch;
SpliceableChunkedJSONWriter b{failureLatch};
if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, service.get(),
ProgressLogger{})) {
return nullptr;
}
return b.ChunkedWriteFunc().CopyData();
}
[[nodiscard]] bool profiler_get_profile_json(
SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter,
double aSinceTime, bool aIsShuttingDown,
mozilla::ProgressLogger aProgressLogger) {
LOG("profiler_get_profile_json");
UniquePtr<ProfilerCodeAddressService> service =
profiler_code_address_service_for_presymbolication();
return WriteProfileToJSONWriter(
aSpliceableChunkedJSONWriter, aSinceTime, aIsShuttingDown, service.get(),
aProgressLogger.CreateSubLoggerFromTo(
0.1_pc, "profiler_get_profile_json: WriteProfileToJSONWriter started",
99.9_pc, "profiler_get_profile_json: WriteProfileToJSONWriter done"));
}
void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
double* aInterval, uint32_t* aFeatures,
Vector<const char*>* aFilters,
uint64_t* aActiveTabID) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) ||
NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) ||
NS_WARN_IF(!aFilters)) {
return;
}
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
*aCapacity = 0;
*aDuration = Nothing();
*aInterval = 0;
*aFeatures = 0;
*aActiveTabID = 0;
aFilters->clear();
return;
}
*aCapacity = ActivePS::Capacity(lock).Value();
*aDuration = ActivePS::Duration(lock);
*aInterval = ActivePS::Interval(lock);
*aFeatures = ActivePS::Features(lock);
*aActiveTabID = ActivePS::ActiveTabID(lock);
const Vector<std::string>& filters = ActivePS::Filters(lock);
MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
for (uint32_t i = 0; i < filters.length(); ++i) {
(*aFilters)[i] = filters[i].c_str();
}
}
ProfileBufferControlledChunkManager* profiler_get_controlled_chunk_manager() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
if (NS_WARN_IF(!ActivePS::Exists(lock))) {
return nullptr;
}
return &ActivePS::ControlledChunkManager(lock);
}
namespace mozilla {
void GetProfilerEnvVarsForChildProcess(
std::function<void(const char* key, const char* value)>&& aSetEnv) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
aSetEnv("MOZ_PROFILER_STARTUP", "");
return;
}
aSetEnv("MOZ_PROFILER_STARTUP", "1");
// If MOZ_PROFILER_SHUTDOWN is defined, make sure it's empty in children, so
// that they don't attempt to write over that file.
if (getenv("MOZ_PROFILER_SHUTDOWN")) {
aSetEnv("MOZ_PROFILER_SHUTDOWN", "");
}
// Hidden option to stop Base Profiler, mostly due to Talos intermittents,
// TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1");
}
auto capacityString =
Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
// Use AppendFloat instead of Smprintf with %f because the decimal
// separator used by %f is locale-dependent. But the string we produce needs
// to be parseable by strtod, which only accepts the period character as a
// decimal separator. AppendFloat always uses the period character.
nsCString intervalString;
intervalString.AppendFloat(ActivePS::Interval(lock));
aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.get());
auto featuresString = Smprintf("%d", ActivePS::Features(lock));
aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
std::string filtersString;
const Vector<std::string>& filters = ActivePS::Filters(lock);
for (uint32_t i = 0; i < filters.length(); ++i) {
if (i != 0) {
filtersString += ",";
}
filtersString += filters[i];
}
aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
auto activeTabIDString = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock));
aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDString.get());
}
} // namespace mozilla
void profiler_received_exit_profile(const nsACString& aExitProfile) {
MOZ_RELEASE_ASSERT(NS_IsMainThread());
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return;
}
ActivePS::AddExitProfile(lock, aExitProfile);
}
Vector<nsCString> profiler_move_exit_profiles() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
Vector<nsCString> profiles;
if (ActivePS::Exists(lock)) {
profiles = ActivePS::MoveExitProfiles(lock);
}
return profiles;
}
static void locked_profiler_save_profile_to_file(
PSLockRef aLock, const char* aFilename,
const PreRecordedMetaInformation& aPreRecordedMetaInformation,
bool aIsShuttingDown = false) {
nsAutoCString processedFilename(aFilename);
const auto processInsertionIndex = processedFilename.Find("%p");
if (processInsertionIndex != kNotFound) {
// Replace "%p" with the process id.
nsAutoCString process;
process.AppendInt(profiler_current_process_id().ToNumber());
processedFilename.Replace(processInsertionIndex, 2, process);
LOG("locked_profiler_save_profile_to_file(\"%s\" -> \"%s\")", aFilename,
processedFilename.get());
} else {
LOG("locked_profiler_save_profile_to_file(\"%s\")", aFilename);
}
MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
std::ofstream stream;
stream.open(processedFilename.get());
if (stream.is_open()) {
OStreamJSONWriteFunc sw(stream);
SpliceableJSONWriter w(sw, FailureLatchInfallibleSource::Singleton());
w.Start();
{
Unused << locked_profiler_stream_json_for_this_process(
aLock, w, /* sinceTime */ 0, aPreRecordedMetaInformation,
aIsShuttingDown, nullptr, ProgressLogger{});
w.StartArrayProperty("processes");
Vector<nsCString> exitProfiles = ActivePS::MoveExitProfiles(aLock);
for (auto& exitProfile : exitProfiles) {
if (!exitProfile.IsEmpty() && exitProfile[0] != '*') {
w.Splice(exitProfile);
}
}
w.EndArray();
}
w.End();
stream.close();
}
}
void profiler_save_profile_to_file(const char* aFilename) {
LOG("profiler_save_profile_to_file(%s)", aFilename);
MOZ_RELEASE_ASSERT(CorePS::Exists());
const auto preRecordedMetaInformation = PreRecordMetaInformation();
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return;
}
locked_profiler_save_profile_to_file(lock, aFilename,
preRecordedMetaInformation);
}
uint32_t profiler_get_available_features() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
return AvailableFeatures();
}
Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return Nothing();
}
return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
}
// When the profiler is started on a background thread, we can't synchronously
// call PollJSSampling on the main thread's ThreadInfo. And the next regular
// call to PollJSSampling on the main thread would only happen once the main
// thread triggers a JS interrupt callback.
// This means that all the JS execution between profiler_start() and the first
// JS interrupt would happen with JS sampling disabled, and we wouldn't get any
// JS function information for that period of time.
// So in order to start JS sampling as soon as possible, we dispatch a runnable
// to the main thread which manually calls PollJSSamplingForCurrentThread().
// In some cases this runnable will lose the race with the next JS interrupt.
// That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls.
static void TriggerPollJSSamplingOnMainThread() {
nsCOMPtr<nsIThread> mainThread;
nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread));
if (NS_SUCCEEDED(rv) && mainThread) {
nsCOMPtr<nsIRunnable> task =
NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread",
[]() { PollJSSamplingForCurrentThread(); });
SchedulerGroup::Dispatch(task.forget());
}
}
static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
double aInterval, uint32_t aFeatures,
const char** aFilters, uint32_t aFilterCount,
uint64_t aActiveTabID,
const Maybe<double>& aDuration) {
TimeStamp profilingStartTime = TimeStamp::Now();
if (LOG_TEST) {
LOG("locked_profiler_start");
LOG("- capacity = %u", unsigned(aCapacity.Value()));
LOG("- duration = %.2f", aDuration ? *aDuration : -1);
LOG("- interval = %.2f", aInterval);
LOG("- tab ID = %" PRIu64, aActiveTabID);
#define LOG_FEATURE(n_, str_, Name_, desc_) \
if (ProfilerFeature::Has##Name_(aFeatures)) { \
LOG("- feature = %s", str_); \
}
PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)
#undef LOG_FEATURE
for (uint32_t i = 0; i < aFilterCount; i++) {
LOG("- threads = %s", aFilters[i]);
}
}
MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
// Do this before the Base Profiler is stopped, to keep the existing buffer
// (if any) alive for our use.
if (NS_IsMainThread()) {
mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker();
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("EnsureBufferForMainThreadAddMarker",
&mozilla::base_profiler_markers_detail::
EnsureBufferForMainThreadAddMarker));
}
UniquePtr<ProfileBufferChunkManagerWithLocalLimit> baseChunkManager;
bool profilersHandOver = false;
if (baseprofiler::profiler_is_active()) {
// Note that we still hold the lock, so the sampler cannot run yet and
// interact negatively with the still-active BaseProfiler sampler.
// Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP.
// Take ownership of the chunk manager from the Base Profiler, to extend its
// lifetime during the new Gecko Profiler session. Since we're using the
// same core buffer, all the base profiler data remains.
baseChunkManager = baseprofiler::detail::ExtractBaseProfilerChunkManager();
if (baseChunkManager) {
profilersHandOver = true;
if (const TimeStamp baseProfilingStartTime =
baseprofiler::detail::GetProfilingStartTime();
!baseProfilingStartTime.IsNull()) {
profilingStartTime = baseProfilingStartTime;
}
BASE_PROFILER_MARKER_TEXT(
"Profilers handover", PROFILER, MarkerTiming::IntervalStart(),
"Transition from Base to Gecko Profiler, some data may be missing");
}
// Now stop Base Profiler (BP), as further recording will be ignored anyway,
// and so that it won't clash with Gecko Profiler (GP) sampling starting
// after the lock is dropped.
// On Linux this is especially important to do before creating the GP
// sampler, because the BP sampler may send a signal (to stop threads to be
// sampled), which the GP would intercept before its own initialization is
// complete and ready to handle such signals.
// Note that even though `profiler_stop()` doesn't immediately destroy and
// join the sampler thread, it safely deactivates it in such a way that the
// thread will soon exit without doing any actual work.
// TODO: Allow non-sampling profiling to continue.
// TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown.
baseprofiler::profiler_stop();
}
#if defined(GP_PLAT_amd64_windows) || defined(GP_PLAT_arm64_windows)
mozilla::WindowsStackWalkInitialization();
#endif
// Fall back to the default values if the passed-in values are unreasonable.
// We want to be able to store at least one full stack.
PowerOfTwo32 capacity =
(aCapacity.Value() >=
ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry)
? aCapacity
: PROFILER_DEFAULT_ENTRIES;
Maybe<double> duration = aDuration;
if (aDuration && *aDuration <= 0) {
duration = Nothing();
}
double interval = aInterval > 0 ? aInterval : PROFILER_DEFAULT_INTERVAL;
ActivePS::Create(aLock, profilingStartTime, capacity, interval, aFeatures,
aFilters, aFilterCount, aActiveTabID, duration,
std::move(baseChunkManager));
// ActivePS::Create can only succeed or crash.
MOZ_ASSERT(ActivePS::Exists(aLock));
// Set up profiling for each registered thread, if appropriate.
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
bool isMainThreadBeingProfiled = false;
#endif
ThreadRegistry::LockedRegistry lockedRegistry;
for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
const ThreadRegistrationInfo& info =
offThreadRef.UnlockedConstReaderCRef().Info();
ThreadProfilingFeatures threadProfilingFeatures =
ActivePS::ProfilingFeaturesForThread(aLock, info);
if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) {
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
offThreadRef.GetLockedRWFromAnyThread();
ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
aLock, MakeUnique<ProfiledThreadData>(info));
lockedThreadData->SetProfilingFeaturesAndData(threadProfilingFeatures,
profiledThreadData, aLock);
lockedThreadData->GetNewCpuTimeInNs();
if (ActivePS::FeatureJS(aLock)) {
lockedThreadData->StartJSSampling(ActivePS::JSFlags(aLock));
if (!lockedThreadData.GetLockedRWOnThread() && info.IsMainThread()) {
// Dispatch a runnable to the main thread to call
// PollJSSampling(), so that we don't have wait for the next JS
// interrupt callback in order to start profiling JS.
TriggerPollJSSamplingOnMainThread();
}
}
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (info.IsMainThread()) {
isMainThreadBeingProfiled = true;
}
#endif
lockedThreadData->ReinitializeOnResume();
if (ActivePS::FeatureJS(aLock) && lockedThreadData->GetJSContext()) {
profiledThreadData->NotifyReceivedJSContext(0);
if (ActivePS::FeatureTracing(aLock)) {
CycleCollectedJSContext* ctx =
lockedThreadData->GetCycleCollectedJSContext();
ctx->BeginExecutionTracingAsync();
}
}
}
}
// Setup support for pushing/popping labels in mozglue.
RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(aLock)) {
int javaInterval = interval;
// Java sampling can't accurately keep up with sampling rates lower than
// 1ms.
if (javaInterval < 1) {
javaInterval = 1;
}
JNIEnv* env = jni::GetEnvForThread();
const auto& filters = ActivePS::Filters(aLock);
jni::ObjectArray::LocalRef javaFilters =
jni::ObjectArray::New<jni::String>(filters.length());
for (size_t i = 0; i < filters.length(); i++) {
javaFilters->SetElement(i, jni::StringParam(filters[i].data(), env));
}
// Send the interval-relative entry count; the Java code has a hard cap of
// 100000, so it can't be more than that.
java::GeckoJavaSampler::Start(
javaFilters, javaInterval,
std::round((double)(capacity.Value()) * interval /
(double)(javaInterval)));
}
#endif
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (ActivePS::FeatureNativeAllocations(aLock)) {
if (isMainThreadBeingProfiled) {
mozilla::profiler::enable_native_allocations();
} else {
NS_WARNING(
"The nativeallocations feature is turned on, but the main thread is "
"not being profiled. The allocations are only stored on the main "
"thread.");
}
}
#endif
if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) {
StartAudioCallbackTracing();
}
// At the very end, set up RacyFeatures.
RacyFeatures::SetActive(ActivePS::Features(aLock));
if (profilersHandOver) {
PROFILER_MARKER_UNTYPED("Profilers handover", PROFILER,
MarkerTiming::IntervalEnd());
}
}
RefPtr<GenericPromise> profiler_start(PowerOfTwo32 aCapacity, double aInterval,
uint32_t aFeatures, const char** aFilters,
uint32_t aFilterCount,
uint64_t aActiveTabID,
const Maybe<double>& aDuration) {
LOG("profiler_start");
ProfilerParent::ProfilerWillStopIfStarted();
SamplerThread* samplerThread = nullptr;
{
PSAutoLock lock;
// Initialize if necessary.
if (!CorePS::Exists()) {
profiler_init(nullptr);
}
// Reset the current state if the profiler is running.
if (ActivePS::Exists(lock)) {
// Note: Not invoking callbacks with ProfilingState::Stopping, because
// we're under lock, and also it would not be useful: Any profiling data
// will be discarded, and we're immediately restarting the profiler below
// and then notifying ProfilingState::Started.
samplerThread = locked_profiler_stop(lock);
}
locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
aFilterCount, aActiveTabID, aDuration);
}
PollJSSamplingForCurrentThread();
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (ProfilerFeature::ShouldInstallMemoryHooks(aFeatures)) {
// Start counting memory allocations (outside of the lock because this may
// call profiler_add_sampled_counter, which would attempt to take the lock).
ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
} else {
// Unregister the memory counter in case it was registered before. This will
// make sure that the empty memory counter from the previous profiler run is
// removed completely and we don't serialize the memory counters.
mozilla::profiler::unregister_memory_counter();
}
#endif
invoke_profiler_state_change_callbacks(ProfilingState::Started);
// We do these operations with gPSMutex unlocked. The comments in
// profiler_stop() explain why.
if (samplerThread) {
Unused << ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped");
delete samplerThread;
}
return NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
aFilters, aFilterCount, aActiveTabID);
}
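// Illustrative usage sketch (not part of this file): a caller starting a
// short profiling session might invoke profiler_start() roughly as below.
// The capacity, feature set and filter names here are hypothetical; only the
// function signature above is authoritative.
//
//   const char* filters[] = {"GeckoMain", "Compositor"};
//   Unused << profiler_start(PowerOfTwo32(128 * 1024), /* aInterval ms */ 1.0,
//                            ProfilerFeature::StackWalk | ProfilerFeature::JS,
//                            filters, std::size(filters),
//                            /* aActiveTabID */ 0,
//                            /* aDuration */ Nothing());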
void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
uint32_t aFeatures, const char** aFilters,
uint32_t aFilterCount, uint64_t aActiveTabID,
const Maybe<double>& aDuration) {
LOG("profiler_ensure_started");
ProfilerParent::ProfilerWillStopIfStarted();
bool startedProfiler = false;
SamplerThread* samplerThread = nullptr;
{
PSAutoLock lock;
// Initialize if necessary.
if (!CorePS::Exists()) {
profiler_init(nullptr);
}
if (ActivePS::Exists(lock)) {
// The profiler is active.
if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
aFilters, aFilterCount, aActiveTabID)) {
// Stop and restart with different settings.
// Note: Not invoking callbacks with ProfilingState::Stopping, because
// we're under lock, and also it would not be useful: Any profiling data
// will be discarded, and we're immediately restarting the profiler
// below and then notifying ProfilingState::Started.
samplerThread = locked_profiler_stop(lock);
locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
aFilterCount, aActiveTabID, aDuration);
startedProfiler = true;
}
} else {
// The profiler is stopped.
locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
aFilterCount, aActiveTabID, aDuration);
startedProfiler = true;
}
}
PollJSSamplingForCurrentThread();
// We do these operations with gPSMutex unlocked. The comments in
// profiler_stop() explain why.
if (samplerThread) {
Unused << ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped");
delete samplerThread;
}
if (startedProfiler) {
invoke_profiler_state_change_callbacks(ProfilingState::Started);
Unused << NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
aFilters, aFilterCount, aActiveTabID);
}
}
[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
LOG("locked_profiler_stop");
MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
// At the very start, clear RacyFeatures.
RacyFeatures::SetInactive();
if (ActivePS::FeatureAudioCallbackTracing(aLock)) {
StopAudioCallbackTracing();
}
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(aLock)) {
java::GeckoJavaSampler::Stop();
}
#endif
// Remove support for pushing/popping labels in mozglue.
RegisterProfilerLabelEnterExit(nullptr, nullptr);
// Stop sampling live threads.
ThreadRegistry::LockedRegistry lockedRegistry;
for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
if (offThreadRef.UnlockedRWForLockedProfilerRef().ProfilingFeatures() ==
ThreadProfilingFeatures::NotProfiled) {
continue;
}
ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
offThreadRef.GetLockedRWFromAnyThread();
lockedThreadData->ClearProfilingFeaturesAndData(aLock);
if (ActivePS::FeatureJS(aLock)) {
if (ActivePS::FeatureTracing(aLock)) {
CycleCollectedJSContext* ctx =
lockedThreadData->GetCycleCollectedJSContext();
if (ctx) {
ctx->EndExecutionTracingAsync();
}
}
lockedThreadData->StopJSSampling();
if (!lockedThreadData.GetLockedRWOnThread() &&
lockedThreadData->Info().IsMainThread()) {
// Dispatch a runnable to the main thread to call PollJSSampling(),
// so that we don't have to wait for the next JS interrupt callback in
// order to stop profiling JS.
TriggerPollJSSamplingOnMainThread();
}
}
}
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (ActivePS::FeatureNativeAllocations(aLock) &&
ActivePS::ShouldInstallMemoryHooks(aLock)) {
mozilla::profiler::disable_native_allocations();
}
#endif
// The Stop() call doesn't actually stop Run(); that happens in this
// function's caller when the sampler thread is destroyed. Stop() just gives
// the SamplerThread a chance to do some cleanup with gPSMutex locked.
SamplerThread* samplerThread = ActivePS::Destroy(aLock);
samplerThread->Stop(aLock);
if (NS_IsMainThread()) {
mozilla::base_profiler_markers_detail::
ReleaseBufferForMainThreadAddMarker();
} else {
NS_DispatchToMainThread(
NS_NewRunnableFunction("ReleaseBufferForMainThreadAddMarker",
&mozilla::base_profiler_markers_detail::
ReleaseBufferForMainThreadAddMarker));
}
return samplerThread;
}
RefPtr<GenericPromise> profiler_stop() {
LOG("profiler_stop");
MOZ_RELEASE_ASSERT(CorePS::Exists());
if (profiler_is_active()) {
invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
}
ProfilerParent::ProfilerWillStopIfStarted();
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
// Remove the hooks early, as native allocations (if they are on) can be
// quite expensive.
mozilla::profiler::remove_memory_hooks();
#endif
SamplerThread* samplerThread;
{
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
samplerThread = locked_profiler_stop(lock);
}
PollJSSamplingForCurrentThread();
// We notify observers with gPSMutex unlocked. Otherwise we might get a
// deadlock, if code run by these functions calls a profiler function that
// locks gPSMutex, for example when it wants to insert a marker. (This has
// been seen in practice when these notifications were still fired
// synchronously.)
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerStopped();
NotifyObservers("profiler-stopped");
// We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
// would be waiting here with gPSMutex locked for SamplerThread::Run() to
// return so the join operation within the destructor can complete, but Run()
// needs to lock gPSMutex to return.
//
// Because this call occurs with gPSMutex unlocked, it -- including the final
// iteration of Run()'s loop -- must be able to detect deactivation and return
// in a way that's safe with respect to other gPSMutex-locking operations
// that may have occurred in the meantime.
delete samplerThread;
return promise;
}
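// Illustrative sketch (not part of this file): the promise returned above
// comes from ProfilerParent::ProfilerStopped(), so a caller that must only
// continue once the stop has been propagated could chain on it, e.g.
// (assuming the usual MozPromise Then() pattern):
//
//   profiler_stop()->Then(
//       GetMainThreadSerialEventTarget(), __func__,
//       [](bool) { /* stop fully propagated */ },
//       [](nsresult) { /* propagation failed */ });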
bool profiler_is_paused() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return false;
}
return ActivePS::IsPaused(lock);
}
/* [[nodiscard]] */ bool profiler_callback_after_sampling(
PostSamplingCallback&& aCallback) {
LOG("profiler_callback_after_sampling");
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
return ActivePS::AppendPostSamplingCallback(lock, std::move(aCallback));
}
// See `ProfilerControl.h` for more details.
void profiler_lookup_async_signal_dump_directory() {
// This only exists to support POSIX signal handling, which is only available
// on non-Windows platforms, so on Windows this function is a no-op for now.
#if !defined(XP_WIN)
LOG("profiler_lookup_async_signal_dump_directory");
MOZ_ASSERT(
NS_IsMainThread(),
"We can only get access to the directory service from the main thread");
// Make sure the profiler is actually running.
MOZ_RELEASE_ASSERT(CorePS::Exists());
// take the lock so that we can write to CorePS
PSAutoLock lock;
nsresult rv;
// Check to see if we have a `MOZ_UPLOAD_DIR` first - i.e., whether we're
// running in CI.
LOG("Checking if MOZ_UPLOAD_DIR exists");
const char* mozUploadDir = getenv("MOZ_UPLOAD_DIR");
if (mozUploadDir && mozUploadDir[0] != '\0') {
LOG("Found MOZ_UPLOAD_DIR at: %s", mozUploadDir);
// We want to do the right thing, and turn this into an nsIFile. Go through
// the motions here:
nsCOMPtr<nsIFile> mozUploadDirFile;
rv = NS_NewNativeLocalFile(nsDependentCString(mozUploadDir),
getter_AddRefs(mozUploadDirFile));
if (NS_FAILED(rv)) {
LOG("Failed to assign a filepath while creating MOZ_UPLOAD_DIR file "
"%s, Error %s ",
mozUploadDir, GetStaticErrorName(rv));
return;
}
CorePS::SetAsyncSignalDumpDirectory(lock, Some(mozUploadDirFile));
} else {
LOG("Defaulting to the user's Download directory for profile dumps");
nsCOMPtr<nsIFile> tDownloadDir;
rv = NS_GetSpecialDirectory(NS_OS_DEFAULT_DOWNLOAD_DIR,
getter_AddRefs(tDownloadDir));
if (NS_FAILED(rv)) {
LOG("Failed to find download directory. Profiler signal handling will "
"not be able to save to disk. Error: %s",
GetStaticErrorName(rv));
} else {
CorePS::SetAsyncSignalDumpDirectory(lock, Some(tDownloadDir));
}
}
#endif
}
RefPtr<GenericPromise> profiler_pause() {
LOG("profiler_pause");
MOZ_RELEASE_ASSERT(CorePS::Exists());
invoke_profiler_state_change_callbacks(ProfilingState::Pausing);
{
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
// Not paused yet, so this is the first pause, let Java know.
// TODO: Distinguish Pause and PauseSampling in Java.
java::GeckoJavaSampler::PauseSampling();
}
#endif
RacyFeatures::SetPaused();
ActivePS::SetIsPaused(lock, true);
ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerPaused();
NotifyObservers("profiler-paused");
return promise;
}
RefPtr<GenericPromise> profiler_resume() {
LOG("profiler_resume");
MOZ_RELEASE_ASSERT(CorePS::Exists());
{
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
ActivePS::Buffer(lock).AddEntry(
ProfileBufferEntry::Resume(profiler_time()));
ActivePS::SetIsPaused(lock, false);
RacyFeatures::SetUnpaused();
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
// Not paused anymore, so this is the last unpause, let Java know.
// TODO: Distinguish Unpause and UnpauseSampling in Java.
java::GeckoJavaSampler::UnpauseSampling();
}
#endif
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerResumed();
NotifyObservers("profiler-resumed");
invoke_profiler_state_change_callbacks(ProfilingState::Resumed);
return promise;
}
bool profiler_is_sampling_paused() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return false;
}
return ActivePS::IsSamplingPaused(lock);
}
RefPtr<GenericPromise> profiler_pause_sampling() {
LOG("profiler_pause_sampling");
MOZ_RELEASE_ASSERT(CorePS::Exists());
{
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
// Not paused yet, so this is the first pause, let Java know.
// TODO: Distinguish Pause and PauseSampling in Java.
java::GeckoJavaSampler::PauseSampling();
}
#endif
RacyFeatures::SetSamplingPaused();
ActivePS::SetIsSamplingPaused(lock, true);
ActivePS::Buffer(lock).AddEntry(
ProfileBufferEntry::PauseSampling(profiler_time()));
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerPausedSampling();
NotifyObservers("profiler-paused-sampling");
return promise;
}
RefPtr<GenericPromise> profiler_resume_sampling() {
LOG("profiler_resume_sampling");
MOZ_RELEASE_ASSERT(CorePS::Exists());
{
PSAutoLock lock;
if (!ActivePS::Exists(lock)) {
return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
}
ActivePS::Buffer(lock).AddEntry(
ProfileBufferEntry::ResumeSampling(profiler_time()));
ActivePS::SetIsSamplingPaused(lock, false);
RacyFeatures::SetSamplingUnpaused();
#if defined(GP_OS_android)
if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
// Not paused anymore, so this is the last unpause, let Java know.
// TODO: Distinguish Unpause and UnpauseSampling in Java.
java::GeckoJavaSampler::UnpauseSampling();
}
#endif
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
RefPtr<GenericPromise> promise = ProfilerParent::ProfilerResumedSampling();
NotifyObservers("profiler-resumed-sampling");
return promise;
}
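// Illustrative sketch (not part of this file): code that wants to keep the
// profiler session (and markers) alive but exclude a noisy phase from
// periodic sampling could bracket that phase as below; DoNoisyWork() is a
// hypothetical placeholder, and the returned promises are ignored here.
//
//   Unused << profiler_pause_sampling();
//   DoNoisyWork();
//   Unused << profiler_resume_sampling();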
bool profiler_feature_active(uint32_t aFeature) {
// This function runs both on and off the main thread.
MOZ_RELEASE_ASSERT(CorePS::Exists());
// This function is hot enough that we use RacyFeatures, not ActivePS.
return RacyFeatures::IsActiveWithFeature(aFeature);
}
bool profiler_active_without_feature(uint32_t aFeature) {
// This function runs both on and off the main thread.
// This function is hot enough that we use RacyFeatures, not ActivePS.
return RacyFeatures::IsActiveWithoutFeature(aFeature);
}
void profiler_write_active_configuration(JSONWriter& aWriter) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
ActivePS::WriteActiveConfiguration(lock, aWriter);
}
void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
PSAutoLock lock;
locked_profiler_add_sampled_counter(lock, aCounter);
}
void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
PSAutoLock lock;
locked_profiler_remove_sampled_counter(lock, aCounter);
}
void profiler_count_bandwidth_bytes(int64_t aCount) {
NS_ASSERTION(profiler_feature_active(ProfilerFeature::Bandwidth),
"Should not call profiler_count_bandwidth_bytes when the "
"Bandwidth feature is not set");
ProfilerBandwidthCounter* counter = CorePS::GetBandwidthCounter();
if (MOZ_UNLIKELY(!counter)) {
counter = new ProfilerBandwidthCounter();
CorePS::SetBandwidthCounter(counter);
}
counter->Add(aCount);
}
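// Illustrative sketch (not part of this file): as the assertion above
// requires, callers are expected to gate this call on the Bandwidth feature:
//
//   if (profiler_feature_active(ProfilerFeature::Bandwidth)) {
//     profiler_count_bandwidth_bytes(bytesTransferred);  // hypothetical count
//   }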
ProfilingStack* profiler_register_thread(const char* aName,
void* aGuessStackTop) {
DEBUG_LOG("profiler_register_thread(%s)", aName);
// This will call `ThreadRegistry::Register()` (see below).
return ThreadRegistration::RegisterThread(aName, aGuessStackTop);
}
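// Illustrative sketch (not part of this file): a thread not created through
// nsThread would typically register itself at the top of its entry point,
// passing the address of a local variable as the stack-top estimate, and
// unregister before exiting. MyThreadFunc() is a hypothetical entry point.
//
//   void MyThreadFunc() {
//     char stackTop;
//     profiler_register_thread("My Worker", &stackTop);
//     // ... thread work ...
//     profiler_unregister_thread();
//   }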
/* static */
void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) {
// Set the thread name (except for the main thread, which is controlled
// elsewhere, and influences the process name on some systems like Linux).
if (!aOnThreadRef.UnlockedConstReaderCRef().Info().IsMainThread()) {
// Make sure we have a nsThread wrapper for the current thread, and that
// NSPR knows its name.
(void)NS_GetCurrentThread();
NS_SetCurrentThreadName(
aOnThreadRef.UnlockedConstReaderCRef().Info().Name());
}
{
PSAutoLock lock;
{
RegistryLockExclusive lock{sRegistryMutex};
MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef}));
}
if (!CorePS::Exists()) {
// CorePS has not been created yet.
// If&when that happens, it will handle already-registered threads then.
return;
}
(void)locked_register_thread(lock, OffThreadRef{aOnThreadRef});
}
PollJSSamplingForCurrentThread();
}
void profiler_unregister_thread() {
// This will call `ThreadRegistry::Unregister()` (see below).
ThreadRegistration::UnregisterThread();
}
static void locked_unregister_thread(
PSLockRef lock, ThreadRegistration::OnThreadRef aOnThreadRef) {
if (!CorePS::Exists()) {
// This function can be called after the main thread has already shut
// down.
return;
}
// We don't call StopJSSampling() here; there's no point doing that for a JS
// thread that is in the process of disappearing.
ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData =
aOnThreadRef.GetLockedRWOnThread();
ProfiledThreadData* profiledThreadData =
lockedThreadData->GetProfiledThreadData(lock);
lockedThreadData->ClearProfilingFeaturesAndData(lock);
MOZ_RELEASE_ASSERT(
lockedThreadData->Info().ThreadId() == profiler_current_thread_id(),
"Thread being unregistered has changed its TID");
DEBUG_LOG("profiler_unregister_thread: %s", lockedThreadData->Info().Name());
if (profiledThreadData && ActivePS::Exists(lock)) {
ActivePS::UnregisterThread(lock, profiledThreadData);
}
}
/* static */
void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) {
PSAutoLock psLock;
locked_unregister_thread(psLock, aOnThreadRef);
RegistryLockExclusive lock{sRegistryMutex};
for (OffThreadRef& thread : sRegistryContainer) {
if (thread.IsPointingAt(*aOnThreadRef.mThreadRegistration)) {
sRegistryContainer.erase(&thread);
break;
}
}
}
void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
const nsCString& aUrl,
uint64_t aEmbedderInnerWindowID,
bool aIsPrivateBrowsing) {
DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64
", %s)",
aTabID, aInnerWindowID, aUrl.get(), aEmbedderInnerWindowID,
aIsPrivateBrowsing ? "true" : "false");
MOZ_RELEASE_ASSERT(CorePS::Exists());
PSAutoLock lock;
// When a browsing context is first loaded, the first URL loaded in it will be
// about:blank. Because of that, this call keeps the first non-about:blank
// registration of the window and discards the previous one.
RefPtr<PageInformation> pageInfo = new PageInformation(
aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID, aIsPrivateBrowsing);
CorePS::AppendRegisteredPage(lock, std::move(pageInfo));
// After appending the given page to CorePS, look for the expired
// pages and remove them if there are any.
if (ActivePS::Exists(lock)) {
ActivePS::DiscardExpiredPages(lock);
}
}
void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
PSAutoLock lock;
if (!CorePS::Exists()) {
// This function can be called after the main thread has already shut down.
return;
}
// During unregistration, if the profiler is active, we have to keep the
// page information since there may be some markers associated with the given
// page. But if the profiler is not active, we have no reason to keep the
// page information here because there can't be any markers associated with it.
if (ActivePS::Exists(lock)) {
ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
} else {
CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
}
}
void profiler_clear_all_pages() {
{
PSAutoLock lock;
if (!CorePS::Exists()) {
// This function can be called after the main thread has already shut
// down.
return;
}
CorePS::ClearRegisteredPages(lock);
if (ActivePS::Exists(lock)) {
ActivePS::ClearUnregisteredPages(lock);
}
}
// gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
ProfilerParent::ClearAllPages();
}
namespace geckoprofiler::markers::detail {
Maybe<uint64_t> profiler_get_inner_window_id_from_docshell(
nsIDocShell* aDocshell) {
Maybe<uint64_t> innerWindowID = Nothing();
if (aDocshell) {
auto outerWindow = aDocshell->GetWindow();
if (outerWindow) {
auto innerWindow = outerWindow->GetCurrentInnerWindow();
if (innerWindow) {
innerWindowID = Some(innerWindow->WindowID());
}
}
}
return innerWindowID;
}
} // namespace geckoprofiler::markers::detail
namespace geckoprofiler::markers {
struct CPUAwakeMarker {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("Awake");
}
static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
int64_t aCPUTimeNs, int64_t aCPUId
#ifdef GP_OS_darwin
,
uint32_t aQoS
#endif
#ifdef GP_OS_windows
,
int32_t aAbsolutePriority,
int32_t aRelativePriority,
int32_t aCurrentPriority
#endif
) {
if (aCPUTimeNs) {
constexpr double NS_PER_MS = 1'000'000;
aWriter.DoubleProperty("CPU Time", double(aCPUTimeNs) / NS_PER_MS);
// CPU Time is only provided for the end marker; the other fields are for
// the start marker.
return;
}
#ifndef GP_PLAT_arm64_darwin
aWriter.IntProperty("CPU Id", aCPUId);
#endif
#ifdef GP_OS_windows
if (aAbsolutePriority) {
aWriter.IntProperty("absPriority", aAbsolutePriority);
}
if (aCurrentPriority) {
aWriter.IntProperty("curPriority", aCurrentPriority);
}
aWriter.IntProperty("priority", aRelativePriority);
#endif
#ifdef GP_OS_darwin
const char* QoS = "";
switch (aQoS) {
case QOS_CLASS_USER_INTERACTIVE:
QoS = "User Interactive";
break;
case QOS_CLASS_USER_INITIATED:
QoS = "User Initiated";
break;
case QOS_CLASS_DEFAULT:
QoS = "Default";
break;
case QOS_CLASS_UTILITY:
QoS = "Utility";
break;
case QOS_CLASS_BACKGROUND:
QoS = "Background";
break;
default:
QoS = "Unspecified";
}
aWriter.StringProperty("QoS",
ProfilerString8View::WrapNullTerminatedString(QoS));
#endif
}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
schema.AddKeyFormat("CPU Time", MS::Format::Duration);
#ifndef GP_PLAT_arm64_darwin
schema.AddKeyFormat("CPU Id", MS::Format::Integer);
schema.SetTableLabel("Awake - CPU Id = {marker.data.CPU Id}");
#endif
#ifdef GP_OS_windows
schema.AddKeyLabelFormat("priority", "Relative Thread Priority",
MS::Format::Integer);
schema.AddKeyLabelFormat("absPriority", "Base Thread Priority",
MS::Format::Integer);
schema.AddKeyLabelFormat("curPriority", "Current Thread Priority",
MS::Format::Integer);
#endif
#ifdef GP_OS_darwin
schema.AddKeyLabelFormat("QoS", "Quality of Service", MS::Format::String);
#endif
return schema;
}
};
} // namespace geckoprofiler::markers
void profiler_mark_thread_asleep() {
if (!profiler_thread_is_being_profiled_for_markers()) {
return;
}
uint64_t cpuTimeNs = ThreadRegistration::WithOnThreadRefOr(
[](ThreadRegistration::OnThreadRef aOnThreadRef) {
return aOnThreadRef.UnlockedConstReaderAndAtomicRWRef()
.GetNewCpuTimeInNs();
},
0);
PROFILER_MARKER("Awake", OTHER, MarkerTiming::IntervalEnd(), CPUAwakeMarker,
cpuTimeNs, 0 /* cpuId */
#if defined(GP_OS_darwin)
,
0 /* qos_class */
#endif
#if defined(GP_OS_windows)
,
0 /* priority */, 0 /* thread priority */,
0 /* current priority */
#endif
);
}
void profiler_thread_sleep() {
profiler_mark_thread_asleep();
ThreadRegistration::WithOnThreadRef(
[](ThreadRegistration::OnThreadRef aOnThreadRef) {
aOnThreadRef.UnlockedConstReaderAndAtomicRWRef().SetSleeping();
});
}
#if defined(GP_OS_windows)
# if !defined(__MINGW32__)
enum {
ThreadBasicInformation,
};
# endif
struct THREAD_BASIC_INFORMATION {
NTSTATUS ExitStatus;
PVOID TebBaseAddress;
CLIENT_ID ClientId;
KAFFINITY AffMask;
DWORD Priority;
DWORD BasePriority;
};
#endif
static mozilla::Atomic<uint64_t, mozilla::MemoryOrdering::Relaxed> gWakeCount(
0);
namespace geckoprofiler::markers {
struct WakeUpCountMarker {
static constexpr Span<const char> MarkerTypeName() {
return MakeStringSpan("WakeUpCount");
}
static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
int32_t aCount,
const ProfilerString8View& aType) {
aWriter.IntProperty("Count", aCount);
aWriter.StringProperty("label", aType);
}
static MarkerSchema MarkerTypeDisplay() {
using MS = MarkerSchema;
MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
schema.AddKeyFormat("Count", MS::Format::Integer);
schema.SetTooltipLabel("{marker.name} - {marker.data.label}");
schema.SetTableLabel(
"{marker.name} - {marker.data.label}: {marker.data.count}");
return schema;
}
};
} // namespace geckoprofiler::markers
void profiler_record_wakeup_count(const nsACString& aProcessType) {
static uint64_t previousThreadWakeCount = 0;
uint64_t newWakeups = gWakeCount - previousThreadWakeCount;
if (newWakeups > 0) {
if (newWakeups < std::numeric_limits<int32_t>::max()) {
int32_t newWakeups32 = int32_t(newWakeups);
mozilla::glean::power::total_thread_wakeups.Add(newWakeups32);
mozilla::glean::power::wakeups_per_process_type.Get(aProcessType)
.Add(newWakeups32);
PROFILER_MARKER("Thread Wake-ups", OTHER, {}, WakeUpCountMarker,
newWakeups32, aProcessType);
}
previousThreadWakeCount += newWakeups;
}
#ifdef NIGHTLY_BUILD
ThreadRegistry::LockedRegistry lockedRegistry;
for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
const ThreadRegistry::UnlockedConstReaderAndAtomicRW& threadData =
offThreadRef.UnlockedConstReaderAndAtomicRWRef();
threadData.RecordWakeCount();
}
#endif
}
void profiler_mark_thread_awake() {
++gWakeCount;
if (!profiler_thread_is_being_profiled_for_markers()) {
return;
}
int64_t cpuId = 0;
#if defined(GP_OS_windows)
cpuId = GetCurrentProcessorNumber();
#elif defined(GP_OS_darwin)
# ifdef GP_PLAT_amd64_darwin
unsigned int eax, ebx, ecx, edx;
__cpuid_count(1, 0, eax, ebx, ecx, edx);
// Check if we have an APIC.
if ((edx & (1 << 9))) {
// APIC ID is bits 24-31 of EBX
cpuId = ebx >> 24;
}
# endif
#else
cpuId = sched_getcpu();
#endif
#if defined(GP_OS_windows)
LONG priority;
static const auto get_thread_information_fn =
reinterpret_cast<decltype(&::GetThreadInformation)>(::GetProcAddress(
::GetModuleHandle(L"Kernel32.dll"), "GetThreadInformation"));
if (!get_thread_information_fn ||
!get_thread_information_fn(GetCurrentThread(), ThreadAbsoluteCpuPriority,
&priority, sizeof(priority))) {
priority = 0;
}
static const auto nt_query_information_thread_fn =
reinterpret_cast<decltype(&::NtQueryInformationThread)>(::GetProcAddress(
::GetModuleHandle(L"ntdll.dll"), "NtQueryInformationThread"));
LONG currentPriority = 0;
if (nt_query_information_thread_fn) {
THREAD_BASIC_INFORMATION threadInfo;
auto status = (*nt_query_information_thread_fn)(
GetCurrentThread(), (THREADINFOCLASS)ThreadBasicInformation,
&threadInfo, sizeof(threadInfo), NULL);
if (NT_SUCCESS(status)) {
currentPriority = threadInfo.Priority;
}
}
#endif
PROFILER_MARKER("Awake", OTHER, MarkerTiming::IntervalStart(), CPUAwakeMarker,
0 /* CPU time */, cpuId
#if defined(GP_OS_darwin)
,
qos_class_self()
#endif
#if defined(GP_OS_windows)
,
priority, GetThreadPriority(GetCurrentThread()),
currentPriority
#endif
);
}
void profiler_thread_wake() {
profiler_mark_thread_awake();
ThreadRegistration::WithOnThreadRef(
[](ThreadRegistration::OnThreadRef aOnThreadRef) {
aOnThreadRef.UnlockedConstReaderAndAtomicRWRef().SetAwake();
});
}
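// Illustrative sketch (not part of this file): event loops that block while
// waiting for work typically bracket the blocking wait with these calls so
// that samples taken while the thread is idle stay cheap; the condition
// variable wait below is a hypothetical placeholder.
//
//   profiler_thread_sleep();
//   mCondVar.Wait();
//   profiler_thread_wake();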
void profiler_js_interrupt_callback() {
// This function runs on JS threads being sampled.
PollJSSamplingForCurrentThread();
}
double profiler_time() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
return delta.ToMilliseconds();
}
bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer,
StackCaptureOptions aCaptureOptions) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
if (!profiler_is_active() ||
aCaptureOptions == StackCaptureOptions::NoStack) {
return false;
}
return ThreadRegistration::WithOnThreadRefOr(
[&](ThreadRegistration::OnThreadRef aOnThreadRef) {
mozilla::Maybe<uint32_t> maybeFeatures =
RacyFeatures::FeaturesIfActiveAndUnpaused();
if (!maybeFeatures) {
return false;
}
ProfileBuffer profileBuffer(aChunkedBuffer);
Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
REGISTERS_SYNC_POPULATE(regs);
#else
regs.Clear();
#endif
DoSyncSample(*maybeFeatures,
aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef(),
TimeStamp::Now(), regs, profileBuffer, aCaptureOptions);
return true;
},
// If this was called from a non-registered thread, return false and do no
// more work. This can happen from a memory hook.
false);
}
bool profiler_backtrace_into_buffer(ProfileChunkedBuffer& aChunkedBuffer,
NativeStack& aNativeStack) {
MOZ_RELEASE_ASSERT(CorePS::Exists());
return ThreadRegistration::WithOnThreadRefOr(
[&](ThreadRegistration::OnThreadRef aOnThreadRef) {
mozilla::Maybe<uint32_t> maybeFeatures =
RacyFeatures::FeaturesIfActiveAndUnpaused();
if (!maybeFeatures) {
return false;
}
ProfileBuffer profileBuffer(aChunkedBuffer);
const uint64_t bufferRangeStart = profileBuffer.BufferRangeStart();
const uint64_t samplePos = profileBuffer.AddThreadIdEntry(
aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef()
.Info()
.ThreadId());
TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
profileBuffer.AddEntry(
ProfileBufferEntry::Time(delta.ToMilliseconds()));
ProfileBufferCollector collector(profileBuffer, samplePos,
bufferRangeStart);
for (int nativeIndex = (int)(aNativeStack.mCount) - 1; nativeIndex >= 0;
--nativeIndex) {
collector.CollectNativeLeafAddr(
(void*)aNativeStack.mPCs[nativeIndex]);
}
return true;
},
// If this was called from a non-registered thread, return false and do no
// more work. This can happen from a memory hook.
false);
}
UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
AUTO_PROFILER_LABEL_HOT("profiler_capture_backtrace", PROFILER);
// Quick is-active and feature check before allocating a buffer.
// If NoMarkerStacks is set, we don't want to capture a backtrace.
if (!profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) {
return nullptr;
}
auto buffer = MakeUnique<ProfileChunkedBuffer>(
ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
MakeUnique<ProfileBufferChunkManagerSingle>(
ProfileBufferChunkManager::scExpectedMaximumStackSize));
if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) {
return nullptr;
}
return buffer;
}
UniqueProfilerBacktrace profiler_get_backtrace() {
UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace();
if (!buffer) {
return nullptr;
}
return UniqueProfilerBacktrace(
new ProfilerBacktrace("SyncProfile", std::move(buffer)));
}
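// Illustrative sketch (not part of this file): a one-off stack can be
// captured on the current thread and kept for later use, for example to be
// attached to a marker recorded elsewhere:
//
//   UniqueProfilerBacktrace backtrace = profiler_get_backtrace();
//   if (backtrace) {
//     // ... hand the backtrace to marker-recording code ...
//   }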
void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
delete aBacktrace;
}
bool profiler_is_locked_on_current_thread() {
// This function is used to help users avoid calling `profiler_...` functions
// when the profiler may already have a lock in place, which would prevent a
// 2nd recursive lock (resulting in a crash or a never-ending wait), or a
// deadlock between any two mutexes. So we must return `true` for any of:
// - The main profiler mutex, used by most functions, and/or
// - The buffer mutex, used directly in some functions without locking the
// main mutex, e.g., marker-related functions.
// - The ProfilerParent or ProfilerChild mutex, used to store and process
// buffer chunk updates.
return PSAutoLock::IsLockedOnCurrentThread() ||
ThreadRegistry::IsRegistryMutexLockedOnCurrentThread() ||
ThreadRegistration::IsDataMutexLockedOnCurrentThread() ||
profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread() ||
ProfilerParent::IsLockedOnCurrentThread() ||
ProfilerChild::IsLockedOnCurrentThread();
}
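// Illustrative sketch (not part of this file): code that may run below one of
// the profiler's own locks (e.g. from a memory hook) can use this check to
// avoid re-entering the profiler; the marker name is hypothetical.
//
//   if (!profiler_is_locked_on_current_thread()) {
//     PROFILER_MARKER_UNTYPED("SafeToRecord", PROFILER,
//                             MarkerTiming::IntervalEnd());
//   }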
void profiler_set_js_context(CycleCollectedJSContext* aCx) {
MOZ_ASSERT(aCx);
ThreadRegistration::WithOnThreadRef(
[&](ThreadRegistration::OnThreadRef aOnThreadRef) {
// The profiler mutex must be locked before the ThreadRegistration's.
PSAutoLock lock;
aOnThreadRef.WithLockedRWOnThread(
[&](ThreadRegistration::LockedRWOnThread& aThreadData) {
aThreadData.SetCycleCollectedJSContext(aCx);
if (!ActivePS::Exists(lock) || !ActivePS::FeatureJS(lock)) {
return;
}
if (ProfiledThreadData* profiledThreadData =
aThreadData.GetProfiledThreadData(lock);
profiledThreadData) {
profiledThreadData->NotifyReceivedJSContext(
ActivePS::Buffer(lock).BufferRangeEnd());
if (ActivePS::FeatureTracing(lock)) {
aCx->BeginExecutionTracingAsync();
}
}
});
});
// This call is on-thread, so we can call PollJSSampling() to start JS
// sampling immediately.
PollJSSamplingForCurrentThread();
}
void profiler_clear_js_context() {
MOZ_RELEASE_ASSERT(CorePS::Exists());
ThreadRegistration::WithOnThreadRef(
[](ThreadRegistration::OnThreadRef aOnThreadRef) {
CycleCollectedJSContext* cccx =
aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef()
.GetCycleCollectedJSContext();
if (!cccx) {
return;
}
JSContext* cx = cccx->Context();
// The profiler mutex must be locked before the ThreadRegistration's.
{
PSAutoLock lock;
ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData =
aOnThreadRef.GetLockedRWOnThread();
ProfiledThreadData* profiledThreadData =
lockedThreadData->GetProfiledThreadData(lock);
if (!(profiledThreadData && ActivePS::Exists(lock) &&
ActivePS::FeatureJS(lock))) {
// This thread is not being profiled or JS profiling is off; we only
// need to clear the context pointer.
lockedThreadData->ClearCycleCollectedJSContext();
return;
}
profiledThreadData->NotifyAboutToLoseJSContext(
cx, CorePS::ProcessStartTime(), ActivePS::Buffer(lock));
if (ActivePS::FeatureTracing(lock)) {
cccx->EndExecutionTracingAsync();
}
// Notify the JS context that profiling for this context has
// stopped. Do this by calling StopJSSampling and PollJSSampling
// before nulling out the JSContext.
lockedThreadData->StopJSSampling();
}
// Drop profiler mutex for call into JS engine. This must happen before
// ClearCycleCollectedJSContext below.
PollJSSamplingForCurrentThread();
{
PSAutoLock lock;
ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData =
aOnThreadRef.GetLockedRWOnThread();
lockedThreadData->ClearCycleCollectedJSContext();
// Tell the thread that we'd like to have JS sampling on this
// thread again, once it gets a new JSContext (if ever).
lockedThreadData->StartJSSampling(ActivePS::JSFlags(lock));
}
});
}
static void profiler_suspend_and_sample_thread(
const PSAutoLock* aLockIfAsynchronousSampling,
const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
JsFrame* aJsFrames, uint32_t aFeatures, ProfilerStackCollector& aCollector,
bool aSampleNative) {
const ThreadRegistrationInfo& info = aThreadData.Info();
if (info.IsMainThread()) {
aCollector.SetIsMainThread();
}
// Allocate the space for the native stack
NativeStack nativeStack{.mCount = 0};
auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
// The target thread is now suspended. Collect a native backtrace,
// and call the callback.
StackWalkControl* stackWalkControlIfSupported = nullptr;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
StackWalkControl stackWalkControl;
if constexpr (StackWalkControl::scIsSupported) {
if (aSampleNative) {
stackWalkControlIfSupported = &stackWalkControl;
}
}
#endif
const uint32_t jsFramesCount =
aJsFrames ? ExtractJsFrames(!aLockIfAsynchronousSampling, aThreadData,
aRegs, aCollector, aJsFrames,
stackWalkControlIfSupported)
: 0;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
if (aSampleNative) {
// We can only use FramePointerStackWalk or MozStackWalk from
// suspend_and_sample_thread as other stackwalking methods may not be
// initialized.
# if defined(USE_FRAME_POINTER_STACK_WALK)
DoFramePointerBacktrace(aThreadData, aRegs, nativeStack,
stackWalkControlIfSupported);
# elif defined(USE_MOZ_STACK_WALK)
DoMozStackWalkBacktrace(aThreadData, aRegs, nativeStack,
stackWalkControlIfSupported);
# else
# error "Invalid configuration"
# endif
MergeStacks(!aLockIfAsynchronousSampling, aThreadData, nativeStack,
aCollector, aJsFrames, jsFramesCount);
} else
#endif
{
MergeStacks(!aLockIfAsynchronousSampling, aThreadData, nativeStack,
aCollector, aJsFrames, jsFramesCount);
aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
}
};
if (!aLockIfAsynchronousSampling) {
// Sampling the current thread, do NOT suspend it!
Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
REGISTERS_SYNC_POPULATE(regs);
#else
regs.Clear();
#endif
collectStack(regs, TimeStamp::Now());
} else {
// Suspend, sample, and then resume the target thread.
Sampler sampler(*aLockIfAsynchronousSampling);
TimeStamp now = TimeStamp::Now();
sampler.SuspendAndSampleAndResumeThread(*aLockIfAsynchronousSampling,
aThreadData, now, collectStack);
// NOTE: Make sure to disable the sampler before it is destroyed, in
// case the profiler is running at the same time.
sampler.Disable(*aLockIfAsynchronousSampling);
}
}
// NOTE: aCollector's methods will be called while the target thread is paused.
// Doing things in those methods like allocating -- which may try to claim
// locks -- is a surefire way to deadlock.
void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId,
uint32_t aFeatures,
ProfilerStackCollector& aCollector,
bool aSampleNative /* = true */) {
if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) {
// Sampling the current thread. Get its information from the TLS (no locking
// required.)
ThreadRegistration::WithOnThreadRef(
[&](ThreadRegistration::OnThreadRef aOnThreadRef) {
aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread(
[&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
aThreadData) {
if (!aThreadData.GetJSContext()) {
// No JSContext, there is no JS frame buffer (and no need for
// it).
profiler_suspend_and_sample_thread(
/* aLockIfAsynchronousSampling = */ nullptr, aThreadData,
/* aJsFrames = */ nullptr, aFeatures, aCollector,
aSampleNative);
} else {
// JSContext is present, we need to lock the thread data to
// access the JS frame buffer.
aOnThreadRef.WithConstLockedRWOnThread(
[&](const ThreadRegistration::LockedRWOnThread&
aLockedThreadData) {
profiler_suspend_and_sample_thread(
/* aLockIfAsynchronousSampling = */ nullptr,
aThreadData, aLockedThreadData.GetJsFrameBuffer(),
aFeatures, aCollector, aSampleNative);
});
}
});
});
} else {
// Lock the profiler before accessing the ThreadRegistry.
PSAutoLock lock;
ThreadRegistry::WithOffThreadRef(
aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) {
aOffThreadRef.WithLockedRWFromAnyThread(
[&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
aThreadData) {
JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
profiler_suspend_and_sample_thread(&lock, aThreadData, jsFrames,
aFeatures, aCollector,
aSampleNative);
});
});
}
}
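// Illustrative sketch (not part of this file): assuming MyCollector is a
// hypothetical ProfilerStackCollector subclass implementing the collection
// callbacks, an external sampler could capture a single stack from a target
// thread like this:
//
//   MyCollector collector;
//   profiler_suspend_and_sample_thread(targetThreadId, /* aFeatures */ 0,
//                                      collector, /* aSampleNative */ true);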
// END externally visible functions
////////////////////////////////////////////////////////////////////////