Revision control

Copy as Markdown

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// This helps you perform a sync of multiple engines and helps you manage
// global and local state between syncs.
use super::state::{EngineChangesNeeded, GlobalState, PersistedGlobalState, SetupStateMachine};
use super::status::{ServiceStatus, SyncResult};
use super::storage_client::{BackoffListener, Sync15StorageClient, Sync15StorageClientInit};
use crate::clients_engine::{self, CommandProcessor, CLIENTS_TTL_REFRESH};
use crate::engine::{EngineSyncAssociation, SyncEngine};
use crate::error::Error;
use crate::telemetry;
use crate::KeyBundle;
use interrupt_support::Interruptee;
use std::collections::HashMap;
use std::result;
use std::time::{Duration, SystemTime};
/// Info about the client to use. We reuse the client unless
/// we discover the client_init has changed, in which case we re-create one.
#[derive(Debug)]
struct ClientInfo {
// the client_init used to create `client`.
client_init: Sync15StorageClientInit,
// the client (our tokenserver state machine state, and our http library's state)
client: Sync15StorageClient,
}
impl ClientInfo {
fn new(ci: &Sync15StorageClientInit) -> Result<Self, Error> {
Ok(Self {
client_init: ci.clone(),
client: Sync15StorageClient::new(ci.clone())?,
})
}
}
/// Info we want callers to engine *in memory* for us so that subsequent
/// syncs are faster. This should never be persisted to storage as it holds
/// sensitive information, such as the sync decryption keys.
#[derive(Debug, Default)]
pub struct MemoryCachedState {
last_client_info: Option<ClientInfo>,
last_global_state: Option<GlobalState>,
// These are just engined in memory, as persisting an invalid value far in the
// future has the potential to break sync for good.
next_sync_after: Option<SystemTime>,
next_client_refresh_after: Option<SystemTime>,
}
impl MemoryCachedState {
// Called we notice the cached state is stale.
pub fn clear_sensitive_info(&mut self) {
self.last_client_info = None;
self.last_global_state = None;
// Leave the backoff time, as there's no reason to think it's not still
// true.
}
pub fn get_next_sync_after(&self) -> Option<SystemTime> {
self.next_sync_after
}
pub fn should_refresh_client(&self) -> bool {
match self.next_client_refresh_after {
Some(t) => SystemTime::now() > t,
None => true,
}
}
pub fn note_client_refresh(&mut self) {
self.next_client_refresh_after =
Some(SystemTime::now() + Duration::from_secs(CLIENTS_TTL_REFRESH));
}
}
/// Sync multiple engines
/// * `engines` - The engines to sync
/// * `persisted_global_state` - The global state to use, or None if never
/// before provided. At the end of the sync, and even when the sync fails,
/// the value in this cell should be persisted to permanent storage and
/// provided next time the sync is called.
/// * `last_client_info` - The client state to use, or None if never before
/// provided. At the end of the sync, the value should be persisted
/// *in memory only* - it should not be persisted to disk.
/// * `storage_init` - Information about how the sync http client should be
/// configured.
/// * `root_sync_key` - The KeyBundle used for encryption.
///
/// Returns a map, keyed by name and holding an error value - if any engine
/// fails, the sync will continue on to other engines, but the error will be
/// places in this map. The absence of a name in the map implies the engine
/// succeeded.
pub fn sync_multiple(
engines: &[&dyn SyncEngine],
persisted_global_state: &mut Option<String>,
mem_cached_state: &mut MemoryCachedState,
storage_init: &Sync15StorageClientInit,
root_sync_key: &KeyBundle,
interruptee: &dyn Interruptee,
req_info: Option<SyncRequestInfo<'_>>,
) -> SyncResult {
sync_multiple_with_command_processor(
None,
engines,
persisted_global_state,
mem_cached_state,
storage_init,
root_sync_key,
interruptee,
req_info,
)
}
/// Like `sync_multiple`, but specifies an optional command processor to handle
/// commands from the clients collection. This function is called by the sync
/// manager, which provides its own processor.
#[allow(clippy::too_many_arguments)]
pub fn sync_multiple_with_command_processor(
command_processor: Option<&dyn CommandProcessor>,
engines: &[&dyn SyncEngine],
persisted_global_state: &mut Option<String>,
mem_cached_state: &mut MemoryCachedState,
storage_init: &Sync15StorageClientInit,
root_sync_key: &KeyBundle,
interruptee: &dyn Interruptee,
req_info: Option<SyncRequestInfo<'_>>,
) -> SyncResult {
log::info!("Syncing {} engines", engines.len());
let mut sync_result = SyncResult {
service_status: ServiceStatus::OtherError,
result: Ok(()),
declined: None,
next_sync_after: None,
engine_results: HashMap::with_capacity(engines.len()),
telemetry: telemetry::SyncTelemetryPing::new(),
};
let backoff = super::storage_client::new_backoff_listener();
let req_info = req_info.unwrap_or_default();
let driver = SyncMultipleDriver {
command_processor,
engines,
storage_init,
interruptee,
engines_to_state_change: req_info.engines_to_state_change,
backoff: backoff.clone(),
root_sync_key,
result: &mut sync_result,
persisted_global_state,
mem_cached_state,
saw_auth_error: false,
ignore_soft_backoff: req_info.is_user_action,
};
match driver.sync() {
Ok(()) => {
log::debug!(
"sync was successful, final status={:?}",
sync_result.service_status
);
}
Err(e) => {
log::warn!(
"sync failed: {}, final status={:?}",
e,
sync_result.service_status,
);
sync_result.result = Err(e);
}
}
// Respect `backoff` value when computing the next sync time even if we were
// ignoring it during the sync
sync_result.set_sync_after(backoff.get_required_wait(false).unwrap_or_default());
mem_cached_state.next_sync_after = sync_result.next_sync_after;
log::trace!("Sync result: {:?}", sync_result);
sync_result
}
/// This is essentially a bag of information that the sync manager knows, but
/// otherwise we won't. It should probably be rethought if it gains many more
/// fields.
#[derive(Debug, Default)]
pub struct SyncRequestInfo<'a> {
pub engines_to_state_change: Option<&'a HashMap<String, bool>>,
pub is_user_action: bool,
}
// The sync multiple driver
struct SyncMultipleDriver<'info, 'res, 'pgs, 'mcs> {
command_processor: Option<&'info dyn CommandProcessor>,
engines: &'info [&'info dyn SyncEngine],
storage_init: &'info Sync15StorageClientInit,
root_sync_key: &'info KeyBundle,
interruptee: &'info dyn Interruptee,
backoff: BackoffListener,
engines_to_state_change: Option<&'info HashMap<String, bool>>,
result: &'res mut SyncResult,
persisted_global_state: &'pgs mut Option<String>,
mem_cached_state: &'mcs mut MemoryCachedState,
ignore_soft_backoff: bool,
saw_auth_error: bool,
}
impl<'info, 'res, 'pgs, 'mcs> SyncMultipleDriver<'info, 'res, 'pgs, 'mcs> {
/// The actual worker for sync_multiple.
fn sync(mut self) -> result::Result<(), Error> {
log::info!("Loading/initializing persisted state");
let mut pgs = self.prepare_persisted_state();
log::info!("Preparing client info");
let client_info = self.prepare_client_info()?;
if self.was_interrupted() {
return Ok(());
}
log::info!("Entering sync state machine");
// Advance the state machine to the point where it can perform a full
// sync. This may involve uploading meta/global, crypto/keys etc.
let mut global_state = self.run_state_machine(&client_info, &mut pgs)?;
if self.was_interrupted() {
return Ok(());
}
// Set the service status to OK here - we may adjust it based on an individual
// engine failing.
self.result.service_status = ServiceStatus::Ok;
let clients_engine = if let Some(command_processor) = self.command_processor {
log::info!("Synchronizing clients engine");
let should_refresh = self.mem_cached_state.should_refresh_client();
let mut engine = clients_engine::Engine::new(command_processor, self.interruptee);
if let Err(e) = engine.sync(
&client_info.client,
&global_state,
self.root_sync_key,
should_refresh,
) {
// Record telemetry with the error just in case...
let mut telem_sync = telemetry::SyncTelemetry::new();
let mut telem_engine = telemetry::Engine::new("clients");
telem_engine.failure(&e);
telem_sync.engine(telem_engine);
self.result.service_status = ServiceStatus::from_err(&e);
// ...And bail, because a clients engine sync failure is fatal.
return Err(e);
}
// We don't record telemetry for successful clients engine
// syncs, since we only keep client records in memory, we
// expect the counts to be the same most times, and a
// failure aborts the entire sync.
if self.was_interrupted() {
return Ok(());
}
self.mem_cached_state.note_client_refresh();
Some(engine)
} else {
None
};
log::info!("Synchronizing engines");
let telem_sync =
self.sync_engines(&client_info, &mut global_state, clients_engine.as_ref());
self.result.telemetry.sync(telem_sync);
log::info!("Finished syncing engines.");
if !self.saw_auth_error {
log::trace!("Updating persisted global state");
self.mem_cached_state.last_client_info = Some(client_info);
self.mem_cached_state.last_global_state = Some(global_state);
}
Ok(())
}
fn was_interrupted(&mut self) -> bool {
if self.interruptee.was_interrupted() {
log::info!("Interrupted, bailing out");
self.result.service_status = ServiceStatus::Interrupted;
true
} else {
false
}
}
fn sync_engines(
&mut self,
client_info: &ClientInfo,
global_state: &mut GlobalState,
clients: Option<&clients_engine::Engine<'_>>,
) -> telemetry::SyncTelemetry {
let mut telem_sync = telemetry::SyncTelemetry::new();
for engine in self.engines {
let name = engine.collection_name();
if self
.backoff
.get_required_wait(self.ignore_soft_backoff)
.is_some()
{
log::warn!("Got backoff, bailing out of sync early");
break;
}
if global_state.global.declined.iter().any(|e| e == &*name) {
log::info!("The {} engine is declined. Skipping", name);
continue;
}
log::info!("Syncing {} engine!", name);
let mut telem_engine = telemetry::Engine::new(&*name);
let result = super::sync::synchronize_with_clients_engine(
&client_info.client,
global_state,
self.root_sync_key,
clients,
*engine,
true,
&mut telem_engine,
self.interruptee,
);
match result {
Ok(()) => log::info!("Sync of {} was successful!", name),
Err(ref e) => {
log::warn!("Sync of {} failed! {:?}", name, e);
let this_status = ServiceStatus::from_err(e);
// The only error which forces us to discard our state is an
// auth error.
self.saw_auth_error =
self.saw_auth_error || this_status == ServiceStatus::AuthenticationError;
telem_engine.failure(e);
// If the failure from the engine looks like anything other than
// a "engine error" we don't bother trying the others.
if this_status != ServiceStatus::OtherError {
telem_sync.engine(telem_engine);
self.result.engine_results.insert(name.into(), result);
self.result.service_status = this_status;
break;
}
}
}
telem_sync.engine(telem_engine);
self.result.engine_results.insert(name.into(), result);
if self.was_interrupted() {
break;
}
}
telem_sync
}
fn run_state_machine(
&mut self,
client_info: &ClientInfo,
pgs: &mut PersistedGlobalState,
) -> result::Result<GlobalState, Error> {
let last_state = self.mem_cached_state.last_global_state.take();
let mut state_machine = SetupStateMachine::for_full_sync(
&client_info.client,
self.root_sync_key,
pgs,
self.engines_to_state_change,
self.interruptee,
);
log::info!("Advancing state machine to ready (full)");
let res = state_machine.run_to_ready(last_state);
// Grab this now even though we don't need it until later to avoid a
// lifetime issue
let changes = state_machine.changes_needed.take();
// The state machine might have updated our persisted_global_state, so
// update the caller's repr of it.
*self.persisted_global_state = Some(serde_json::to_string(&pgs)?);
// Now that we've gone through the state machine, engine the declined list in
// the sync_result
self.result.declined = Some(pgs.get_declined().to_vec());
log::debug!(
"Declined engines list after state machine set to: {:?}",
self.result.declined,
);
if let Some(c) = changes {
self.wipe_or_reset_engines(c, &client_info.client)?;
}
let state = match res {
Err(e) => {
self.result.service_status = ServiceStatus::from_err(&e);
return Err(e);
}
Ok(state) => state,
};
self.result.telemetry.uid(client_info.client.hashed_uid()?);
// As for client_info, put None back now so we start from scratch on error.
self.mem_cached_state.last_global_state = None;
Ok(state)
}
fn wipe_or_reset_engines(
&mut self,
changes: EngineChangesNeeded,
client: &Sync15StorageClient,
) -> result::Result<(), Error> {
if changes.local_resets.is_empty() && changes.remote_wipes.is_empty() {
return Ok(());
}
for e in &changes.remote_wipes {
log::info!("Engine {:?} just got disabled locally, wiping server", e);
client.wipe_remote_engine(e)?;
}
for s in self.engines {
let name = s.collection_name();
if changes.local_resets.contains(&*name) {
log::info!("Resetting engine {}, as it was declined remotely", name);
s.reset(&EngineSyncAssociation::Disconnected)?;
}
}
Ok(())
}
fn prepare_client_info(&mut self) -> result::Result<ClientInfo, Error> {
let mut client_info = match self.mem_cached_state.last_client_info.take() {
Some(client_info) => {
// if our storage_init has changed it probably means the user has
// changed, courtesy of the 'kid' in the structure. Thus, we can't
// reuse the client or the memory cached state. We do keep the disk
// state as currently that's only the declined list.
if client_info.client_init != *self.storage_init {
log::info!("Discarding all state as the account might have changed");
*self.mem_cached_state = MemoryCachedState::default();
ClientInfo::new(self.storage_init)?
} else {
log::debug!("Reusing memory-cached client_info");
// we can reuse it (which should be the common path)
client_info
}
}
None => {
log::debug!("mem_cached_state was stale or missing, need setup");
// We almost certainly have no other state here, but to be safe, we
// throw away any memory state we do have.
self.mem_cached_state.clear_sensitive_info();
ClientInfo::new(self.storage_init)?
}
};
// Ensure we use the correct listener here rather than on all the branches
// above, since it seems less error prone.
client_info.client.backoff = self.backoff.clone();
Ok(client_info)
}
fn prepare_persisted_state(&mut self) -> PersistedGlobalState {
// Note that any failure to use a persisted state means we also decline
// to use our memory cached state, so that we fully rebuild that
// persisted state for next time.
match self.persisted_global_state {
Some(persisted_string) if !persisted_string.is_empty() => {
match serde_json::from_str::<PersistedGlobalState>(persisted_string) {
Ok(state) => {
log::trace!("Read persisted state: {:?}", state);
// Note that we don't set `result.declined` from the
// data in state - it remains None, which explicitly
// indicates "we don't have updated info".
state
}
_ => {
// Don't log the error since it might contain sensitive
// info (although currently it only contains the declined engines list)
error_support::report_error!(
"sync15-prepare-persisted-state",
"Failed to parse PersistedGlobalState from JSON! Falling back to default"
);
*self.mem_cached_state = MemoryCachedState::default();
PersistedGlobalState::default()
}
}
}
_ => {
log::info!(
"The application didn't give us persisted state - \
this is only expected on the very first run for a given user."
);
*self.mem_cached_state = MemoryCachedState::default();
PersistedGlobalState::default()
}
}
}
}