/*
|
* Copyright (C) 2018 The Android Open Source Project
|
*
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*/
|
|
package com.android.server;
|
|
import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig;
|
|
import static java.lang.annotation.RetentionPolicy.SOURCE;
|
|
import android.annotation.IntDef;
|
import android.annotation.Nullable;
|
import android.content.Context;
|
import android.content.pm.PackageManager;
|
import android.content.pm.VersionedPackage;
|
import android.net.NetworkStackClient;
|
import android.os.Environment;
|
import android.os.Handler;
|
import android.os.Looper;
|
import android.os.SystemClock;
|
import android.provider.DeviceConfig;
|
import android.text.TextUtils;
|
import android.util.ArrayMap;
|
import android.util.ArraySet;
|
import android.util.AtomicFile;
|
import android.util.Slog;
|
import android.util.Xml;
|
|
import com.android.internal.annotations.GuardedBy;
|
import com.android.internal.annotations.VisibleForTesting;
|
import com.android.internal.os.BackgroundThread;
|
import com.android.internal.util.FastXmlSerializer;
|
import com.android.internal.util.XmlUtils;
|
|
import libcore.io.IoUtils;
|
|
import org.xmlpull.v1.XmlPullParser;
|
import org.xmlpull.v1.XmlPullParserException;
|
import org.xmlpull.v1.XmlSerializer;
|
|
import java.io.File;
|
import java.io.FileNotFoundException;
|
import java.io.FileOutputStream;
|
import java.io.IOException;
|
import java.io.InputStream;
|
import java.lang.annotation.Retention;
|
import java.nio.charset.StandardCharsets;
|
import java.util.ArrayList;
|
import java.util.Collections;
|
import java.util.Iterator;
|
import java.util.List;
|
import java.util.Map;
|
import java.util.Set;
|
import java.util.concurrent.TimeUnit;
|
|
/**
|
* Monitors the health of packages on the system and notifies interested observers when packages
|
* fail. On failure, the registered observer with the least user impacting mitigation will
|
* be notified.
|
*/
|
public class PackageWatchdog {
|
// Tag used for every Slog message emitted by this class.
private static final String TAG = "PackageWatchdog";
|
|
// DeviceConfig property names; read from DeviceConfig.NAMESPACE_ROLLBACK in #updateConfigs.
static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS =
|
"watchdog_trigger_failure_duration_millis";
|
static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT =
|
"watchdog_trigger_failure_count";
|
static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED =
|
"watchdog_explicit_health_check_enabled";
|
|
// Duration to count package failures before it resets to 0
|
private static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS =
|
(int) TimeUnit.MINUTES.toMillis(1);
|
// Number of package failures within the duration above before we notify observers
|
private static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;
|
// Whether explicit health checks are enabled or not
|
private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;
|
|
// Value written to the ATTR_VERSION attribute of the root element by #saveToFile.
private static final int DB_VERSION = 1;
|
// XML element names used in the persisted policy file.
private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
|
private static final String TAG_PACKAGE = "package";
|
private static final String TAG_OBSERVER = "observer";
|
// XML attribute names used in the persisted policy file.
private static final String ATTR_VERSION = "version";
|
private static final String ATTR_NAME = "name";
|
private static final String ATTR_DURATION = "duration";
|
private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration";
|
private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check";
|
|
// Lazily created singleton, see #getInstance.
@GuardedBy("PackageWatchdog.class")
|
private static PackageWatchdog sPackageWatchdog;
|
|
// Guards all mutable watchdog state, including ObserverInternal and MonitoredPackage instances.
private final Object mLock = new Object();
|
// System server context
|
private final Context mContext;
|
// Handler to run short running tasks
|
private final Handler mShortTaskHandler;
|
// Handler for processing IO and long running tasks
|
private final Handler mLongTaskHandler;
|
// Contains (observer-name -> observer-handle) that have ever been registered from
|
// previous boots. Observers with all packages expired are periodically pruned.
|
// It is saved to disk on system shutdown and repopulated on startup so it survives reboots.
|
@GuardedBy("mLock")
|
private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>();
|
// File containing the XML data of monitored packages /data/system/package-watchdog.xml
|
private final AtomicFile mPolicyFile;
|
// Receives the set of packages pending explicit health checks, see #syncRequests.
private final ExplicitHealthCheckController mHealthCheckController;
|
// Source of network stack failure reports, see #registerNetworkStackHealthListener.
private final NetworkStackClient mNetworkStackClient;
|
// Set to true by #onPackagesReady; #syncRequests does nothing until then.
@GuardedBy("mLock")
|
private boolean mIsPackagesReady;
|
// Flag to control whether explicit health checks are supported or not
|
@GuardedBy("mLock")
|
private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED;
|
// Refreshed from DeviceConfig by #updateConfigs.
@GuardedBy("mLock")
|
private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
|
// Refreshed from DeviceConfig by #updateConfigs.
@GuardedBy("mLock")
|
private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
|
// SystemClock#uptimeMillis when we last scheduled a state sync in
|
// #scheduleNextSyncStateLocked; 0 if no state sync is scheduled.
|
@GuardedBy("mLock")
|
private long mUptimeAtLastStateSync;
|
|
/**
 * Creates the production instance with its default dependencies: the policy file
 * {@code system/package-watchdog.xml} under {@link Environment#getDataDirectory()}, a
 * short-task handler on the calling thread's looper, the {@link BackgroundThread} handler
 * for long tasks, a new {@link ExplicitHealthCheckController} and the instance returned by
 * {@link NetworkStackClient#getInstance()}.
 */
private PackageWatchdog(Context context) {
    // The delegating this(...) call has to be the first statement, so every dependency
    // is built inline in the argument list.
    this(
            context,
            new AtomicFile(
                    new File(
                            new File(Environment.getDataDirectory(), "system"),
                            "package-watchdog.xml")),
            new Handler(Looper.myLooper()),
            BackgroundThread.getHandler(),
            new ExplicitHealthCheckController(context),
            NetworkStackClient.getInstance());
}
|
|
/**
 * Builds a PackageWatchdog from explicitly injected dependencies and then restores any
 * observer state previously persisted in {@code policyFile}.
 */
@VisibleForTesting
PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler,
        Handler longTaskHandler, ExplicitHealthCheckController controller,
        NetworkStackClient networkStackClient) {
    mContext = context;
    mNetworkStackClient = networkStackClient;
    mHealthCheckController = controller;
    mLongTaskHandler = longTaskHandler;
    mShortTaskHandler = shortTaskHandler;
    mPolicyFile = policyFile;
    // Runs last because it reads from mPolicyFile.
    loadFromFile();
}
|
|
/** Creates or gets singleton instance of PackageWatchdog. */
|
public static PackageWatchdog getInstance(Context context) {
|
synchronized (PackageWatchdog.class) {
|
if (sPackageWatchdog == null) {
|
sPackageWatchdog = new PackageWatchdog(context);
|
}
|
return sPackageWatchdog;
|
}
|
}
|
|
/**
|
* Called during boot to notify when packages are ready on the device so we can start
|
* binding.
|
*/
|
public void onPackagesReady() {
|
synchronized (mLock) {
|
mIsPackagesReady = true;
|
mHealthCheckController.setCallbacks(packageName -> onHealthCheckPassed(packageName),
|
packages -> onSupportedPackages(packages),
|
() -> syncRequestsAsync());
|
setPropertyChangedListenerLocked();
|
updateConfigs();
|
registerNetworkStackHealthListener();
|
}
|
}
|
|
/**
|
* Registers {@code observer} to listen for package failures
|
*
|
* <p>Observers are expected to call this on boot. It does not specify any packages but
|
* it will resume observing any packages requested from a previous boot.
|
*/
|
public void registerHealthObserver(PackageHealthObserver observer) {
|
synchronized (mLock) {
|
ObserverInternal internalObserver = mAllObservers.get(observer.getName());
|
if (internalObserver != null) {
|
internalObserver.mRegisteredObserver = observer;
|
}
|
}
|
}
|
|
/**
|
* Starts observing the health of the {@code packages} for {@code observer} and notifies
|
* {@code observer} of any package failures within the monitoring duration.
|
*
|
* <p>If monitoring a package supporting explicit health check, at the end of the monitoring
|
* duration if {@link #onHealthCheckPassed} was never called,
|
* {@link PackageHealthObserver#execute} will be called as if the package failed.
|
*
|
* <p>If {@code observer} is already monitoring a package in {@code packageNames},
|
* the monitoring window of that package will be reset to {@code durationMs} and the health
|
* check state will be reset to a default depending on if the package is contained in
|
* {@link mPackagesWithExplicitHealthCheckEnabled}.
|
*
|
* @throws IllegalArgumentException if {@code packageNames} is empty
|
* or {@code durationMs} is less than 1
|
*/
|
public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames,
|
long durationMs) {
|
if (packageNames.isEmpty()) {
|
Slog.wtf(TAG, "No packages to observe, " + observer.getName());
|
return;
|
}
|
if (durationMs < 1) {
|
// TODO: Instead of failing, monitor for default? 48hrs?
|
throw new IllegalArgumentException("Invalid duration " + durationMs + "ms for observer "
|
+ observer.getName() + ". Not observing packages " + packageNames);
|
}
|
|
List<MonitoredPackage> packages = new ArrayList<>();
|
for (int i = 0; i < packageNames.size(); i++) {
|
// Health checks not available yet so health check state will start INACTIVE
|
packages.add(new MonitoredPackage(packageNames.get(i), durationMs, false));
|
}
|
|
// Sync before we add the new packages to the observers. This will #pruneObservers,
|
// causing any elapsed time to be deducted from all existing packages before we add new
|
// packages. This maintains the invariant that the elapsed time for ALL (new and existing)
|
// packages is the same.
|
syncState("observing new packages");
|
|
synchronized (mLock) {
|
ObserverInternal oldObserver = mAllObservers.get(observer.getName());
|
if (oldObserver == null) {
|
Slog.d(TAG, observer.getName() + " started monitoring health "
|
+ "of packages " + packageNames);
|
mAllObservers.put(observer.getName(),
|
new ObserverInternal(observer.getName(), packages));
|
} else {
|
Slog.d(TAG, observer.getName() + " added the following "
|
+ "packages to monitor " + packageNames);
|
oldObserver.updatePackagesLocked(packages);
|
}
|
}
|
|
// Register observer in case not already registered
|
registerHealthObserver(observer);
|
|
// Sync after we add the new packages to the observers. We may have received packges
|
// requiring an earlier schedule than we are currently scheduled for.
|
syncState("updated observers");
|
}
|
|
/**
|
* Unregisters {@code observer} from listening to package failure.
|
* Additionally, this stops observing any packages that may have previously been observed
|
* even from a previous boot.
|
*/
|
public void unregisterHealthObserver(PackageHealthObserver observer) {
|
synchronized (mLock) {
|
mAllObservers.remove(observer.getName());
|
}
|
syncState("unregistering observer: " + observer.getName());
|
}
|
|
/**
|
* Returns packages observed by {@code observer}
|
*
|
* @return an empty set if {@code observer} has some packages observerd from a previous boot
|
* but has not registered itself in the current boot to receive notifications. Returns null
|
* if there are no active packages monitored from any boot.
|
*/
|
@Nullable
|
public Set<String> getPackages(PackageHealthObserver observer) {
|
synchronized (mLock) {
|
for (int i = 0; i < mAllObservers.size(); i++) {
|
if (observer.getName().equals(mAllObservers.keyAt(i))) {
|
if (observer.equals(mAllObservers.valueAt(i).mRegisteredObserver)) {
|
return mAllObservers.valueAt(i).mPackages.keySet();
|
}
|
return Collections.emptySet();
|
}
|
}
|
}
|
return null;
|
}
|
|
/**
|
* Called when a process fails either due to a crash or ANR.
|
*
|
* <p>For each package contained in the process, one registered observer with the least user
|
* impact will be notified for mitigation.
|
*
|
* <p>This method could be called frequently if there is a severe problem on the device.
|
*/
|
public void onPackageFailure(List<VersionedPackage> packages) {
|
mLongTaskHandler.post(() -> {
|
synchronized (mLock) {
|
if (mAllObservers.isEmpty()) {
|
return;
|
}
|
|
for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
|
VersionedPackage versionedPackage = packages.get(pIndex);
|
// Observer that will receive failure for versionedPackage
|
PackageHealthObserver currentObserverToNotify = null;
|
int currentObserverImpact = Integer.MAX_VALUE;
|
|
// Find observer with least user impact
|
for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
|
ObserverInternal observer = mAllObservers.valueAt(oIndex);
|
PackageHealthObserver registeredObserver = observer.mRegisteredObserver;
|
if (registeredObserver != null
|
&& observer.onPackageFailureLocked(
|
versionedPackage.getPackageName())) {
|
int impact = registeredObserver.onHealthCheckFailed(versionedPackage);
|
if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
|
&& impact < currentObserverImpact) {
|
currentObserverToNotify = registeredObserver;
|
currentObserverImpact = impact;
|
}
|
}
|
}
|
|
// Execute action with least user impact
|
if (currentObserverToNotify != null) {
|
currentObserverToNotify.execute(versionedPackage);
|
}
|
}
|
}
|
});
|
}
|
|
// TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also
|
// avoid holding lock?
|
// This currently adds about 7ms extra to shutdown thread
|
/** Writes the package information to file during shutdown. */
|
public void writeNow() {
|
synchronized (mLock) {
|
// Must only run synchronous tasks as this runs on the ShutdownThread and no other
|
// thread is guaranteed to run during shutdown.
|
if (!mAllObservers.isEmpty()) {
|
mLongTaskHandler.removeCallbacks(this::saveToFileAsync);
|
pruneObserversLocked();
|
saveToFile();
|
Slog.i(TAG, "Last write to update package durations");
|
}
|
}
|
}
|
|
/**
|
* Enables or disables explicit health checks.
|
* <p> If explicit health checks are enabled, the health check service is started.
|
* <p> If explicit health checks are disabled, pending explicit health check requests are
|
* passed and the health check service is stopped.
|
*/
|
private void setExplicitHealthCheckEnabled(boolean enabled) {
|
synchronized (mLock) {
|
mIsHealthCheckEnabled = enabled;
|
mHealthCheckController.setEnabled(enabled);
|
// Prune to update internal state whenever health check is enabled/disabled
|
syncState("health check state " + (enabled ? "enabled" : "disabled"));
|
}
|
}
|
|
/**
 * Possible severity values of the user impact of a {@link PackageHealthObserver#execute}.
 *
 * <p>When a package fails, the watchdog compares the values returned by the observers and
 * picks the observer with the numerically lowest value other than {@link #USER_IMPACT_NONE}.
 */
|
@Retention(SOURCE)
|
@IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_NONE,
|
PackageHealthObserverImpact.USER_IMPACT_LOW,
|
PackageHealthObserverImpact.USER_IMPACT_MEDIUM,
|
PackageHealthObserverImpact.USER_IMPACT_HIGH})
|
public @interface PackageHealthObserverImpact {
|
/** No action to take. */
|
int USER_IMPACT_NONE = 0;
|
/** Action has low user impact, user of a device will barely notice. */
|
int USER_IMPACT_LOW = 1;
|
/** Action has medium user impact, user of a device will likely notice. */
|
int USER_IMPACT_MEDIUM = 3;
|
/** Action has high user impact, a last resort, user of a device will be very frustrated. */
|
int USER_IMPACT_HIGH = 5;
|
}
|
|
/** Register instances of this interface to receive notifications on package failure. */
|
public interface PackageHealthObserver {
|
/**
|
* Called when health check fails for the {@code versionedPackage}.
|
*
|
* <p>The watchdog uses the returned value to choose which observer's {@link #execute}
|
* to call: the lowest value other than
|
* {@link PackageHealthObserverImpact#USER_IMPACT_NONE} wins.
|
*
|
* @return any one of {@link PackageHealthObserverImpact} to express the impact
|
* to the user on {@link #execute}
|
*/
|
@PackageHealthObserverImpact int onHealthCheckFailed(VersionedPackage versionedPackage);
|
|
/**
|
* Executes mitigation for {@link #onHealthCheckFailed}.
|
*
|
* <p>It is also called directly, without a preceding {@link #onHealthCheckFailed}, when
|
* a monitored package fails its explicit health check.
|
*
|
* @return {@code true} if action was executed successfully, {@code false} otherwise
|
*/
|
boolean execute(VersionedPackage versionedPackage);
|
|
// TODO(b/120598832): Ensure uniqueness?
|
/**
|
* Identifier for the observer, should not change across device updates otherwise the
|
* watchdog may drop observing packages with the old name.
|
*/
|
String getName();
|
}
|
|
long getTriggerFailureCount() {
|
synchronized (mLock) {
|
return mTriggerFailureCount;
|
}
|
}
|
|
/**
|
* Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}.
|
*/
|
private void syncRequestsAsync() {
|
mShortTaskHandler.removeCallbacks(this::syncRequests);
|
mShortTaskHandler.post(this::syncRequests);
|
}
|
|
/**
|
* Syncs health check requests with the {@link ExplicitHealthCheckController}.
|
* Calls to this must be serialized.
|
*
|
* @see #syncRequestsAsync
|
*/
|
private void syncRequests() {
|
Set<String> packages = null;
|
synchronized (mLock) {
|
if (mIsPackagesReady) {
|
packages = getPackagesPendingHealthChecksLocked();
|
} // else, we will sync requests when packages become ready
|
}
|
|
// Call outside lock to avoid holding lock when calling into the controller.
|
if (packages != null) {
|
Slog.i(TAG, "Syncing health check requests for packages: " + packages);
|
mHealthCheckController.syncRequests(packages);
|
}
|
}
|
|
/**
|
* Updates the observers monitoring {@code packageName} that explicit health check has passed.
|
*
|
* <p> This update is strictly for registered observers at the time of the call
|
* Observers that register after this signal will have no knowledge of prior signals and will
|
* effectively behave as if the explicit health check hasn't passed for {@code packageName}.
|
*
|
* <p> {@code packageName} can still be considered failed if reported by
|
* {@link #onPackageFailureLocked} before the package expires.
|
*
|
* <p> Triggered by components outside the system server when they are fully functional after an
|
* update.
|
*/
|
private void onHealthCheckPassed(String packageName) {
|
Slog.i(TAG, "Health check passed for package: " + packageName);
|
boolean isStateChanged = false;
|
|
synchronized (mLock) {
|
for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) {
|
ObserverInternal observer = mAllObservers.valueAt(observerIdx);
|
MonitoredPackage monitoredPackage = observer.mPackages.get(packageName);
|
|
if (monitoredPackage != null) {
|
int oldState = monitoredPackage.getHealthCheckStateLocked();
|
int newState = monitoredPackage.tryPassHealthCheckLocked();
|
isStateChanged |= oldState != newState;
|
}
|
}
|
}
|
|
if (isStateChanged) {
|
syncState("health check passed for " + packageName);
|
}
|
}
|
|
private void onSupportedPackages(List<PackageConfig> supportedPackages) {
|
boolean isStateChanged = false;
|
|
Map<String, Long> supportedPackageTimeouts = new ArrayMap<>();
|
Iterator<PackageConfig> it = supportedPackages.iterator();
|
while (it.hasNext()) {
|
PackageConfig info = it.next();
|
supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis());
|
}
|
|
synchronized (mLock) {
|
Slog.d(TAG, "Received supported packages " + supportedPackages);
|
Iterator<ObserverInternal> oit = mAllObservers.values().iterator();
|
while (oit.hasNext()) {
|
Iterator<MonitoredPackage> pit = oit.next().mPackages.values().iterator();
|
while (pit.hasNext()) {
|
MonitoredPackage monitoredPackage = pit.next();
|
String packageName = monitoredPackage.getName();
|
int oldState = monitoredPackage.getHealthCheckStateLocked();
|
int newState;
|
|
if (supportedPackageTimeouts.containsKey(packageName)) {
|
// Supported packages become ACTIVE if currently INACTIVE
|
newState = monitoredPackage.setHealthCheckActiveLocked(
|
supportedPackageTimeouts.get(packageName));
|
} else {
|
// Unsupported packages are marked as PASSED unless already FAILED
|
newState = monitoredPackage.tryPassHealthCheckLocked();
|
}
|
isStateChanged |= oldState != newState;
|
}
|
}
|
}
|
|
if (isStateChanged) {
|
syncState("updated health check supported packages " + supportedPackages);
|
}
|
}
|
|
@GuardedBy("mLock")
|
private Set<String> getPackagesPendingHealthChecksLocked() {
|
Slog.d(TAG, "Getting all observed packages pending health checks");
|
Set<String> packages = new ArraySet<>();
|
Iterator<ObserverInternal> oit = mAllObservers.values().iterator();
|
while (oit.hasNext()) {
|
ObserverInternal observer = oit.next();
|
Iterator<MonitoredPackage> pit =
|
observer.mPackages.values().iterator();
|
while (pit.hasNext()) {
|
MonitoredPackage monitoredPackage = pit.next();
|
String packageName = monitoredPackage.getName();
|
if (monitoredPackage.isPendingHealthChecksLocked()) {
|
packages.add(packageName);
|
}
|
}
|
}
|
return packages;
|
}
|
|
/**
|
* Syncs the state of the observers.
|
*
|
* <p> Prunes all observers, saves new state to disk, syncs health check requests with the
|
* health check service and schedules the next state sync.
|
*/
|
private void syncState(String reason) {
|
synchronized (mLock) {
|
Slog.i(TAG, "Syncing state, reason: " + reason);
|
pruneObserversLocked();
|
|
saveToFileAsync();
|
syncRequestsAsync();
|
|
// Done syncing state, schedule the next state sync
|
scheduleNextSyncStateLocked();
|
}
|
}
|
|
private void syncStateWithScheduledReason() {
|
syncState("scheduled");
|
}
|
|
@GuardedBy("mLock")
|
private void scheduleNextSyncStateLocked() {
|
long durationMs = getNextStateSyncMillisLocked();
|
mShortTaskHandler.removeCallbacks(this::syncStateWithScheduledReason);
|
if (durationMs == Long.MAX_VALUE) {
|
Slog.i(TAG, "Cancelling state sync, nothing to sync");
|
mUptimeAtLastStateSync = 0;
|
} else {
|
Slog.i(TAG, "Scheduling next state sync in " + durationMs + "ms");
|
mUptimeAtLastStateSync = SystemClock.uptimeMillis();
|
mShortTaskHandler.postDelayed(this::syncStateWithScheduledReason, durationMs);
|
}
|
}
|
|
/**
|
* Returns the next duration in millis to sync the watchdog state.
|
*
|
* @returns Long#MAX_VALUE if there are no observed packages.
|
*/
|
@GuardedBy("mLock")
|
private long getNextStateSyncMillisLocked() {
|
long shortestDurationMs = Long.MAX_VALUE;
|
for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
|
ArrayMap<String, MonitoredPackage> packages = mAllObservers.valueAt(oIndex).mPackages;
|
for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
|
MonitoredPackage mp = packages.valueAt(pIndex);
|
long duration = mp.getShortestScheduleDurationMsLocked();
|
if (duration < shortestDurationMs) {
|
shortestDurationMs = duration;
|
}
|
}
|
}
|
return shortestDurationMs;
|
}
|
|
/**
|
* Removes {@code elapsedMs} milliseconds from all durations on monitored packages
|
* and updates other internal state.
|
*/
|
@GuardedBy("mLock")
|
private void pruneObserversLocked() {
|
long elapsedMs = mUptimeAtLastStateSync == 0
|
? 0 : SystemClock.uptimeMillis() - mUptimeAtLastStateSync;
|
if (elapsedMs <= 0) {
|
Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms");
|
return;
|
}
|
|
Slog.i(TAG, "Removing " + elapsedMs + "ms from all packages on all observers");
|
Iterator<ObserverInternal> it = mAllObservers.values().iterator();
|
while (it.hasNext()) {
|
ObserverInternal observer = it.next();
|
Set<MonitoredPackage> failedPackages =
|
observer.prunePackagesLocked(elapsedMs);
|
if (!failedPackages.isEmpty()) {
|
onHealthCheckFailed(observer, failedPackages);
|
}
|
if (observer.mPackages.isEmpty()) {
|
Slog.i(TAG, "Discarding observer " + observer.mName + ". All packages expired");
|
it.remove();
|
}
|
}
|
}
|
|
private void onHealthCheckFailed(ObserverInternal observer,
|
Set<MonitoredPackage> failedPackages) {
|
mLongTaskHandler.post(() -> {
|
synchronized (mLock) {
|
PackageHealthObserver registeredObserver = observer.mRegisteredObserver;
|
if (registeredObserver != null) {
|
Iterator<MonitoredPackage> it = failedPackages.iterator();
|
while (it.hasNext()) {
|
String failedPackage = it.next().getName();
|
Slog.i(TAG, "Explicit health check failed for package " + failedPackage);
|
VersionedPackage versionedPkg = getVersionedPackage(failedPackage);
|
if (versionedPkg == null) {
|
Slog.w(TAG, "Explicit health check failed but could not find package "
|
+ failedPackage);
|
// TODO(b/120598832): Skip. We only continue to pass tests for now since
|
// the tests don't install any packages
|
versionedPkg = new VersionedPackage(failedPackage, 0L);
|
}
|
registeredObserver.execute(versionedPkg);
|
}
|
}
|
}
|
});
|
}
|
|
@Nullable
|
private VersionedPackage getVersionedPackage(String packageName) {
|
final PackageManager pm = mContext.getPackageManager();
|
if (pm == null) {
|
return null;
|
}
|
try {
|
final long versionCode = pm.getPackageInfo(
|
packageName, 0 /* flags */).getLongVersionCode();
|
return new VersionedPackage(packageName, versionCode);
|
} catch (PackageManager.NameNotFoundException e) {
|
return null;
|
}
|
}
|
|
/**
|
* Loads mAllObservers from file.
|
*
|
* <p>Note that this is <b>not</b> thread safe and should only called be called
|
* from the constructor.
|
*/
|
private void loadFromFile() {
|
InputStream infile = null;
|
mAllObservers.clear();
|
try {
|
infile = mPolicyFile.openRead();
|
final XmlPullParser parser = Xml.newPullParser();
|
parser.setInput(infile, StandardCharsets.UTF_8.name());
|
XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG);
|
int outerDepth = parser.getDepth();
|
while (XmlUtils.nextElementWithin(parser, outerDepth)) {
|
ObserverInternal observer = ObserverInternal.read(parser, this);
|
if (observer != null) {
|
mAllObservers.put(observer.mName, observer);
|
}
|
}
|
} catch (FileNotFoundException e) {
|
// Nothing to monitor
|
} catch (IOException | NumberFormatException | XmlPullParserException e) {
|
Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e);
|
mPolicyFile.delete();
|
} finally {
|
IoUtils.closeQuietly(infile);
|
}
|
}
|
|
/** Adds a {@link DeviceConfig#OnPropertiesChangedListener}. */
|
private void setPropertyChangedListenerLocked() {
|
DeviceConfig.addOnPropertiesChangedListener(
|
DeviceConfig.NAMESPACE_ROLLBACK,
|
mContext.getMainExecutor(),
|
(properties) -> {
|
if (!DeviceConfig.NAMESPACE_ROLLBACK.equals(properties.getNamespace())) {
|
return;
|
}
|
updateConfigs();
|
});
|
}
|
|
/**
|
* Health check is enabled or disabled after reading the flags
|
* from DeviceConfig.
|
*/
|
private void updateConfigs() {
|
synchronized (mLock) {
|
mTriggerFailureCount = DeviceConfig.getInt(
|
DeviceConfig.NAMESPACE_ROLLBACK,
|
PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT,
|
DEFAULT_TRIGGER_FAILURE_COUNT);
|
if (mTriggerFailureCount <= 0) {
|
mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
|
}
|
|
mTriggerFailureDurationMs = DeviceConfig.getInt(
|
DeviceConfig.NAMESPACE_ROLLBACK,
|
PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS,
|
DEFAULT_TRIGGER_FAILURE_DURATION_MS);
|
if (mTriggerFailureDurationMs <= 0) {
|
mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_COUNT;
|
}
|
|
setExplicitHealthCheckEnabled(DeviceConfig.getBoolean(
|
DeviceConfig.NAMESPACE_ROLLBACK,
|
PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED,
|
DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED));
|
}
|
}
|
|
private void registerNetworkStackHealthListener() {
|
// TODO: have an internal method to trigger a rollback by reporting high severity errors,
|
// and rely on ActivityManager to inform the watchdog of severe network stack crashes
|
// instead of having this listener in parallel.
|
mNetworkStackClient.registerHealthListener(
|
packageName -> {
|
final VersionedPackage pkg = getVersionedPackage(packageName);
|
if (pkg == null) {
|
Slog.wtf(TAG, "NetworkStack failed but could not find its package");
|
return;
|
}
|
// This is a severe failure and recovery should be attempted immediately.
|
// TODO: have a better way to handle such failures.
|
final List<VersionedPackage> pkgList = Collections.singletonList(pkg);
|
final long failureCount = getTriggerFailureCount();
|
for (int i = 0; i < failureCount; i++) {
|
onPackageFailure(pkgList);
|
}
|
});
|
}
|
|
/**
|
* Persists mAllObservers to file. Threshold information is ignored.
|
*/
|
private boolean saveToFile() {
|
Slog.i(TAG, "Saving observer state to file");
|
synchronized (mLock) {
|
FileOutputStream stream;
|
try {
|
stream = mPolicyFile.startWrite();
|
} catch (IOException e) {
|
Slog.w(TAG, "Cannot update monitored packages", e);
|
return false;
|
}
|
|
try {
|
XmlSerializer out = new FastXmlSerializer();
|
out.setOutput(stream, StandardCharsets.UTF_8.name());
|
out.startDocument(null, true);
|
out.startTag(null, TAG_PACKAGE_WATCHDOG);
|
out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION));
|
for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
|
mAllObservers.valueAt(oIndex).writeLocked(out);
|
}
|
out.endTag(null, TAG_PACKAGE_WATCHDOG);
|
out.endDocument();
|
mPolicyFile.finishWrite(stream);
|
return true;
|
} catch (IOException e) {
|
Slog.w(TAG, "Failed to save monitored packages, restoring backup", e);
|
mPolicyFile.failWrite(stream);
|
return false;
|
} finally {
|
IoUtils.closeQuietly(stream);
|
}
|
}
|
}
|
|
private void saveToFileAsync() {
|
if (!mLongTaskHandler.hasCallbacks(this::saveToFile)) {
|
mLongTaskHandler.post(this::saveToFile);
|
}
|
}
|
|
/**
|
* Represents an observer monitoring a set of packages along with the failure thresholds for
|
* each package.
|
*
|
* <p> Note, the PackageWatchdog#mLock must always be held when reading or writing
|
* instances of this class.
|
*/
|
//TODO(b/120598832): Remove 'm' from non-private fields
|
private static class ObserverInternal {
|
public final String mName;
|
//TODO(b/120598832): Add getter for mPackages
|
@GuardedBy("mLock")
|
public final ArrayMap<String, MonitoredPackage> mPackages = new ArrayMap<>();
|
@Nullable
|
@GuardedBy("mLock")
|
public PackageHealthObserver mRegisteredObserver;
|
|
ObserverInternal(String name, List<MonitoredPackage> packages) {
|
mName = name;
|
updatePackagesLocked(packages);
|
}
|
|
/**
|
* Writes important {@link MonitoredPackage} details for this observer to file.
|
* Does not persist any package failure thresholds.
|
*/
|
@GuardedBy("mLock")
|
public boolean writeLocked(XmlSerializer out) {
|
try {
|
out.startTag(null, TAG_OBSERVER);
|
out.attribute(null, ATTR_NAME, mName);
|
for (int i = 0; i < mPackages.size(); i++) {
|
MonitoredPackage p = mPackages.valueAt(i);
|
p.writeLocked(out);
|
}
|
out.endTag(null, TAG_OBSERVER);
|
return true;
|
} catch (IOException e) {
|
Slog.w(TAG, "Cannot save observer", e);
|
return false;
|
}
|
}
|
|
@GuardedBy("mLock")
|
public void updatePackagesLocked(List<MonitoredPackage> packages) {
|
for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
|
MonitoredPackage p = packages.get(pIndex);
|
mPackages.put(p.mName, p);
|
}
|
}
|
|
/**
|
* Reduces the monitoring durations of all packages observed by this observer by
|
* {@code elapsedMs}. If any duration is less than 0, the package is removed from
|
* observation. If any health check duration is less than 0, the health check result
|
* is evaluated.
|
*
|
* @return a {@link Set} of packages that were removed from the observer without explicit
|
* health check passing, or an empty list if no package expired for which an explicit health
|
* check was still pending
|
*/
|
@GuardedBy("mLock")
|
private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) {
|
Set<MonitoredPackage> failedPackages = new ArraySet<>();
|
Iterator<MonitoredPackage> it = mPackages.values().iterator();
|
while (it.hasNext()) {
|
MonitoredPackage p = it.next();
|
int oldState = p.getHealthCheckStateLocked();
|
int newState = p.handleElapsedTimeLocked(elapsedMs);
|
if (oldState != MonitoredPackage.STATE_FAILED
|
&& newState == MonitoredPackage.STATE_FAILED) {
|
Slog.i(TAG, "Package " + p.mName + " failed health check");
|
failedPackages.add(p);
|
}
|
if (p.isExpiredLocked()) {
|
it.remove();
|
}
|
}
|
return failedPackages;
|
}
|
|
/**
|
* Increments failure counts of {@code packageName}.
|
* @returns {@code true} if failure threshold is exceeded, {@code false} otherwise
|
*/
|
@GuardedBy("mLock")
|
public boolean onPackageFailureLocked(String packageName) {
|
MonitoredPackage p = mPackages.get(packageName);
|
if (p != null) {
|
return p.onFailureLocked();
|
}
|
return false;
|
}
|
|
/**
|
* Returns one ObserverInternal from the {@code parser} and advances its state.
|
*
|
* <p>Note that this method is <b>not</b> thread safe. It should only be called from
|
* #loadFromFile which in turn is only called on construction of the
|
* singleton PackageWatchdog.
|
**/
|
public static ObserverInternal read(XmlPullParser parser, PackageWatchdog watchdog) {
|
String observerName = null;
|
if (TAG_OBSERVER.equals(parser.getName())) {
|
observerName = parser.getAttributeValue(null, ATTR_NAME);
|
if (TextUtils.isEmpty(observerName)) {
|
Slog.wtf(TAG, "Unable to read observer name");
|
return null;
|
}
|
}
|
List<MonitoredPackage> packages = new ArrayList<>();
|
int innerDepth = parser.getDepth();
|
try {
|
while (XmlUtils.nextElementWithin(parser, innerDepth)) {
|
if (TAG_PACKAGE.equals(parser.getName())) {
|
try {
|
String packageName = parser.getAttributeValue(null, ATTR_NAME);
|
long duration = Long.parseLong(
|
parser.getAttributeValue(null, ATTR_DURATION));
|
long healthCheckDuration = Long.parseLong(
|
parser.getAttributeValue(null,
|
ATTR_EXPLICIT_HEALTH_CHECK_DURATION));
|
boolean hasPassedHealthCheck = Boolean.parseBoolean(
|
parser.getAttributeValue(null, ATTR_PASSED_HEALTH_CHECK));
|
if (!TextUtils.isEmpty(packageName)) {
|
packages.add(watchdog.new MonitoredPackage(packageName, duration,
|
healthCheckDuration, hasPassedHealthCheck));
|
}
|
} catch (NumberFormatException e) {
|
Slog.wtf(TAG, "Skipping package for observer " + observerName, e);
|
continue;
|
}
|
}
|
}
|
} catch (XmlPullParserException | IOException e) {
|
Slog.wtf(TAG, "Unable to read observer " + observerName, e);
|
return null;
|
}
|
if (packages.isEmpty()) {
|
return null;
|
}
|
return new ObserverInternal(observerName, packages);
|
}
|
}
|
|
/**
|
* Represents a package and its health check state along with the time
|
* it should be monitored for.
|
*
|
* <p> Note, the PackageWatchdog#mLock must always be held when reading or writing
|
* instances of this class.
|
*/
|
class MonitoredPackage {
|
// Health check states
|
// TODO(b/120598832): Prefix with HEALTH_CHECK
|
// mName has not passed health check but has requested a health check
|
public static final int STATE_ACTIVE = 0;
|
// mName has not passed health check and has not requested a health check
|
public static final int STATE_INACTIVE = 1;
|
// mName has passed health check
|
public static final int STATE_PASSED = 2;
|
// mName has failed health check
|
public static final int STATE_FAILED = 3;
|
|
//TODO(b/120598832): VersionedPackage?
|
private final String mName;
|
// One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after
|
// methods that could change the health check state: handleElapsedTimeLocked and
|
// tryPassHealthCheckLocked
|
private int mHealthCheckState = STATE_INACTIVE;
|
// Whether an explicit health check has passed.
|
// This value in addition with mHealthCheckDurationMs determines the health check state
|
// of the package, see #getHealthCheckStateLocked
|
@GuardedBy("mLock")
|
private boolean mHasPassedHealthCheck;
|
// System uptime duration to monitor package.
|
@GuardedBy("mLock")
|
private long mDurationMs;
|
// System uptime duration to check the result of an explicit health check
|
// Initially, MAX_VALUE until we get a value from the health check service
|
// and request health checks.
|
// This value in addition with mHasPassedHealthCheck determines the health check state
|
// of the package, see #getHealthCheckStateLocked
|
@GuardedBy("mLock")
|
private long mHealthCheckDurationMs = Long.MAX_VALUE;
|
// System uptime of first package failure
|
@GuardedBy("mLock")
|
private long mUptimeStartMs;
|
// Number of failures since mUptimeStartMs
|
@GuardedBy("mLock")
|
private int mFailures;
|
|
MonitoredPackage(String name, long durationMs, boolean hasPassedHealthCheck) {
|
this(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck);
|
}
|
|
MonitoredPackage(String name, long durationMs, long healthCheckDurationMs,
|
boolean hasPassedHealthCheck) {
|
mName = name;
|
mDurationMs = durationMs;
|
mHealthCheckDurationMs = healthCheckDurationMs;
|
mHasPassedHealthCheck = hasPassedHealthCheck;
|
updateHealthCheckStateLocked();
|
}
|
|
/** Writes the salient fields to disk using {@code out}. */
|
@GuardedBy("mLock")
|
public void writeLocked(XmlSerializer out) throws IOException {
|
out.startTag(null, TAG_PACKAGE);
|
out.attribute(null, ATTR_NAME, mName);
|
out.attribute(null, ATTR_DURATION, String.valueOf(mDurationMs));
|
out.attribute(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION,
|
String.valueOf(mHealthCheckDurationMs));
|
out.attribute(null, ATTR_PASSED_HEALTH_CHECK,
|
String.valueOf(mHasPassedHealthCheck));
|
out.endTag(null, TAG_PACKAGE);
|
}
|
|
/**
|
* Increment package failures or resets failure count depending on the last package failure.
|
*
|
* @return {@code true} if failure count exceeds a threshold, {@code false} otherwise
|
*/
|
@GuardedBy("mLock")
|
public boolean onFailureLocked() {
|
final long now = SystemClock.uptimeMillis();
|
final long duration = now - mUptimeStartMs;
|
if (duration > mTriggerFailureDurationMs) {
|
// TODO(b/120598832): Reseting to 1 is not correct
|
// because there may be more than 1 failure in the last trigger window from now
|
// This is the RescueParty impl, will leave for now
|
mFailures = 1;
|
mUptimeStartMs = now;
|
} else {
|
mFailures++;
|
}
|
boolean failed = mFailures >= mTriggerFailureCount;
|
if (failed) {
|
mFailures = 0;
|
}
|
return failed;
|
}
|
|
/**
|
* Sets the initial health check duration.
|
*
|
* @return the new health check state
|
*/
|
@GuardedBy("mLock")
|
public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) {
|
if (initialHealthCheckDurationMs <= 0) {
|
Slog.wtf(TAG, "Cannot set non-positive health check duration "
|
+ initialHealthCheckDurationMs + "ms for package " + mName
|
+ ". Using total duration " + mDurationMs + "ms instead");
|
initialHealthCheckDurationMs = mDurationMs;
|
}
|
if (mHealthCheckState == STATE_INACTIVE) {
|
// Transitions to ACTIVE
|
mHealthCheckDurationMs = initialHealthCheckDurationMs;
|
}
|
return updateHealthCheckStateLocked();
|
}
|
|
/**
|
* Updates the monitoring durations of the package.
|
*
|
* @return the new health check state
|
*/
|
@GuardedBy("mLock")
|
public int handleElapsedTimeLocked(long elapsedMs) {
|
if (elapsedMs <= 0) {
|
Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + mName);
|
return mHealthCheckState;
|
}
|
// Transitions to FAILED if now <= 0 and health check not passed
|
mDurationMs -= elapsedMs;
|
if (mHealthCheckState == STATE_ACTIVE) {
|
// We only update health check durations if we have #setHealthCheckActiveLocked
|
// This ensures we don't leave the INACTIVE state for an unexpected elapsed time
|
// Transitions to FAILED if now <= 0 and health check not passed
|
mHealthCheckDurationMs -= elapsedMs;
|
}
|
return updateHealthCheckStateLocked();
|
}
|
|
/**
|
* Marks the health check as passed and transitions to {@link #STATE_PASSED}
|
* if not yet {@link #STATE_FAILED}.
|
*
|
* @return the new health check state
|
*/
|
@GuardedBy("mLock")
|
public int tryPassHealthCheckLocked() {
|
if (mHealthCheckState != STATE_FAILED) {
|
// FAILED is a final state so only pass if we haven't failed
|
// Transition to PASSED
|
mHasPassedHealthCheck = true;
|
}
|
return updateHealthCheckStateLocked();
|
}
|
|
/** Returns the monitored package name. */
|
private String getName() {
|
return mName;
|
}
|
|
//TODO(b/120598832): IntDef
|
/**
|
* Returns the current health check state, any of {@link #STATE_ACTIVE},
|
* {@link #STATE_INACTIVE} or {@link #STATE_PASSED}
|
*/
|
@GuardedBy("mLock")
|
public int getHealthCheckStateLocked() {
|
return mHealthCheckState;
|
}
|
|
/**
|
* Returns the shortest duration before the package should be scheduled for a prune.
|
*
|
* @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled
|
*/
|
@GuardedBy("mLock")
|
public long getShortestScheduleDurationMsLocked() {
|
// Consider health check duration only if #isPendingHealthChecksLocked is true
|
return Math.min(toPositive(mDurationMs),
|
isPendingHealthChecksLocked()
|
? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE);
|
}
|
|
/**
|
* Returns {@code true} if the total duration left to monitor the package is less than or
|
* equal to 0 {@code false} otherwise.
|
*/
|
@GuardedBy("mLock")
|
public boolean isExpiredLocked() {
|
return mDurationMs <= 0;
|
}
|
|
/**
|
* Returns {@code true} if the package, {@link #getName} is expecting health check results
|
* {@code false} otherwise.
|
*/
|
@GuardedBy("mLock")
|
public boolean isPendingHealthChecksLocked() {
|
return mHealthCheckState == STATE_ACTIVE || mHealthCheckState == STATE_INACTIVE;
|
}
|
|
/**
|
* Updates the health check state based on {@link #mHasPassedHealthCheck}
|
* and {@link #mHealthCheckDurationMs}.
|
*
|
* @return the new health check state
|
*/
|
@GuardedBy("mLock")
|
private int updateHealthCheckStateLocked() {
|
int oldState = mHealthCheckState;
|
if (mHasPassedHealthCheck) {
|
// Set final state first to avoid ambiguity
|
mHealthCheckState = STATE_PASSED;
|
} else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) {
|
// Set final state first to avoid ambiguity
|
mHealthCheckState = STATE_FAILED;
|
} else if (mHealthCheckDurationMs == Long.MAX_VALUE) {
|
mHealthCheckState = STATE_INACTIVE;
|
} else {
|
mHealthCheckState = STATE_ACTIVE;
|
}
|
Slog.i(TAG, "Updated health check state for package " + mName + ": "
|
+ toString(oldState) + " -> " + toString(mHealthCheckState));
|
return mHealthCheckState;
|
}
|
|
/** Returns a {@link String} representation of the current health check state. */
|
private String toString(int state) {
|
switch (state) {
|
case STATE_ACTIVE:
|
return "ACTIVE";
|
case STATE_INACTIVE:
|
return "INACTIVE";
|
case STATE_PASSED:
|
return "PASSED";
|
case STATE_FAILED:
|
return "FAILED";
|
default:
|
return "UNKNOWN";
|
}
|
}
|
|
/** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. */
|
private long toPositive(long value) {
|
return value > 0 ? value : Long.MAX_VALUE;
|
}
|
}
|
}
|