# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import glob
import logging
import os
import re
import urllib2
import urlparse

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.server import autotest
from autotest_lib.server import utils as server_utils
from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
from autotest_lib.server.cros.dynamic_suite import tools
from chromite.lib import retry_util

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock
def _metric_name(base_name):
    return 'chromeos/autotest/provision/' + base_name


# Local stateful update path is relative to the CrOS source directory.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FOR_UPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
_QUICK_PROVISION_SCRIPT = 'quick-provision'

_UPDATER_BIN = '/usr/bin/update_engine_client'
_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}

# Time (in seconds) to wait for the new kernel to be marked successful
# after auto update.
_KERNEL_UPDATE_TIMEOUT = 120


# PROVISION_FAILED - A flag file to indicate provision failures. The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`). The file's location in
# stateful means that on successful update it will be removed. Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'


# A flag file used to enable special handling in lab DUTs. Some
# parts of the system in Chromium OS test images will behave in ways
# convenient to the test lab when this file is present. Generally,
# we create this immediately after any update completes.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'


# _TARGET_VERSION - A file containing the new version to which we plan
# to update. This file is used by the CrOS shutdown code to detect and
# handle certain version downgrade cases. Specifically: Downgrading
# may trigger an unwanted powerwash in the target build when the
# following conditions are met:
# * Source build is a v4.4 kernel with R69-10756.0.0 or later.
# * Target build predates the R69-10756.0.0 cutoff.
# When this file is present and indicates a downgrade, the OS shutdown
# code on the DUT knows how to prevent the powerwash.
_TARGET_VERSION = '/run/update_target_version'


# _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
# when the Host.reboot() method fails. The text comes from
# `wait_for_restart()` in client/common_lib/hosts/base_classes.py.

_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'
class RootFSUpdateError(error.TestFail):
    """Raised when the RootFS fails to update."""


class StatefulUpdateError(error.TestFail):
    """Raised when the stateful partition fails to update."""


class _AttributedUpdateError(error.TestFail):
    """Update failure with an attributed cause."""

    def __init__(self, attribution, msg):
        super(_AttributedUpdateError, self).__init__(
                '%s: %s' % (attribution, msg))
        self._message = msg

    def _classify(self):
        for err_pattern, classification in self._CLASSIFIERS:
            if re.match(err_pattern, self._message):
                return classification
        return None

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        classification = self._classify()
        if classification:
            return '%s: %s' % (self._SUMMARY, classification)
        else:
            return self._SUMMARY


class HostUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the DUT.

    This class of exception should be raised when the most likely cause
    of failure was a condition existing on the DUT prior to the update,
    such as a hardware problem, or a bug in the software on the DUT.
    """

    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    _CLASSIFIERS = [
        (DUT_DOWN, DUT_DOWN),
        (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    ]

    def __init__(self, hostname, msg):
        super(HostUpdateError, self).__init__(
                'Error on %s prior to update' % hostname, msg)


class DevServerError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the devserver.

    This class of exception should be raised when the most likely cause
    of failure was the devserver serving the target image for update.
    """

    _SUMMARY = 'Devserver failed prior to update'
    _CLASSIFIERS = []

    def __init__(self, devserver, msg):
        super(DevServerError, self).__init__(
                'Devserver error on %s' % devserver, msg)


class ImageInstallError(_AttributedUpdateError):
    """Failure updating a DUT when installing from the devserver.

    This class of exception should be raised when the target DUT fails
    to download and install the target image from the devserver, and
    either the devserver or the DUT might be at fault.
    """

    _SUMMARY = 'Image failed to download and install'
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        super(ImageInstallError, self).__init__(
                'Download and install failed from %s onto %s'
                % (devserver, hostname), msg)


class NewBuildUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the target build.

    This class of exception should be raised when updating to a new
    build fails, and the most likely cause of the failure is a bug in
    the newly installed target build.
    """

    CHROME_FAILURE = 'Chrome failed to reach login screen'
    UPDATE_ENGINE_FAILURE = ('update-engine failed to call '
                             'chromeos-setgoodkernel')
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    _CLASSIFIERS = [
        (CHROME_FAILURE, 'Chrome did not start'),
        (UPDATE_ENGINE_FAILURE, 'update-engine did not start'),
        (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    ]

    def __init__(self, update_version, msg):
        super(NewBuildUpdateError, self).__init__(
                'Failure in build %s' % update_version, msg)

    @property
    def failure_summary(self):
        #pylint: disable=missing-docstring
        return 'Build failed to work after installing'
def _url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    return re.sub('/au/.*', '',
                  urlparse.urlparse(update_url).path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])
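# Worked examples for the two URL helpers above (the devserver host and
# version strings here are hypothetical, chosen only for illustration):
#
#   _url_to_version('http://devserver:8082/update/lumpy-release/R27-3837.0.0')
#       == 'R27-3837.0.0'
#   _url_to_version('http://devserver:8082/update/lumpy-release/'
#                   'R27-3837.0.0/au/R27-3836.0.0')
#       == 'R27-3837.0.0'   (the '/au/<source version>' delta suffix is dropped)
#   url_to_image_name('http://devserver:8082/update/lumpy-release/R27-3837.0.0')
#       == 'lumpy-release/R27-3837.0.0'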
def get_update_failure_reason(exception):
    """Convert an exception into a failure reason for metrics.

    The passed in `exception` should be one raised by failure of
    `ChromiumOSUpdater.run_update`. The returned string will describe
    the failure. If the input exception value is falsy, the return
    value will be `None`.

    The possible return strings are restricted to a small enumeration
    of values so that the string can be used in Monarch metrics without
    creating unbounded cardinality in the metric's range of values.

    @param exception Exception to be converted to a failure reason.

    @return A string suitable for use in Monarch metrics, or `None`.
    """
    if exception:
        if isinstance(exception, _AttributedUpdateError):
            return exception.failure_summary
        else:
            return 'Unknown Error: %s' % type(exception).__name__
    return None


def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple of (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
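# Example of the parsing above, assuming a typical (hypothetical) global_config
# entry of image_url_pattern: http://%s/update/%s
#
#   _get_devserver_build_from_update_url(
#       'http://devserver:8082/update/lumpy-release/R27-3837.0.0')
#   would return ('devserver:8082', 'lumpy-release/R27-3837.0.0').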
def _list_image_dir_contents(update_url):
    """Lists the contents of the devserver for a given build/update_url.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return
    error_msg = 'Cannot check contents of devserver, update url %s' % update_url
    try:
        devserver_url, build = _get_devserver_build_from_update_url(update_url)
    except ValueError as e:
        logging.warning('%s: %s', error_msg, e)
        return
    devserver = dev_server.ImageServer(devserver_url)
    try:
        devserver.list_image_dir(build)
    # The devserver will retry on URLError to avoid flaky connections, but will
    # eventually raise the URLError if it persists. All HTTPErrors get
    # converted to DevServerExceptions.
    except (dev_server.DevServerException, urllib2.URLError) as e:
        logging.warning('%s: %s', error_msg, e)


def _get_metric_fields(update_url):
    """Return a dict of metric fields.

    This is used for sending autoupdate metrics for the given update URL.

    @param update_url Metrics fields will be calculated from this URL.
    """
    build_name = url_to_image_name(update_url)
    try:
        board, build_type, milestone, _ = server_utils.ParseBuildName(
                build_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', build_name)
        board, build_type, milestone = ('', '', '')
    return {
        'dev_server': dev_server.get_resolved_hostname(update_url),
        'board': board,
        'build_type': build_type,
        'milestone': milestone,
    }
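# For a hypothetical update URL such as
# 'http://devserver:8082/update/lumpy-release/R27-3837.0.0', the helper above
# would produce fields along the lines of:
#
#   {'dev_server': 'devserver', 'board': 'lumpy',
#    'build_type': 'release', 'milestone': 'R27'}
#
# (exact values depend on dev_server.get_resolved_hostname and ParseBuildName).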
# TODO(garnold) This implements shared updater functionality needed for
# supporting the autoupdate_EndToEnd server-side test. We should probably
# migrate more of the existing ChromiumOSUpdater functionality to it as we
# expand non-CrOS support in other tests.
class ChromiumOSUpdater(object):
    """Chromium OS specific DUT update functionality."""

    def __init__(self, update_url, host=None, interactive=True,
                 use_quick_provision=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param interactive: Bool whether we are doing an interactive update.
        @param use_quick_provision: Whether we should attempt to perform
            the update using the quick-provision script.
        """
        self.update_url = update_url
        self.host = host
        self.interactive = interactive
        self.update_version = _url_to_version(update_url)
        self._use_quick_provision = use_quick_provision


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def check_update_status(self):
        """Returns the current update engine state.

        We use the `update_engine_client -status' command and parse the line
        indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".
        """
        update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
                                              _UPDATER_BIN)
        return update_status.stdout.strip().split('=')[-1]
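    # Illustrative note for check_update_status(): the grep'd line looks like
    # (a representative example, not output captured from a real DUT)
    #
    #   CURRENT_OP=UPDATE_STATUS_DOWNLOADING
    #
    # so splitting on '=' and taking the last element yields
    # 'UPDATE_STATUS_DOWNLOADING', which is then compared against the
    # UPDATER_* constants defined at the top of this module.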
    def _rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raise RootFSUpdateError if the DUT reports a root partition
            number that isn't one of the known valid values.
        """
        active_root = int(re.findall(r'\d+\Z', self._rootdev('-s'))[0])
        if active_root == _KERNEL_A['root']:
            return _KERNEL_A, _KERNEL_B
        elif active_root == _KERNEL_B['root']:
            return _KERNEL_B, _KERNEL_A
        else:
            raise RootFSUpdateError(
                    'Encountered unknown root partition: %s' % active_root)


    def _cgpt(self, flag, kernel):
        """Return numeric cgpt value for the specified flag, kernel, device."""
        return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
                kernel['kernel'], flag)).stdout.strip())


    def _get_next_kernel(self):
        """Return the kernel that has priority for the next boot."""
        priority_a = self._cgpt('-P', _KERNEL_A)
        priority_b = self._cgpt('-P', _KERNEL_B)
        if priority_a > priority_b:
            return _KERNEL_A
        else:
            return _KERNEL_B
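    # Background for the helpers above (a summary of standard CrOS A/B boot
    # behavior, not anything specific to this module): the root partition
    # number reported by `rootdev -s` maps back to a kernel slot via _KERNEL_A
    # and _KERNEL_B, e.g. a root device ending in 3 means KERN-A (partition 2)
    # is active. `cgpt show -P` reports each slot's boot priority; after a
    # successful payload install the inactive slot should hold the higher
    # priority, which is what _verify_kernel_state() checks further below.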
    def _get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, either _KERNEL_A
            or _KERNEL_B.
        """
        return self._cgpt('-S', kernel) != 0


    def _get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, either _KERNEL_A
            or _KERNEL_B.
        """
        return self._cgpt('-T', kernel)


    def _get_last_update_error(self):
        """Get the last autoupdate error code."""
        command_result = self._run(
                '%s --last_attempt_error' % _UPDATER_BIN)
        return command_result.stdout.strip().replace('\n', ', ')


    def _base_update_handler_no_retry(self, run_args):
        """Base function to handle a remote update ssh call.

        @param run_args: Dictionary of args passed to ssh_host.run function.

        @throws: intercepts and re-throws all exceptions
        """
        try:
            self.host.run(**run_args)
        except Exception as e:
            logging.debug('exception in update handler: %s', e)
            # Re-raise with a bare `raise` so the original traceback is kept.
            raise
    def _base_update_handler(self, run_args, err_msg_prefix=None):
        """Handle a remote update ssh call, possibly with retries.

        @param run_args: Dictionary of args passed to ssh_host.run function.
        @param err_msg_prefix: Prefix of the exception error message.
        """
        def exception_handler(e):
            """Examines exceptions and returns True if the update handler
            should be retried.

            @param e: the exception intercepted by the retry util.
            """
            return (isinstance(e, error.AutoservSSHTimeout) or
                    (isinstance(e, error.GenericHostRunError) and
                     hasattr(e, 'description') and
                     (re.search('ERROR_CODE=37', e.description) or
                      re.search('generic error .255.', e.description))))

        try:
            # Try the update twice (arg 2 is max_retry, not including the first
            # call). Some exceptions may be caught by the retry handler.
            retry_util.GenericRetry(exception_handler, 1,
                                    self._base_update_handler_no_retry,
                                    run_args)
        except Exception as e:
            message = err_msg_prefix + ': ' + str(e)
            raise RootFSUpdateError(message)


    def _wait_for_update_service(self):
        """Ensure that the update engine daemon is running, possibly
        by waiting for it a bit in case the DUT just rebooted and the
        service hasn't started yet.
        """
        def handler(e):
            """Retry exception handler.

            Assumes that the error is due to the update service not having
            started yet.

            @param e: the exception intercepted by the retry util.
            """
            if isinstance(e, error.AutoservRunError):
                logging.debug('update service check exception: %s\n'
                              'retrying...', e)
                return True
            else:
                return False

        # Retry at most three times, every 5s.
        status = retry_util.GenericRetry(handler, 3,
                                         self.check_update_status,
                                         sleep=5)

        # Expect the update engine to be idle.
        if status != UPDATER_IDLE:
            raise RootFSUpdateError(
                    'Update engine status is %s (%s was expected).'
                    % (status, UPDATER_IDLE))


    def _reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state."""
        self._run('stop ui || true')
        self._run('stop update-engine || true')
        self._run('start update-engine')
        self._wait_for_update_service()


    def _reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        self._run('%s --stateful_change=reset 2>&1'
                  % self._get_stateful_update_script())
        self._run('rm -f %s' % _TARGET_VERSION)


    def _set_target_version(self):
        """Set the "target version" for the update."""
        version_number = self.update_version.split('-')[1]
        self._run('echo %s > %s' % (version_number, _TARGET_VERSION))
    def _revert_boot_partition(self):
        """Revert the boot partition."""
        part = self._rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def _verify_kernel_state(self):
        """Verify that the next kernel to boot is correct for update.

        This tests that the kernel state is correct for a successfully
        downloaded and installed update. That is, the next kernel to
        boot must be the currently inactive kernel.

        @raise RootFSUpdateError if the DUT's next kernel isn't the
            expected next kernel.
        """
        inactive_kernel = self.get_kernel_state()[1]
        next_kernel = self._get_next_kernel()
        if next_kernel != inactive_kernel:
            raise RootFSUpdateError(
                    'Update failed. The kernel for next boot is %s, '
                    'but %s was expected.'
                    % (next_kernel['name'], inactive_kernel['name']))
        return inactive_kernel


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError if the DUT doesn't indicate that
            download is complete and the DUT is ready for reboot.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            error_msg = ''
            if status == UPDATER_IDLE:
                error_msg = 'Update error: %s' % self._get_last_update_error()
            raise RootFSUpdateError(
                    'Update engine status is %s (%s was expected). %s'
                    % (status, UPDATER_NEED_REBOOT, error_msg))
        return self._verify_kernel_state()
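    # A note on the expected update_engine state machine (standard
    # update_engine behavior, summarized here for readers of the two
    # verification helpers above): after trigger_update()/update_image() the
    # client moves through the UPDATER_PROCESSING_UPDATE states and should
    # settle in UPDATER_NEED_REBOOT; seeing UPDATER_IDLE again instead usually
    # means the attempt failed, which is why _verify_update_completed() pulls
    # --last_attempt_error in that case.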
    def trigger_update(self):
        """Triggers a background update."""
        # If this function is called immediately after reboot (which it
        # can be), there is no guarantee that the update engine is up
        # and running yet, so wait for it.
        self._wait_for_update_service()

        autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                          (_UPDATER_BIN, self.update_url))
        run_args = {'command': autoupdate_cmd}
        err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
        logging.info('Triggering update via: %s', autoupdate_cmd)
        metric_fields = {'success': False}
        try:
            self._base_update_handler(run_args, err_prefix)
            metric_fields['success'] = True
        finally:
            c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
            metric_fields.update(_get_metric_fields(self.update_url))
            c.increment(fields=metric_fields)


    def update_image(self):
        """Updates the device root FS and kernel and verifies success."""
        autoupdate_cmd = ('%s --update --omaha_url=%s' %
                          (_UPDATER_BIN, self.update_url))
        if not self.interactive:
            autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
        run_args = {'command': autoupdate_cmd, 'timeout': 3600}
        err_prefix = ('Failed to install device image using payload at %s '
                      'on %s. ' % (self.update_url, self.host.hostname))
        logging.info('Updating image via: %s', autoupdate_cmd)
        metric_fields = {'success': False}
        try:
            self._base_update_handler(run_args, err_prefix)
            metric_fields['success'] = True
        finally:
            c = metrics.Counter('chromeos/autotest/autoupdater/update')
            metric_fields.update(_get_metric_fields(self.update_url))
            c.increment(fields=metric_fields)
        return self._verify_update_completed()


    def _get_remote_script(self, script_name):
        """Ensure that `script_name` is present on the DUT.

        The given script (e.g. `stateful_update`) may be present in the
        stateful partition under /usr/local/bin, or we may have to
        download it from the devserver.

        Determine whether the script is present or must be downloaded
        and download if necessary. Then, return a command fragment
        sufficient to run the script from wherever it now lives on the
        DUT.

        @param script_name The name of the script as expected in
            /usr/local/bin and on the devserver.
        @return A string with the command (minus arguments) that will
            run the target script.
        """
        remote_script = '/usr/local/bin/%s' % script_name
        if self.host.path_exists(remote_script):
            return remote_script
        remote_tmp_script = '/tmp/%s' % script_name
        server_name = urlparse.urlparse(self.update_url)[1]
        script_url = 'http://%s/static/%s' % (server_name, script_name)
        fetch_script = (
                'curl -o %s %s && head -1 %s | grep "^#!" | sed "s/#!//"') % (
                remote_tmp_script, script_url, remote_tmp_script)
        script_interpreter = self._run(fetch_script,
                                       ignore_status=True).stdout.strip()
        if not script_interpreter:
            return None
        return '%s %s' % (script_interpreter, remote_tmp_script)
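    # Illustrative outcomes for _get_remote_script() (the paths and devserver
    # host below are hypothetical):
    #   - script already on the DUT: returns '/usr/local/bin/quick-provision'.
    #   - script fetched from 'http://devserver:8082/static/quick-provision':
    #     returns something like '/bin/bash /tmp/quick-provision', i.e. the
    #     interpreter parsed from the script's '#!' line plus the temp path.
    #   - fetch failed or no shebang found: returns None.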
    def _get_stateful_update_script(self):
        """Returns a command to run the stateful update script.

        Find `stateful_update` on the target or install it, as
        necessary. If installation fails, raise an exception.

        @raise StatefulUpdateError if the script can't be found or
            installed.
        @return A string that can be joined with arguments to run the
            `stateful_update` command on the DUT.
        """
        script_command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
        if not script_command:
            raise StatefulUpdateError('Could not install %s on DUT'
                                      % _STATEFUL_UPDATE_SCRIPT)
        return script_command


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.
        """
        version = self.host.get_release_version()
        # can_rollback was introduced in M36 (build 5772). /etc/lsb-release
        # reports the version as X.Y.Z; this split pulls out the first part
        # (the build number).
        try:
            build_number = int(version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
            logging.info('Checking for rollback.')
            try:
                self._run(can_rollback_cmd)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % _UPDATER_BIN
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        @param clobber: If True, perform a clean stateful installation.

        @raise StatefulUpdateError if the update script fails to
            complete successfully.
        """
        logging.info('Updating stateful partition...')
        statefuldev_url = self.update_url.replace('update', 'static')

        # Attempt stateful partition update; this must succeed so that the
        # newly installed host is testable after update.
        statefuldev_cmd = [self._get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=1200)
        except error.AutoservRunError:
            raise StatefulUpdateError(
                    'Failed to perform stateful update on %s' %
                    self.host.hostname)
    def verify_boot_expectations(self, expected_kernel, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel: the kernel we expect to have booted, i.e. one
            of the kernel descriptors returned by get_kernel_state() (for
            example, the entry for partition 4).
        @param rollback_message: string included in the exception message
            text if we booted with the wrong partition.

        @raise NewBuildUpdateError if any of the various checks fail.
        """
        # Figure out the newly active kernel.
        active_kernel = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if active_kernel != expected_kernel:

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug(' %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise NewBuildUpdateError(self.update_version, rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                    lambda: (self._get_kernel_tries(active_kernel) == 0
                             and self._get_kernel_success(active_kernel)),
                    exception=RootFSUpdateError(),
                    timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except RootFSUpdateError:
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = NewBuildUpdateError.CHROME_FAILURE
            else:
                event = NewBuildUpdateError.UPDATE_ENGINE_FAILURE
            raise NewBuildUpdateError(self.update_version, event)
    def _prepare_host(self):
        """Make sure the target DUT is working and ready for update.

        Initially, the target DUT's state is unknown. The DUT is
        expected to be online, but we strive to be forgiving if Chrome
        and/or the update engine aren't fully functional.
        """
        # Summary of work, and the rationale:
        # 1. Reboot, because it's a good way to clear out problems.
        # 2. Touch the PROVISION_FAILED file, to allow repair to detect
        #    failure later.
        # 3. Run the hook for host class specific preparation.
        # 4. Stop Chrome, because the system is designed to eventually
        #    reboot if Chrome is stuck in a crash loop.
        # 5. Force `update-engine` to start, because if Chrome failed
        #    to start properly, the status of the `update-engine` job
        #    will be uncertain.
        if not self.host.is_up():
            raise HostUpdateError(self.host.hostname,
                                  HostUpdateError.DUT_DOWN)
        self._reset_stateful_partition()
        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
        self._run('touch %s' % PROVISION_FAILED)
        self.host.prepare_for_update()
        self._reset_update_engine()
        logging.info('Updating from version %s to %s.',
                     self.host.get_release_version(),
                     self.update_version)


    def _verify_devserver(self):
        """Check that our chosen devserver is still working.

        @raise DevServerError if the devserver fails any sanity check.
        """
        server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
        try:
            if not dev_server.ImageServer.devserver_healthy(server):
                raise DevServerError(
                        server, 'Devserver is not healthy')
        except Exception as e:
            raise DevServerError(
                    server, 'Devserver is not up and available: %s' % e)
    def _install_via_update_engine(self):
        """Install an update using the production AU flow.

        This uses the standard AU flow and the `stateful_update` script
        to download and install a root FS, kernel and stateful
        filesystem content.

        @return The kernel expected to be booted next.
        """
        logging.info('Installing image using update_engine.')
        expected_kernel = self.update_image()
        self.update_stateful()
        self._set_target_version()
        return expected_kernel


    def _install_via_quick_provision(self):
        """Install an update using the `quick-provision` script.

        This uses the `quick-provision` script to download and install
        a root FS, kernel and stateful filesystem content.

        @return The kernel expected to be booted next.
        """
        if not self._use_quick_provision:
            return None
        build_re = global_config.global_config.get_config_value(
                'CROS', 'quick_provision_build_regex', type=str, default='')
        image_name = url_to_image_name(self.update_url)
        if not build_re or re.match(build_re, image_name) is None:
            logging.info('Not eligible for quick-provision.')
            return None
        logging.info('Installing image using quick-provision.')
        provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
        server_name = urlparse.urlparse(self.update_url)[1]
        static_url = 'http://%s/static' % server_name
        command = '%s --noreboot %s %s' % (
                provision_command, image_name, static_url)
        try:
            self._run(command)
            self._set_target_version()
            return self._verify_kernel_state()
        except Exception:
            # N.B. We handle only `Exception` here. Non-Exception
            # classes (such as KeyboardInterrupt) are handled by our
            # caller.
            logging.exception('quick-provision script failed; '
                              'will fall back to update_engine.')
            self._revert_boot_partition()
            self._reset_stateful_partition()
            self._reset_update_engine()
            return None
    def _install_update(self):
        """Install the requested image on the DUT, but don't start it.

        This downloads and installs a root FS, kernel and stateful
        filesystem content. This does not reboot the DUT, so the update
        is merely pending when the method returns.

        @return The kernel expected to be booted next.
        """
        logging.info('Installing image at %s onto %s',
                     self.update_url, self.host.hostname)
        try:
            return (self._install_via_quick_provision()
                    or self._install_via_update_engine())
        except:
            # N.B. This handling code includes non-Exception classes such
            # as KeyboardInterrupt. We need to clean up, but we also must
            # re-raise.
            self._revert_boot_partition()
            self._reset_stateful_partition()
            self._reset_update_engine()
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs due to failure...')
                self.host.get_file(
                        _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            _list_image_dir_contents(self.update_url)
            raise


    def _complete_update(self, expected_kernel):
        """Finish the update, and confirm that it succeeded.

        Initial condition is that the target build has been downloaded
        and installed on the DUT, but has not yet been booted. This
        function is responsible for rebooting the DUT, and checking that
        the new build is running successfully.

        @param expected_kernel: kernel expected to be active after reboot.
        """
        # Regarding the 'crossystem' command below: In some cases,
        # the update flow puts the TPM into a state such that it
        # fails verification. We don't know why. However, this
        # call papers over the problem by clearing the TPM during
        # the reboot.
        #
        # We ignore failures from 'crossystem'. Although failure
        # here is unexpected, and could signal a bug, the point of
        # the exercise is to paper over problems; allowing this to
        # fail would defeat the purpose.
        self._run('crossystem clear_tpm_owner_request=1',
                  ignore_status=True)
        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

        # Touch the lab machine file to leave a marker that
        # distinguishes this image from other test images.
        # Afterwards, we must re-run the autoreboot script because
        # it depends on the _LAB_MACHINE_FILE.
        autoreboot_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
                          '( touch "$FILE" ; start autoreboot )')
        self._run(autoreboot_cmd % _LAB_MACHINE_FILE)
        self.verify_boot_expectations(
                expected_kernel, NewBuildUpdateError.ROLLBACK_FAILURE)

        logging.debug('Cleaning up old autotest directories.')
        try:
            installed_autodir = autotest.Autotest.get_installed_autodir(
                    self.host)
            self._run('rm -rf ' + installed_autodir)
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')
    def run_update(self):
        """Perform a full update of a DUT in the test lab.

        This downloads and installs the root FS and stateful partition
        content needed for the update specified in `self.host` and
        `self.update_url`. The update is performed according to the
        requirements for provisioning a DUT for testing the requested
        build.

        At the end of the procedure, metrics are reported describing the
        outcome of the operation.

        @returns A tuple of the form `(image_name, attributes)`, where
            `image_name` is the name of the image installed, and
            `attributes` is a dict of new attributes to be applied to
            the DUT.
        """
        server_name = dev_server.get_resolved_hostname(self.update_url)
        metrics.Counter(_metric_name('install')).increment(
                fields={'devserver': server_name})

        self._verify_devserver()

        try:
            self._prepare_host()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure preparing host prior to update.')
            raise HostUpdateError(self.host.hostname, str(e))

        try:
            expected_kernel = self._install_update()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure during download and install.')
            server_name = dev_server.get_resolved_hostname(self.update_url)
            raise ImageInstallError(self.host.hostname, server_name, str(e))

        try:
            self._complete_update(expected_kernel)
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure from build after update.')
            raise NewBuildUpdateError(self.update_version, str(e))

        image_name = url_to_image_name(self.update_url)
        # The update_url differs from the devserver URL needed to stage
        # autotest packages, so resolve a new devserver URL here.
        devserver_url = dev_server.ImageServer.resolve(
                image_name, self.host.hostname).url()
        repo_url = tools.get_package_url(devserver_url, image_name)
        return image_name, {ds_constants.JOB_REPO_URL: repo_url}
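# Typical use of this class (a minimal sketch; the devserver URL and `host`
# object here are hypothetical -- real callers pass an autotest Host):
#
#   updater = ChromiumOSUpdater(
#       'http://devserver:8082/update/lumpy-release/R27-3837.0.0', host=host)
#   image_name, host_attributes = updater.run_update()
#
# On success the DUT has rebooted into the new build, and `host_attributes`
# contains the job repo URL to record for the DUT.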