#!/usr/bin/python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
|
|
"""Utility to check the replication delay of the slave databases.
|
|
The utility checks the value of Seconds_Behind_Master of slave databases,
|
including:
|
Slave databases of AFE database, retrieved from server database.
|
Readonly replicas of TKO database, passed in by option --replicas.
|
"""
|
|
import argparse
|
import logging
|
import os
|
import re
|
|
import common
|
from autotest_lib.client.bin import utils
|
from autotest_lib.client.common_lib import error
|
from autotest_lib.client.common_lib import global_config
|
from autotest_lib.client.common_lib import logging_config
|
from autotest_lib.frontend import setup_django_environment
|
from autotest_lib.server import site_utils
|
from autotest_lib.site_utils import server_manager_utils
|
|
from chromite.lib import metrics
|
|
|
# Handle to the global configuration; main() reads the AUTOTEST_WEB database
# credentials from it.
CONFIG = global_config.global_config
|
|
# SQL command to show the replication status of a slave database. The
# trailing \G asks the mysql client for vertical output, i.e. one
# "Field: value" pair per line. Raw string: \G must reach mysql unchanged.
SLAVE_STATUS_CMD = r'show slave status\G'
# Captures the integer value of Seconds_Behind_Master in the status output.
# A non-numeric value (e.g. NULL) does not match. Raw string so that \s and
# \d are regex escapes rather than (invalid) string escapes.
DELAY_TIME_REGEX = r'Seconds_Behind_Master:\s(\d+)'
# Name of the metric the measured delay is reported to.
DELAY_METRICS = 'chromeos/autotest/afe_db/slave_delay_seconds'
# A large delay to report to metrics indicating the replica is in error.
LARGE_DELAY = 1000000
|
|
def check_delay(server, user, password):
    """Report the replication delay of one slave database server.

    Runs SLAVE_STATUS_CMD on the server and searches the output for the
    Seconds_Behind_Master value. The value found is sent to the DELAY_METRICS
    metric, tagged with the server name. When no numeric value is found,
    LARGE_DELAY is sent instead. If the SQL command itself fails, the failure
    is logged and no metric is reported.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    try:
        status = utils.run_sql_cmd(server, user, password, SLAVE_STATUS_CMD)
        match = re.search(DELAY_TIME_REGEX, status, re.MULTILINE)
        delay_metric = metrics.Float(DELAY_METRICS)
        metric_fields = {'slave': server}

        if not match:
            # The value of Seconds_Behind_Master could be NULL; report a
            # large number to indicate a database error.
            delay_metric.set(LARGE_DELAY, fields=metric_fields)
            logging.error('Failed to get Seconds_Behind_Master of server %s '
                          'from slave status:\n %s', server, status)
            return

        seconds_behind = float(match.group(1))
        delay_metric.set(seconds_behind, fields=metric_fields)
        logging.debug('Seconds_Behind_Master of server %s is %d.', server,
                      seconds_behind)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
|
|
|
def parse_options():
    """Build the command line parser and parse the script's arguments.

    @return: Parsed options with attributes `replicas` (a list, empty when
             the option is not given) and `logfile` (None when not given).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
            '-r', '--replicas',
            nargs='+',
            default=[],
            help='IP addresses of readonly replicas of TKO.')
    arg_parser.add_argument(
            '-l', '--logfile',
            type=str,
            default=None,
            help='Path to the log file to save logs.')
    options = arg_parser.parse_args()
    return options
|
|
|
def main():
    """Check the replication delay of every known slave database.

    Parses the command line, optionally attaches a DEBUG-level file handler
    to the logging configuration, then calls check_delay() for each replica
    given by --replicas (using the global DB credentials) and for each
    server returned by the server database for role 'database_slave' with
    status 'primary' (using the regular DB credentials).
    """
    with site_utils.SetupTsMonGlobalState('check_slave_db_delay',
                                          indirect=True):
        options = parse_options()
        log_config = logging_config.LoggingConfig()
        if options.logfile:
            log_path = os.path.abspath(options.logfile)
            log_config.add_file_handler(file_path=log_path,
                                        level=logging.DEBUG)

        section = 'AUTOTEST_WEB'
        db_user = CONFIG.get_config_value(section, 'user')
        db_password = CONFIG.get_config_value(section, 'password')
        # The global DB credentials fall back to the regular ones when they
        # are not configured.
        global_db_user = CONFIG.get_config_value(
                section, 'global_db_user', default=db_user)
        global_db_password = CONFIG.get_config_value(
                section, 'global_db_password', default=db_password)

        logging.info('Start checking Seconds_Behind_Master of slave databases')

        if options.replicas:
            for replica_host in options.replicas:
                check_delay(replica_host, global_db_user, global_db_password)
        else:
            logging.warning('No replicas checked.')

        slaves = server_manager_utils.get_servers(
                role='database_slave', status='primary')
        if slaves:
            for slave_server in slaves:
                check_delay(slave_server.hostname, db_user, db_password)
        else:
            logging.warning('No slaves checked.')

        # NOTE(review): the original indentation of this line was lost; it
        # is assumed to run inside the ts_mon context -- confirm.
        logging.info('Finished checking.')
|
|
|
# Run the check only when the file is executed directly as a script.
if __name__ == '__main__':
    main()
|