#!/usr/bin/env python
|
# Copyright 2018 The Chromium OS Authors. All rights reserved.
|
# Use of this source code is governed by a BSD-style license that can be
|
# found in the LICENSE file.
|
|
"""A simple service to monitor DUT statuses from master db/afe."""
|
import collections
|
import logging
|
import sys
|
import time
|
|
import common
|
from autotest_lib.server import constants
|
from autotest_lib.server import frontend
|
from chromite.lib import metrics
|
from chromite.lib import ts_mon_config
|
|
from infra_libs import ts_mon
|
|
|
# Immutable, hashable key identifying one DUT counter bucket. The field names
# and their order mirror the ts_mon field spec that main() attaches to the
# dut_count gauge.
DutCountBucket = collections.namedtuple(
    'DutCountBucket',
    ('board', 'model', 'pool', 'is_locked', 'status'),
)
|
|
|
def _get_bucket_for_host(host):
    """Work out which counter bucket |host| belongs to.

    The board, model and pool are read from the host's labels. A pool that is
    listed in constants.Pools.MANAGED_POOLS is reported with a 'managed:'
    prefix, and a falsy host status is reported as '[None]'.

    Args:
        host: A Host object as returned by afe.

    Returns:
        A DutCountBucket instance describing the bucket for this host.
    """
    label_prefixes = (
        constants.Labels.BOARD_PREFIX,
        constants.Labels.MODEL_PREFIX,
        constants.Labels.POOL_PREFIX,
    )
    board, model, pool = [
        _get_unique_label(host.labels, label_prefix)
        for label_prefix in label_prefixes
    ]

    is_managed = pool in constants.Pools.MANAGED_POOLS
    pool_name = 'managed:' + pool if is_managed else pool

    # Substitute a printable placeholder when the host has no status.
    status = host.status or '[None]'
    return DutCountBucket(
        board=board,
        model=model,
        pool=pool_name,
        is_locked=host.locked,
        status=status,
    )
|
|
|
def _get_unique_label(labels, prefix):
|
"""Return the labels for a given prefix, with prefix stripped.
|
|
If prefixed label does not occur, return '[None]'
|
If prefixed label occurs multiply, return '[Multiple]'
|
|
_get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'
|
|
_get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'
|
|
_get_prefixed_labels(['bar1', 'baz1'], 'foo:') -> '[None]'
|
"""
|
ls = [l[len(prefix):] for l in labels if l.startswith(prefix)]
|
if not ls:
|
return '[None]'
|
elif len(ls) == 1:
|
return ls[0]
|
else:
|
return '[Multiple]'
|
|
|
def main(argv):
    """Entry point for dut_mon.

    Runs an endless polling loop: fetches every host from the AFE, groups the
    hosts into DutCountBucket buckets, publishes the size of each bucket
    through the 'chromeos/autotest/dut_mon/dut_count' gauge, increments the
    tick counter and then sleeps for two minutes. The loop has no exit
    condition, so this function only ends by raising.

    Args:
        argv: Command line arguments. Currently unused.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        # Maps DutCountBucket -> number of hosts seen in the current pass.
        counters = collections.defaultdict(int)

        # Field names and order match DutCountBucket._fields.
        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this
            # process get set back to zero if necessary.
            for bucket in counters:
                counters[bucket] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                counters[_get_bucket_for_host(host)] += 1

            # items() and _asdict() are used instead of iteritems() and
            # __dict__: the former pair works on both Python 2 and 3, whereas
            # namedtuple instances have no __dict__ on Python 3.5+.
            for bucket, count in counters.items():
                logging.info('%s %s', bucket, count)
                dut_count.set(count, fields=bucket._asdict())

            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)
|
|
|
if __name__ == '__main__':
    # Start the monitoring loop only when executed as a script, not on import.
    main(argv=sys.argv)
|