liyujie
2025-08-28 786ff4f4ca2374bdd9177f2e24b503d43e7a3b93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#!/usr/bin/python
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
 
import argparse
import datetime
import logging
import os
import re
import sys
import time
 
os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
 
import common
from autotest_lib.server import utils
from django.db import connections, transaction
 
 
# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
                     '|3[01])$')
SELECT_CMD_FORMAT = """
SELECT %(table)s.%(primary_key)s FROM %(table)s
WHERE %(table)s.%(time_column)s <= "%(date)s"
"""
SELECT_JOIN_CMD_FORMAT = """
SELECT %(table)s.%(primary_key)s FROM %(table)s
INNER JOIN %(related_table)s
  ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
WHERE %(related_table)s.%(time_column)s <= "%(date)s"
"""
SELECT_WITH_INDIRECTION_FORMAT = """
SELECT %(table)s.%(primary_key)s FROM %(table)s
INNER JOIN %(indirection_table)s
  ON %(table)s.%(foreign_key)s =
     %(indirection_table)s.%(indirection_primary_key)s
INNER JOIN %(related_table)s
  ON %(indirection_table)s.%(indirection_foreign_key)s =
  %(related_table)s.%(related_primary_key)s
WHERE %(related_table)s.%(time_column)s <= "%(date)s"
"""
DELETE_ROWS_FORMAT = """
DELETE FROM %(table)s
WHERE %(table)s.%(primary_key)s IN (%(rows)s)
"""
 
 
AFE_JOB_ID = 'afe_job_id'
JOB_ID = 'job_id'
JOB_IDX = 'job_idx'
TEST_IDX = 'test_idx'
 
# CAUTION: Make sure only the 'default' connection is used. Otherwise
# db_cleanup may delete stuff from the global database, which is generally not
# intended.
cursor = connections['default'].cursor()
 
# Globals for command line flag constants, for convenience.
DRY_RUN = False
STEP_SIZE = None
LOAD_RATIO = 1.0
 
class ProgressBar(object):
    TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
 
    def __init__(self, name, amount):
        self._name = name
        self._amount = amount
        self._cur = 0
 
    def __enter__(self):
        return self
 
    def __exit__(self, a, b, c):
        sys.stdout.write('\n')
        sys.stdout.flush()
 
    def update(self, x):
        """
        Advance the counter by `x`.
 
        @param x: An integer of how many more elements were processed.
        """
        self._cur += x
 
    def show(self):
        """
        Display the progress bar on the current line.  Repeated invocations
        "update" the display.
        """
        if self._amount == 0:
            barlen = 20
        else:
            barlen = int(20 * self._cur / float(self._amount))
        if barlen:
            bartext = '=' * (barlen-1) + '>'
        else:
            bartext = ''
        text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
        sys.stdout.write('\r')
        sys.stdout.write(text)
        sys.stdout.flush()
 
 
def grouper(iterable, n):
    """
    Group the elements of `iterable` into groups of maximum size `n`.
 
    @param iterable: An iterable.
    @param n: Max size of returned groups.
    @returns: Yields iterables of size <= n.
 
    >>> grouper('ABCDEFG', 3)
    [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
    """
    args = [iter(iterable)] * n
    while True:
        lst = []
        try:
            for itr in args:
                lst.append(next(itr))
            yield lst
        except StopIteration:
            if lst:
                yield lst
            break
 
 
def _delete_table_data_before_date(table_to_delete_from, primary_key,
                                   related_table, related_primary_key,
                                   date, foreign_key=None,
                                   time_column="started_time",
                                   indirection_table=None,
                                   indirection_primary_key=None,
                                   indirection_foreign_key=None):
    """
    We want a delete statement that will only delete from one table while
    using a related table to find the rows to delete.
 
    An example mysql command:
    DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
    tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
    tko_tests.started_time <= '2012-02-01';
 
    There are also tables that require 2 joins to determine which rows we want
    to delete and we determine these rows by joining the table we want to
    delete from with an indirection table to the actual jobs table.
 
    @param table_to_delete_from: Table whose rows we want to delete.
    @param related_table: Table with the date information we are selecting by.
    @param foreign_key: Foreign key used in table_to_delete_from to reference
                        the related table. If None, the primary_key is used.
    @param primary_key: Primary key in the related table.
    @param date: End date of the information we are trying to delete.
    @param time_column: Column that we want to use to compare the date to.
    @param indirection_table: Table we use to link the data we are trying to
                              delete with the table with the date information.
    @param indirection_primary_key: Key we use to connect the indirection table
                                    to the table we are trying to delete rows
                                    from.
    @param indirection_foreign_key: Key we use to connect the indirection table
                                    to the table with the date information.
    """
    if not foreign_key:
        foreign_key = primary_key
 
    if not related_table:
        # Deleting from a table directly.
        variables = dict(table=table_to_delete_from, primary_key=primary_key,
                         time_column=time_column, date=date)
        sql = SELECT_CMD_FORMAT % variables
    elif not indirection_table:
        # Deleting using a single JOIN to get the date information.
        variables = dict(primary_key=primary_key, table=table_to_delete_from,
                         foreign_key=foreign_key, related_table=related_table,
                         related_primary_key=related_primary_key,
                         time_column=time_column, date=date)
        sql = SELECT_JOIN_CMD_FORMAT % variables
    else:
        # There are cases where we need to JOIN 3 TABLES to determine the rows
        # we want to delete.
        variables = dict(primary_key=primary_key, table=table_to_delete_from,
                         indirection_table=indirection_table,
                         foreign_key=foreign_key,
                         indirection_primary_key=indirection_primary_key,
                         related_table=related_table,
                         related_primary_key=related_primary_key,
                         indirection_foreign_key=indirection_foreign_key,
                         time_column=time_column, date=date)
        sql = SELECT_WITH_INDIRECTION_FORMAT % variables
 
    logging.debug('SQL: %s', sql)
    cursor.execute(sql, [])
    rows = [x[0] for x in cursor.fetchall()]
    logging.debug(rows)
 
    if not rows or rows == [None]:
        with ProgressBar(table_to_delete_from, 0) as pb:
            pb.show()
        logging.debug('Nothing to delete for %s', table_to_delete_from)
        return
 
    with ProgressBar(table_to_delete_from, len(rows)) as pb:
        for row_keys in grouper(rows, STEP_SIZE):
            variables['rows'] = ','.join([str(x) for x in row_keys])
            sql = DELETE_ROWS_FORMAT % variables
 
            start = time.time()
            logging.debug('SQL: %s', sql)
            if not DRY_RUN:
                cursor.execute(sql, [])
                transaction.commit_unless_managed(using='default')
            end = time.time()
 
            pb.update(len(row_keys))
            pb.show()
 
            if LOAD_RATIO != 1.0:
                assert 0 < LOAD_RATIO <= 1, (
                        'Load ratio must be a fraction between 0 and 1.')
                time.sleep((end - start) / LOAD_RATIO)
 
 
def _subtract_days(date, days_to_subtract):
    """
    Return a date (string) that is 'days' before 'date'
 
    @param date: date (string) we are subtracting from.
    @param days_to_subtract: days (int) we are subtracting.
    """
    date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    difference = date_obj - datetime.timedelta(days=days_to_subtract)
    return difference.strftime('%Y-%m-%d')
 
 
def _delete_all_data_before_date(date):
    """
    Delete all the database data before a given date.
 
    This function focuses predominately on the data for jobs in tko_jobs.
    However not all jobs in afe_jobs are also in tko_jobs.
 
    Therefore we delete all the afe_job and foreign key relations prior to two
    days before date. Then we do the queries using tko_jobs and these
    tables to ensure all the related information is gone. Even though we are
    repeating deletes on these tables, the second delete will be quick and
    completely thorough in ensuring we clean up all the foreign key
    dependencies correctly.
 
    @param date: End date of the information we are trying to delete.
    @param step: Rows to delete per SQL query.
    """
    # First cleanup all afe_job related data (prior to 2 days before date).
    # The reason for this is not all afe_jobs may be in tko_jobs.
    afe_date = _subtract_days(date, 2)
    logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
    _delete_table_data_before_date('afe_aborted_host_queue_entries',
                                   'queue_entry_id',
                                   'afe_jobs', 'id', afe_date,
                                   time_column= 'created_on',
                                   foreign_key='queue_entry_id',
                                   indirection_table='afe_host_queue_entries',
                                   indirection_primary_key='id',
                                   indirection_foreign_key='job_id')
    _delete_table_data_before_date('afe_special_tasks', 'id',
                                   'afe_jobs', 'id',
                                   afe_date, time_column='created_on',
                                   foreign_key='queue_entry_id',
                                   indirection_table='afe_host_queue_entries',
                                   indirection_primary_key='id',
                                   indirection_foreign_key='job_id')
    _delete_table_data_before_date('afe_host_queue_entries', 'id',
                                   'afe_jobs', 'id',
                                   afe_date, time_column='created_on',
                                   foreign_key=JOB_ID)
    _delete_table_data_before_date('afe_job_keyvals', 'id',
                                   'afe_jobs', 'id',
                                   afe_date, time_column='created_on',
                                   foreign_key=JOB_ID)
    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
                                   'afe_jobs', 'id',
                                   afe_date, time_column='created_on',
                                   foreign_key=JOB_ID)
    _delete_table_data_before_date('afe_jobs', 'id',
                                   None, None,
                                   afe_date, time_column='created_on')
    # Special tasks that aren't associated with an HQE
    # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
    # more than we should, but I doubt anyone will notice or care.
    _delete_table_data_before_date('afe_special_tasks', 'id',
                                   None, None,
                                   afe_date, time_column='time_requested')
 
    # Now go through and clean up all the rows related to tko_jobs prior to
    # date.
    logging.info('Cleaning up all data related to tko_jobs prior to %s.',
                  date)
    _delete_table_data_before_date('tko_test_attributes', 'id',
                                   'tko_tests', TEST_IDX,
                                   date, foreign_key=TEST_IDX)
    _delete_table_data_before_date('tko_test_labels_tests', 'id',
                                   'tko_tests', TEST_IDX,
                                   date, foreign_key= 'test_id')
    _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
                                   'tko_tests', TEST_IDX,
                                   date)
    _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
                                   'tko_tests', TEST_IDX,
                                   date)
    _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
                                   'tko_tests', TEST_IDX,
                                   date)
    _delete_table_data_before_date('tko_job_keyvals', 'id',
                                   'tko_jobs', JOB_IDX,
                                   date, foreign_key='job_id')
    _delete_table_data_before_date('afe_aborted_host_queue_entries',
                                   'queue_entry_id',
                                   'tko_jobs', AFE_JOB_ID, date,
                                   foreign_key='queue_entry_id',
                                   indirection_table='afe_host_queue_entries',
                                   indirection_primary_key='id',
                                   indirection_foreign_key='job_id')
    _delete_table_data_before_date('afe_special_tasks', 'id',
                                   'tko_jobs', AFE_JOB_ID,
                                   date, foreign_key='queue_entry_id',
                                   indirection_table='afe_host_queue_entries',
                                   indirection_primary_key='id',
                                   indirection_foreign_key='job_id')
    _delete_table_data_before_date('afe_host_queue_entries', 'id',
                                   'tko_jobs', AFE_JOB_ID,
                                   date, foreign_key='job_id')
    _delete_table_data_before_date('afe_job_keyvals', 'id',
                                   'tko_jobs', AFE_JOB_ID,
                                   date, foreign_key='job_id')
    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
                                   'tko_jobs', AFE_JOB_ID,
                                   date, foreign_key='job_id')
    _delete_table_data_before_date('afe_jobs', 'id',
                                   'tko_jobs', AFE_JOB_ID,
                                   date, foreign_key='id')
    _delete_table_data_before_date('tko_tests', TEST_IDX,
                                   'tko_jobs', JOB_IDX,
                                   date, foreign_key=JOB_IDX)
    _delete_table_data_before_date('tko_jobs', JOB_IDX,
                                   None, None, date)
 
 
def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Print SQL commands and results')
    parser.add_argument('--step', type=int, action='store',
                        default=1000,
                        help='Number of rows to delete at once')
    parser.add_argument('--dry_run', action='store_true',
                        help='Print SQL queries instead of executing them.')
    parser.add_argument('--load_ratio', type=float, action='store', default=0.2,
                        help=('The fraction of time the script should be '
                              'performing deletes. For example --load_ratio=.2 '
                              'will cause the script to sleep 80% of the time, '
                              'and perform work for the other 20%.'))
    parser.add_argument('date', help='Keep results newer than')
    return parser.parse_args()
 
 
def main():
    args = parse_args()
 
    verbose = args.verbose or args.dry_run
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(level=level, format=LOGGING_FORMAT)
    logging.info('Calling: %s', sys.argv)
 
    if not re.match(DATE_FORMAT_REGEX, args.date):
        print 'DATE must be in yyyy-mm-dd format!'
        return
 
    global STEP_SIZE, DRY_RUN, LOAD_RATIO
    STEP_SIZE = args.step
    DRY_RUN = args.dry_run
    LOAD_RATIO = args.load_ratio
 
    _delete_all_data_before_date(args.date)
 
 
if __name__ == '__main__':
    main()