| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * xor.c : Multiple Devices driver for Linux |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 5 | 6 | * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. |
|---|
| 6 | 7 | * |
|---|
| 7 | 8 | * Dispatch optimized RAID-5 checksumming functions. |
|---|
| 8 | | - * |
|---|
| 9 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 10 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 11 | | - * the Free Software Foundation; either version 2, or (at your option) |
|---|
| 12 | | - * any later version. |
|---|
| 13 | | - * |
|---|
| 14 | | - * You should have received a copy of the GNU General Public License |
|---|
| 15 | | - * (for example /usr/src/linux/COPYING); if not, write to the Free |
|---|
| 16 | | - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 17 | 9 | */ |
|---|
| 18 | 10 | |
|---|
| 19 | 11 | #define BH_TRACE 0 |
|---|
| .. | .. |
|---|
| 62 | 54 | /* Set of all registered templates. */ |
|---|
| 63 | 55 | static struct xor_block_template *__initdata template_list; |
|---|
| 64 | 56 | |
|---|
| 65 | | -#define BENCH_SIZE (PAGE_SIZE) |
|---|
| 57 | +#ifndef MODULE |
|---|
| 58 | +static void __init do_xor_register(struct xor_block_template *tmpl) |
|---|
| 59 | +{ |
|---|
| 60 | + tmpl->next = template_list; |
|---|
| 61 | + template_list = tmpl; |
|---|
| 62 | +} |
|---|
| 63 | + |
|---|
| 64 | +static int __init register_xor_blocks(void) |
|---|
| 65 | +{ |
|---|
| 66 | + active_template = XOR_SELECT_TEMPLATE(NULL); |
|---|
| 67 | + |
|---|
| 68 | + if (!active_template) { |
|---|
| 69 | +#define xor_speed do_xor_register |
|---|
| 70 | + // register all the templates and default to the list head (the most recently registered one) |
|---|
| 71 | + XOR_TRY_TEMPLATES; |
|---|
| 72 | +#undef xor_speed |
|---|
| 73 | + active_template = template_list; |
|---|
| 74 | + } |
|---|
| 75 | + return 0; |
|---|
| 76 | +} |
|---|
| 77 | +#endif |
|---|
| 78 | + |
|---|
| 79 | +#define BENCH_SIZE 4096 |
|---|
| 80 | +#define REPS 800U |
|---|
| 66 | 81 | |
|---|
| 67 | 82 | static void __init |
|---|
| 68 | 83 | do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) |
|---|
| 69 | 84 | { |
|---|
| 70 | 85 | int speed; |
|---|
| 71 | | - unsigned long now, j; |
|---|
| 72 | | - int i, count, max; |
|---|
| 86 | + int i, j; |
|---|
| 87 | + ktime_t min, start, diff; |
|---|
| 73 | 88 | |
|---|
| 74 | 89 | tmpl->next = template_list; |
|---|
| 75 | 90 | template_list = tmpl; |
|---|
| 76 | 91 | |
|---|
| 77 | 92 | preempt_disable(); |
|---|
| 78 | 93 | |
|---|
| 79 | | - /* |
|---|
| 80 | | - * Count the number of XORs done during a whole jiffy, and use |
|---|
| 81 | | - * this to calculate the speed of checksumming. We use a 2-page |
|---|
| 82 | | - * allocation to have guaranteed color L1-cache layout. |
|---|
| 83 | | - */ |
|---|
| 84 | | - max = 0; |
|---|
| 85 | | - for (i = 0; i < 5; i++) { |
|---|
| 86 | | - j = jiffies; |
|---|
| 87 | | - count = 0; |
|---|
| 88 | | - while ((now = jiffies) == j) |
|---|
| 89 | | - cpu_relax(); |
|---|
| 90 | | - while (time_before(jiffies, now + 1)) { |
|---|
| 94 | + min = (ktime_t)S64_MAX; |
|---|
| 95 | + for (i = 0; i < 3; i++) { |
|---|
| 96 | + start = ktime_get(); |
|---|
| 97 | + for (j = 0; j < REPS; j++) { |
|---|
| 91 | 98 | mb(); /* prevent loop optimization */ |
|---|
| 92 | 99 | tmpl->do_2(BENCH_SIZE, b1, b2); |
|---|
| 93 | 100 | mb(); |
|---|
| 94 | | - count++; |
|---|
| 95 | | - mb(); |
|---|
| 96 | 101 | } |
|---|
| 97 | | - if (count > max) |
|---|
| 98 | | - max = count; |
|---|
| 102 | + diff = ktime_sub(ktime_get(), start); |
|---|
| 103 | + if (diff < min) |
|---|
| 104 | + min = diff; |
|---|
| 99 | 105 | } |
|---|
| 100 | 106 | |
|---|
| 101 | 107 | preempt_enable(); |
|---|
| 102 | 108 | |
|---|
| 103 | | - speed = max * (HZ * BENCH_SIZE / 1024); |
|---|
| 109 | + // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] |
|---|
| 110 | + if (!min) |
|---|
| 111 | + min = 1; |
|---|
| 112 | + speed = (1000 * REPS * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); |
|---|
| 104 | 113 | tmpl->speed = speed; |
|---|
| 105 | 114 | |
|---|
| 106 | | - printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name, |
|---|
| 107 | | - speed / 1000, speed % 1000); |
|---|
| 115 | + pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed); |
|---|
| 108 | 116 | } |
|---|
| 109 | 117 | |
|---|
| 110 | 118 | static int __init |
|---|
| .. | .. |
|---|
| 137 | 145 | #define xor_speed(templ) do_xor_speed((templ), b1, b2) |
|---|
| 138 | 146 | |
|---|
| 139 | 147 | printk(KERN_INFO "xor: measuring software checksum speed\n"); |
|---|
| 148 | + template_list = NULL; |
|---|
| 140 | 149 | XOR_TRY_TEMPLATES; |
|---|
| 141 | 150 | fastest = template_list; |
|---|
| 142 | 151 | for (f = fastest; f; f = f->next) |
|---|
| 143 | 152 | if (f->speed > fastest->speed) |
|---|
| 144 | 153 | fastest = f; |
|---|
| 145 | 154 | |
|---|
| 146 | | - printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n", |
|---|
| 147 | | - fastest->name, fastest->speed / 1000, fastest->speed % 1000); |
|---|
| 155 | + pr_info("xor: using function: %s (%d MB/sec)\n", |
|---|
| 156 | + fastest->name, fastest->speed); |
|---|
| 148 | 157 | |
|---|
| 149 | 158 | #undef xor_speed |
|---|
| 150 | 159 | |
|---|
| .. | .. |
|---|
| 158 | 167 | |
|---|
| 159 | 168 | MODULE_LICENSE("GPL"); |
|---|
| 160 | 169 | |
|---|
| 170 | +#ifndef MODULE |
|---|
| 161 | 171 | /* when built-in, xor.o must initialize before drivers/md/md.o */ |
|---|
| 162 | | -core_initcall(calibrate_xor_blocks); |
|---|
| 172 | +core_initcall(register_xor_blocks); |
|---|
| 173 | +#endif |
|---|
| 174 | + |
|---|
| 175 | +module_init(calibrate_xor_blocks); |
|---|
| 163 | 176 | module_exit(xor_exit); |
|---|