hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/tools/testing/selftests/net/tcp_mmap.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright 2018 Google Inc.
34 * Author: Eric Dumazet (edumazet@google.com)
....@@ -44,21 +45,6 @@
4445 * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
4546 * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
4647 * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
47
- *
48
- * License (GPLv2):
49
- *
50
- * This program is free software; you can redistribute it and/or modify it
51
- * under the terms and conditions of the GNU General Public License,
52
- * version 2, as published by the Free Software Foundation.
53
- *
54
- * This program is distributed in the hope it will be useful, but WITHOUT
55
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
56
- * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
57
- * more details.
58
- *
59
- * You should have received a copy of the GNU General Public License along with
60
- * this program; if not, write to the Free Software Foundation, Inc.,
61
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
6248 */
6349 #define _GNU_SOURCE
6450 #include <pthread.h>
....@@ -85,7 +71,7 @@
8571 #define MSG_ZEROCOPY 0x4000000
8672 #endif
8773
88
-#define FILE_SZ (1UL << 35)
74
+#define FILE_SZ (1ULL << 35)
8975 static int cfg_family = AF_INET6;
9076 static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
9177 static int cfg_port = 8787;
....@@ -96,7 +82,9 @@
9682 static int xflg; /* hash received data (simple xor) (-h option) */
9783 static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
9884
99
-static int chunk_size = 512*1024;
85
+static size_t chunk_size = 512*1024;
86
+
87
+static size_t map_align;
10088
10189 unsigned long htotal;
10290
....@@ -132,6 +120,31 @@
132120 htotal = temp;
133121 }
134122
123
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
124
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
125
+
126
+
127
+static void *mmap_large_buffer(size_t need, size_t *allocated)
128
+{
129
+ void *buffer;
130
+ size_t sz;
131
+
132
+ /* Attempt to use huge pages if possible. */
133
+ sz = ALIGN_UP(need, map_align);
134
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
135
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
136
+
137
+ if (buffer == (void *)-1) {
138
+ sz = need;
139
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
140
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
141
+ if (buffer != (void *)-1)
142
+ fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
143
+ }
144
+ *allocated = sz;
145
+ return buffer;
146
+}
147
+
135148 void *child_thread(void *arg)
136149 {
137150 unsigned long total_mmap = 0, total = 0;
....@@ -140,9 +153,11 @@
140153 int flags = MAP_SHARED;
141154 struct timeval t0, t1;
142155 char *buffer = NULL;
156
+ void *raddr = NULL;
143157 void *addr = NULL;
144158 double throughput;
145159 struct rusage ru;
160
+ size_t buffer_sz;
146161 int lu, fd;
147162
148163 fd = (int)(unsigned long)arg;
....@@ -150,15 +165,19 @@
150165 gettimeofday(&t0, NULL);
151166
152167 fcntl(fd, F_SETFL, O_NDELAY);
153
- buffer = malloc(chunk_size);
154
- if (!buffer) {
155
- perror("malloc");
168
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
169
+ if (buffer == (void *)-1) {
170
+ perror("mmap");
156171 goto error;
157172 }
158173 if (zflg) {
159
- addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
160
- if (addr == (void *)-1)
174
+ raddr = mmap(NULL, chunk_size + map_align, PROT_READ, flags, fd, 0);
175
+ if (raddr == (void *)-1) {
176
+ perror("mmap");
161177 zflg = 0;
178
+ } else {
179
+ addr = ALIGN_PTR_UP(raddr, map_align);
180
+ }
162181 }
163182 while (1) {
164183 struct pollfd pfd = { .fd = fd, .events = POLLIN, };
....@@ -169,9 +188,10 @@
169188 socklen_t zc_len = sizeof(zc);
170189 int res;
171190
172
- zc.address = (__u64)addr;
191
+ memset(&zc, 0, sizeof(zc));
192
+ zc.address = (__u64)((unsigned long)addr);
173193 zc.length = chunk_size;
174
- zc.recv_skip_hint = 0;
194
+
175195 res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
176196 &zc, &zc_len);
177197 if (res == -1)
....@@ -182,6 +202,10 @@
182202 total_mmap += zc.length;
183203 if (xflg)
184204 hash_zone(addr, zc.length);
205
+ /* It is more efficient to unmap the pages right now,
206
+ * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
207
+ */
208
+ madvise(addr, zc.length, MADV_DONTNEED);
185209 total += zc.length;
186210 }
187211 if (zc.recv_skip_hint) {
....@@ -233,10 +257,10 @@
233257 ru.ru_nvcsw);
234258 }
235259 error:
236
- free(buffer);
260
+ munmap(buffer, buffer_sz);
237261 close(fd);
238262 if (zflg)
239
- munmap(addr, chunk_size);
263
+ munmap(raddr, chunk_size + map_align);
240264 pthread_exit(0);
241265 }
242266
....@@ -284,8 +308,15 @@
284308
285309 static void do_accept(int fdlisten)
286310 {
311
+ pthread_attr_t attr;
312
+ int rcvlowat;
313
+
314
+ pthread_attr_init(&attr);
315
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
316
+
317
+ rcvlowat = chunk_size;
287318 if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
288
- &chunk_size, sizeof(chunk_size)) == -1) {
319
+ &rcvlowat, sizeof(rcvlowat)) == -1) {
289320 perror("setsockopt SO_RCVLOWAT");
290321 }
291322
....@@ -302,7 +333,7 @@
302333 perror("accept");
303334 continue;
304335 }
305
- res = pthread_create(&th, NULL, child_thread,
336
+ res = pthread_create(&th, &attr, child_thread,
306337 (void *)(unsigned long)fd);
307338 if (res) {
308339 errno = res;
....@@ -312,18 +343,43 @@
312343 }
313344 }
314345
346
+/* Each thread should reserve a big enough vma to avoid
347
+ * spinlock collisions in ptl locks.
348
+ * This size is 2MB on x86_64, and is exported in /proc/meminfo.
349
+ */
350
+static unsigned long default_huge_page_size(void)
351
+{
352
+ FILE *f = fopen("/proc/meminfo", "r");
353
+ unsigned long hps = 0;
354
+ size_t linelen = 0;
355
+ char *line = NULL;
356
+
357
+ if (!f)
358
+ return 0;
359
+ while (getline(&line, &linelen, f) > 0) {
360
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
361
+ hps <<= 10;
362
+ break;
363
+ }
364
+ }
365
+ free(line);
366
+ fclose(f);
367
+ return hps;
368
+}
369
+
315370 int main(int argc, char *argv[])
316371 {
317372 struct sockaddr_storage listenaddr, addr;
318373 unsigned int max_pacing_rate = 0;
319
- unsigned long total = 0;
374
+ uint64_t total = 0;
320375 char *host = NULL;
321376 int fd, c, on = 1;
377
+ size_t buffer_sz;
322378 char *buffer;
323379 int sflg = 0;
324380 int mss = 0;
325381
326
- while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
382
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
327383 switch (c) {
328384 case '4':
329385 cfg_family = PF_INET;
....@@ -363,9 +419,23 @@
363419 case 'P':
364420 max_pacing_rate = atoi(optarg) ;
365421 break;
422
+ case 'C':
423
+ chunk_size = atol(optarg);
424
+ break;
425
+ case 'a':
426
+ map_align = atol(optarg);
427
+ break;
366428 default:
367429 exit(1);
368430 }
431
+ }
432
+ if (!map_align) {
433
+ map_align = default_huge_page_size();
434
+ /* if really /proc/meminfo is not helping,
435
+ * we use the default x86_64 hugepagesize.
436
+ */
437
+ if (!map_align)
438
+ map_align = 2*1024*1024;
369439 }
370440 if (sflg) {
371441 int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
....@@ -395,8 +465,8 @@
395465 }
396466 do_accept(fdlisten);
397467 }
398
- buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
399
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
468
+
469
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
400470 if (buffer == (char *)-1) {
401471 perror("mmap");
402472 exit(1);
....@@ -431,17 +501,17 @@
431501 zflg = 0;
432502 }
433503 while (total < FILE_SZ) {
434
- long wr = FILE_SZ - total;
504
+ int64_t wr = FILE_SZ - total;
435505
436506 if (wr > chunk_size)
437507 wr = chunk_size;
438508 /* Note : we just want to fill the pipe with 0 bytes */
439
- wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
509
+ wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
440510 if (wr <= 0)
441511 break;
442512 total += wr;
443513 }
444514 close(fd);
445
- munmap(buffer, chunk_size);
515
+ munmap(buffer, buffer_sz);
446516 return 0;
447517 }