  ..   ..
        1  +// SPDX-License-Identifier: GPL-2.0-only
   1    2   /* bpf/cpumap.c
   2    3    *
   3    4    * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
   4       - * Released under terms in GPL version 2.  See COPYING.
   5    5    */
   6    6
   7    7   /* The 'cpumap' is primarily used as a backend map for XDP BPF helper
  ..   ..
  32   32
  33   33   /* General idea: XDP packets getting XDP redirected to another CPU,
  34   34    * will maximum be stored/queued for one driver ->poll() call.  It is
  35       - * guaranteed that setting flush bit and flush operation happen on
       35  + * guaranteed that queueing the frame and the flush operation happen on
  36   36    * same CPU.  Thus, cpu_map_flush operation can deduct via this_cpu_ptr()
  37   37    * which queue in bpf_cpu_map_entry contains packets.
  38   38    */
  39   39
  40   40   #define CPU_MAP_BULK_SIZE 8  /* 8 == one cacheline on 64-bit archs */
       41  +struct bpf_cpu_map_entry;
       42  +struct bpf_cpu_map;
       43  +
  41   44   struct xdp_bulk_queue {
  42   45   	void *q[CPU_MAP_BULK_SIZE];
       46  +	struct list_head flush_node;
       47  +	struct bpf_cpu_map_entry *obj;
  43   48   	unsigned int count;
  44   49   };
  45   50
  ..   ..
  47   52   struct bpf_cpu_map_entry {
  48   53   	u32 cpu;    /* kthread CPU and map index */
  49   54   	int map_id; /* Back reference to map */
  50       -	u32 qsize;  /* Queue size placeholder for map lookup */
  51   55
  52   56   	/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
  53   57   	struct xdp_bulk_queue __percpu *bulkq;
  54   58
       59  +	struct bpf_cpu_map *cmap;
       60  +
  55   61   	/* Queue with potential multi-producers, and single-consumer kthread */
  56   62   	struct ptr_ring *queue;
  57   63   	struct task_struct *kthread;
  58       -	struct work_struct kthread_stop_wq;
       64  +
       65  +	struct bpf_cpumap_val value;
       66  +	struct bpf_prog *prog;
  59   67
  60   68   	atomic_t refcnt; /* Control when this struct can be free'ed */
  61   69   	struct rcu_head rcu;
       70  +
       71  +	struct work_struct kthread_stop_wq;
  62   72   };
  63   73
  64   74   struct bpf_cpu_map {
  65   75   	struct bpf_map map;
  66   76   	/* Below members specific for map type */
  67   77   	struct bpf_cpu_map_entry **cpu_map;
  68       -	unsigned long __percpu *flush_needed;
  69   78   };
  70   79
  71       -static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
  72       -			     struct xdp_bulk_queue *bq, bool in_napi_ctx);
  73       -
  74       -static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
  75       -{
  76       -	return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
  77       -}
       80  +static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
  78   81
  79   82   static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
  80   83   {
       84  +	u32 value_size = attr->value_size;
  81   85   	struct bpf_cpu_map *cmap;
  82   86   	int err = -ENOMEM;
  83   87   	u64 cost;
  84   88   	int ret;
  85   89
  86       -	if (!capable(CAP_SYS_ADMIN))
       90  +	if (!bpf_capable())
  87   91   		return ERR_PTR(-EPERM);
  88   92
  89   93   	/* check sanity of attributes */
  90   94   	if (attr->max_entries == 0 || attr->key_size != 4 ||
  91       -	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
       95  +	    (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
       96  +	     value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) ||
       97  +	    attr->map_flags & ~BPF_F_NUMA_NODE)
  92   98   		return ERR_PTR(-EINVAL);
  93   99
  94  100   	cmap = kzalloc(sizeof(*cmap), GFP_USER);
  ..   ..
 105  111
 106  112   	/* make sure page count doesn't overflow */
 107  113   	cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
 108       -	cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
 109       -	if (cost >= U32_MAX - PAGE_SIZE)
 110       -		goto free_cmap;
 111       -	cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 112  114
 113  115   	/* Notice returns -EPERM on if map size is larger than memlock limit */
 114       -	ret = bpf_map_precharge_memlock(cmap->map.pages);
      116  +	ret = bpf_map_charge_init(&cmap->map.memory, cost);
 115  117   	if (ret) {
 116  118   		err = ret;
 117  119   		goto free_cmap;
 118  120   	}
 119       -
 120       -	/* A per cpu bitfield with a bit per possible CPU in map  */
 121       -	cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
 122       -					    __alignof__(unsigned long));
 123       -	if (!cmap->flush_needed)
 124       -		goto free_cmap;
 125  121
 126  122   	/* Alloc array for possible remote "destination" CPUs */
 127  123   	cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
 128  124   					   sizeof(struct bpf_cpu_map_entry *),
 129  125   					   cmap->map.numa_node);
 130  126   	if (!cmap->cpu_map)
 131       -		goto free_percpu;
      127  +		goto free_charge;
 132  128
 133  129   	return &cmap->map;
 134       -free_percpu:
 135       -	free_percpu(cmap->flush_needed);
      130  +free_charge:
      131  +	bpf_map_charge_finish(&cmap->map.memory);
 136  132   free_cmap:
 137  133   	kfree(cmap);
 138  134   	return ERR_PTR(err);
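
The sanity check above now accepts two value layouts: the legacy 4-byte queue size, or the full struct bpf_cpumap_val, which also carries a program fd. Below is a minimal userspace sketch of creating and sizing such a map; it assumes a libbpf recent enough to provide bpf_map_create(), and the map and function names are illustrative only, not part of this patch.

    #include <bpf/bpf.h>      /* bpf_map_create(), bpf_map_update_elem() */
    #include <linux/bpf.h>    /* struct bpf_cpumap_val, BPF_MAP_TYPE_CPUMAP */

    /* Sketch only: cpu_map_alloc() allows key_size == 4 and a value_size of
     * either 4 (legacy, qsize only) or sizeof(struct bpf_cpumap_val)
     * (qsize + bpf_prog.fd).
     */
    static int create_cpumap(__u32 max_cpus)
    {
    	return bpf_map_create(BPF_MAP_TYPE_CPUMAP, "cpu_map",
    			      sizeof(__u32),                 /* key: CPU index */
    			      sizeof(struct bpf_cpumap_val), /* new value layout */
    			      max_cpus, NULL);
    }

    static int set_queue_size(int cpumap_fd, __u32 cpu, __u32 qsize)
    {
    	struct bpf_cpumap_val val = { .qsize = qsize }; /* fd == 0: no program */

    	return bpf_map_update_elem(cpumap_fd, &cpu, &val, 0);
    }
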
  ..   ..
 159  155  	kthread_stop(rcpu->kthread);
 160  156  }
 161  157
 162       -static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 163       -					 struct xdp_frame *xdpf)
      158  +static struct sk_buff *cpu_map_build_skb(struct xdp_frame *xdpf,
      159  +					 struct sk_buff *skb)
 164  160  {
 165  161  	unsigned int hard_start_headroom;
 166  162  	unsigned int frame_size;
 167  163  	void *pkt_data_start;
 168       -	struct sk_buff *skb;
 169  164
 170  165  	/* Part of headroom was reserved to xdpf */
 171  166  	hard_start_headroom = sizeof(struct xdp_frame) +  xdpf->headroom;
 172  167
 173       -	/* build_skb need to place skb_shared_info after SKB end, and
 174       -	 * also want to know the memory "truesize".  Thus, need to
 175       -	 * know the memory frame size backing xdp_buff.
 176       -	 *
 177       -	 * XDP was designed to have PAGE_SIZE frames, but this
 178       -	 * assumption is not longer true with ixgbe and i40e.  It
 179       -	 * would be preferred to set frame_size to 2048 or 4096
 180       -	 * depending on the driver.
 181       -	 *   frame_size = 2048;
 182       -	 *   frame_len  = frame_size - sizeof(*xdp_frame);
 183       -	 *
 184       -	 * Instead, with info avail, skb_shared_info in placed after
 185       -	 * packet len.  This, unfortunately fakes the truesize.
 186       -	 * Another disadvantage of this approach, the skb_shared_info
 187       -	 * is not at a fixed memory location, with mixed length
 188       -	 * packets, which is bad for cache-line hotness.
      168  +	/* Memory size backing xdp_frame data already have reserved
      169  +	 * room for build_skb to place skb_shared_info in tailroom.
 189  170  	 */
 190       -	frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
 191       -		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
      171  +	frame_size = xdpf->frame_sz;
 192  172
 193  173  	pkt_data_start = xdpf->data - hard_start_headroom;
 194       -	skb = build_skb(pkt_data_start, frame_size);
 195       -	if (!skb)
      174  +	skb = build_skb_around(skb, pkt_data_start, frame_size);
      175  +	if (unlikely(!skb))
 196  176  		return NULL;
 197  177
 198  178  	skb_reserve(skb, hard_start_headroom);
  ..   ..
 208  188  	 * - HW RX hash			(skb_set_hash)
 209  189  	 * - RX ring dev queue index	(skb_record_rx_queue)
 210  190  	 */
      191  +
      192  +	/* Until page_pool get SKB return path, release DMA here */
      193  +	xdp_release_frame(xdpf);
 211  194
 212  195  	/* Allow SKB to reuse area used by xdp_frame */
 213  196  	xdp_scrub_frame(xdpf);
  ..   ..
 232  215  static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 233  216  {
 234  217  	if (atomic_dec_and_test(&rcpu->refcnt)) {
      218  +		if (rcpu->prog)
      219  +			bpf_prog_put(rcpu->prog);
 235  220  		/* The queue should be empty at this point */
 236  221  		__cpu_map_ring_cleanup(rcpu->queue);
 237  222  		ptr_ring_cleanup(rcpu->queue, NULL);
  ..   ..
 239  224  		kfree(rcpu);
 240  225  	}
 241  226  }
      227  +
      228  +static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
      229  +				    void **frames, int n,
      230  +				    struct xdp_cpumap_stats *stats)
      231  +{
      232  +	struct xdp_rxq_info rxq;
      233  +	struct xdp_buff xdp;
      234  +	int i, nframes = 0;
      235  +
      236  +	if (!rcpu->prog)
      237  +		return n;
      238  +
      239  +	rcu_read_lock_bh();
      240  +
      241  +	xdp_set_return_frame_no_direct();
      242  +	xdp.rxq = &rxq;
      243  +
      244  +	for (i = 0; i < n; i++) {
      245  +		struct xdp_frame *xdpf = frames[i];
      246  +		u32 act;
      247  +		int err;
      248  +
      249  +		rxq.dev = xdpf->dev_rx;
      250  +		rxq.mem = xdpf->mem;
      251  +		/* TODO: report queue_index to xdp_rxq_info */
      252  +
      253  +		xdp_convert_frame_to_buff(xdpf, &xdp);
      254  +
      255  +		act = bpf_prog_run_xdp(rcpu->prog, &xdp);
      256  +		switch (act) {
      257  +		case XDP_PASS:
      258  +			err = xdp_update_frame_from_buff(&xdp, xdpf);
      259  +			if (err < 0) {
      260  +				xdp_return_frame(xdpf);
      261  +				stats->drop++;
      262  +			} else {
      263  +				frames[nframes++] = xdpf;
      264  +				stats->pass++;
      265  +			}
      266  +			break;
      267  +		case XDP_REDIRECT:
      268  +			err = xdp_do_redirect(xdpf->dev_rx, &xdp,
      269  +					      rcpu->prog);
      270  +			if (unlikely(err)) {
      271  +				xdp_return_frame(xdpf);
      272  +				stats->drop++;
      273  +			} else {
      274  +				stats->redirect++;
      275  +			}
      276  +			break;
      277  +		default:
      278  +			bpf_warn_invalid_xdp_action(act);
      279  +			fallthrough;
      280  +		case XDP_DROP:
      281  +			xdp_return_frame(xdpf);
      282  +			stats->drop++;
      283  +			break;
      284  +		}
      285  +	}
      286  +
      287  +	if (stats->redirect)
      288  +		xdp_do_flush_map();
      289  +
      290  +	xdp_clear_return_frame_no_direct();
      291  +
      292  +	rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
      293  +
      294  +	return nframes;
      295  +}
      296  +
      297  +#define CPUMAP_BATCH 8
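
cpu_map_bpf_prog_run_xdp() above runs a second XDP program on the remote CPU for each batch pulled off the ptr_ring, with XDP_PASS, XDP_DROP and XDP_REDIRECT handled as shown. A minimal sketch of such a program follows; it assumes a libbpf version whose "xdp/cpumap" section name sets expected_attach_type to BPF_XDP_CPUMAP (older versions need the attach type set explicitly before load), and the filtering logic is purely illustrative.

    // SPDX-License-Identifier: GPL-2.0
    /* Illustrative cpumap program: runs on the remote CPU after the frame
     * has been redirected there; the verdicts are handled as in
     * cpu_map_bpf_prog_run_xdp() above (PASS -> SKB path, DROP -> free,
     * REDIRECT -> xdp_do_redirect from the remote CPU).
     */
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp/cpumap")
    int cpumap_filter(struct xdp_md *ctx)
    {
    	void *data     = (void *)(long)ctx->data;
    	void *data_end = (void *)(long)ctx->data_end;

    	/* Hypothetical policy: drop frames shorter than an Ethernet header */
    	if (data + 14 > data_end)
    		return XDP_DROP;

    	return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";
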
 242  298
 243  299  static int cpu_map_kthread_run(void *data)
 244  300  {
  ..   ..
 252  308  	 * kthread_stop signal until queue is empty.
 253  309  	 */
 254  310  	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 255       -		unsigned int processed = 0, drops = 0, sched = 0;
 256       -		struct xdp_frame *xdpf;
      311  +		struct xdp_cpumap_stats stats = {}; /* zero stats */
      312  +		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
      313  +		unsigned int drops = 0, sched = 0;
      314  +		void *frames[CPUMAP_BATCH];
      315  +		void *skbs[CPUMAP_BATCH];
      316  +		int i, n, m, nframes;
 257  317
 258  318  		/* Release CPU reschedule checks */
 259  319  		if (__ptr_ring_empty(rcpu->queue)) {
  ..   ..
 269  329  			sched = cond_resched();
 270  330  		}
 271  331
 272       -		/* Process packets in rcpu->queue */
 273       -		local_bh_disable();
 274  332  		/*
 275  333  		 * The bpf_cpu_map_entry is single consumer, with this
 276  334  		 * kthread CPU pinned. Lockless access to ptr_ring
 277  335  		 * consume side valid as no-resize allowed of queue.
 278  336  		 */
 279       -		while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
 280       -			struct sk_buff *skb;
      337  +		n = __ptr_ring_consume_batched(rcpu->queue, frames,
      338  +					       CPUMAP_BATCH);
      339  +		for (i = 0; i < n; i++) {
      340  +			void *f = frames[i];
      341  +			struct page *page = virt_to_page(f);
      342  +
      343  +			/* Bring struct page memory area to curr CPU. Read by
      344  +			 * build_skb_around via page_is_pfmemalloc(), and when
      345  +			 * freed written by page_frag_free call.
      346  +			 */
      347  +			prefetchw(page);
      348  +		}
      349  +
      350  +		/* Support running another XDP prog on this CPU */
      351  +		nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
      352  +		if (nframes) {
      353  +			m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
      354  +			if (unlikely(m == 0)) {
      355  +				for (i = 0; i < nframes; i++)
      356  +					skbs[i] = NULL; /* effect: xdp_return_frame */
      357  +				drops += nframes;
      358  +			}
      359  +		}
      360  +
      361  +		local_bh_disable();
      362  +		for (i = 0; i < nframes; i++) {
      363  +			struct xdp_frame *xdpf = frames[i];
      364  +			struct sk_buff *skb = skbs[i];
 281  365  			int ret;
 282  366
 283       -			skb = cpu_map_build_skb(rcpu, xdpf);
      367  +			skb = cpu_map_build_skb(xdpf, skb);
 284  368  			if (!skb) {
 285  369  				xdp_return_frame(xdpf);
 286  370  				continue;
  ..   ..
 290  374  			ret = netif_receive_skb_core(skb);
 291  375  			if (ret == NET_RX_DROP)
 292  376  				drops++;
 293       -
 294       -			/* Limit BH-disable period */
 295       -			if (++processed == 8)
 296       -				break;
 297  377  		}
 298  378  		/* Feedback loop via tracepoint */
 299       -		trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
      379  +		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
 300  380
 301  381  		local_bh_enable(); /* resched point, may call do_softirq() */
 302  382  	}
  ..   ..
 306  386  	return 0;
 307  387  }
 308  388
 309       -static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
 310       -						       int map_id)
      389  +bool cpu_map_prog_allowed(struct bpf_map *map)
 311  390  {
      391  +	return map->map_type == BPF_MAP_TYPE_CPUMAP &&
      392  +	       map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
      393  +}
      394  +
      395  +static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
      396  +{
      397  +	struct bpf_prog *prog;
      398  +
      399  +	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
      400  +	if (IS_ERR(prog))
      401  +		return PTR_ERR(prog);
      402  +
      403  +	if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
      404  +		bpf_prog_put(prog);
      405  +		return -EINVAL;
      406  +	}
      407  +
      408  +	rcpu->value.bpf_prog.id = prog->aux->id;
      409  +	rcpu->prog = prog;
      410  +
      411  +	return 0;
      412  +}
      413  +
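
__cpu_map_load_bpf_program() above takes the fd from bpf_cpumap_val.bpf_prog.fd and insists on expected_attach_type BPF_XDP_CPUMAP. A rough userspace sketch of wiring that up with libbpf is below; the object, program and map names are assumptions for illustration, and error handling is abbreviated.

    #include <bpf/libbpf.h>
    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    /* Hypothetical: "cpumap_prog.o" contains the SEC("xdp/cpumap") program
     * sketched earlier plus a BPF_MAP_TYPE_CPUMAP map named "cpu_map".
     */
    static int cpumap_attach_prog(__u32 cpu, __u32 qsize)
    {
    	struct bpf_cpumap_val val = { .qsize = qsize };
    	struct bpf_program *prog;
    	struct bpf_object *obj;
    	int map_fd;

    	obj = bpf_object__open_file("cpumap_prog.o", NULL);
    	if (!obj || bpf_object__load(obj))
    		return -1;

    	prog = bpf_object__find_program_by_name(obj, "cpumap_filter");
    	map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
    	if (!prog || map_fd < 0)
    		return -1;

    	/* fd > 0 selects __cpu_map_load_bpf_program() in the kernel */
    	val.bpf_prog.fd = bpf_program__fd(prog);
    	return bpf_map_update_elem(map_fd, &cpu, &val, 0);
    }
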
      414  +static struct bpf_cpu_map_entry *
      415  +__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
      416  +{
      417  +	int numa, err, i, fd = value->bpf_prog.fd;
 312  418  	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 313  419  	struct bpf_cpu_map_entry *rcpu;
 314       -	int numa, err;
      420  +	struct xdp_bulk_queue *bq;
 315  421
 316  422  	/* Have map->numa_node, but choose node of redirect target CPU */
 317  423  	numa = cpu_to_node(cpu);
  ..   ..
 326  432  	if (!rcpu->bulkq)
 327  433  		goto free_rcu;
 328  434
      435  +	for_each_possible_cpu(i) {
      436  +		bq = per_cpu_ptr(rcpu->bulkq, i);
      437  +		bq->obj = rcpu;
      438  +	}
      439  +
 329  440  	/* Alloc queue */
 330  441  	rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
 331  442  	if (!rcpu->queue)
 332  443  		goto free_bulkq;
 333  444
 334       -	err = ptr_ring_init(rcpu->queue, qsize, gfp);
      445  +	err = ptr_ring_init(rcpu->queue, value->qsize, gfp);
 335  446  	if (err)
 336  447  		goto free_queue;
 337  448
 338  449  	rcpu->cpu    = cpu;
 339  450  	rcpu->map_id = map_id;
 340       -	rcpu->qsize  = qsize;
      451  +	rcpu->value.qsize  = value->qsize;
      452  +
      453  +	if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
      454  +		goto free_ptr_ring;
 341  455
 342  456  	/* Setup kthread */
 343  457  	rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
 344  458  					       "cpumap/%d/map:%d", cpu, map_id);
 345  459  	if (IS_ERR(rcpu->kthread))
 346       -		goto free_ptr_ring;
      460  +		goto free_prog;
 347  461
 348  462  	get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
 349  463  	get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
  ..   ..
 354  468
 355  469  	return rcpu;
 356  470
      471  +free_prog:
      472  +	if (rcpu->prog)
      473  +		bpf_prog_put(rcpu->prog);
 357  474  free_ptr_ring:
 358  475  	ptr_ring_cleanup(rcpu->queue, NULL);
 359  476  free_queue:
  ..   ..
 368  485  static void __cpu_map_entry_free(struct rcu_head *rcu)
 369  486  {
 370  487  	struct bpf_cpu_map_entry *rcpu;
 371       -	int cpu;
 372  488
 373  489  	/* This cpu_map_entry have been disconnected from map and one
 374       -	 * RCU graze-period have elapsed.  Thus, XDP cannot queue any
      490  +	 * RCU grace-period have elapsed.  Thus, XDP cannot queue any
 375  491  	 * new packets and cannot change/set flush_needed that can
 376  492  	 * find this entry.
 377  493  	 */
 378  494  	rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
 379  495
 380       -	/* Flush remaining packets in percpu bulkq */
 381       -	for_each_online_cpu(cpu) {
 382       -		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
 383       -
 384       -		/* No concurrent bq_enqueue can run at this point */
 385       -		bq_flush_to_queue(rcpu, bq, false);
 386       -	}
 387  496  	free_percpu(rcpu->bulkq);
 388  497  	/* Cannot kthread_stop() here, last put free rcpu resources */
 389  498  	put_cpu_map_entry(rcpu);
  ..   ..
 405  514   * percpu bulkq to queue.  Due to caller map_delete_elem() disable
 406  515   * preemption, cannot call kthread_stop() to make sure queue is empty.
 407  516   * Instead a work_queue is started for stopping kthread,
 408       - * cpu_map_kthread_stop, which waits for an RCU graze period before
      517  + * cpu_map_kthread_stop, which waits for an RCU grace period before
 409  518   * stopping kthread, emptying the queue.
 410  519   */
 411  520  static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
  ..   ..
 438  547  			       u64 map_flags)
 439  548  {
 440  549  	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
      550  +	struct bpf_cpumap_val cpumap_value = {};
 441  551  	struct bpf_cpu_map_entry *rcpu;
 442       -
 443  552  	/* Array index key correspond to CPU number */
 444  553  	u32 key_cpu = *(u32 *)key;
 445       -	/* Value is the queue size */
 446       -	u32 qsize = *(u32 *)value;
      554  +
      555  +	memcpy(&cpumap_value, value, map->value_size);
 447  556
 448  557  	if (unlikely(map_flags > BPF_EXIST))
 449  558  		return -EINVAL;
  ..   ..
 451  560  		return -E2BIG;
 452  561  	if (unlikely(map_flags == BPF_NOEXIST))
 453  562  		return -EEXIST;
 454       -	if (unlikely(qsize > 16384)) /* sanity limit on qsize */
      563  +	if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */
 455  564  		return -EOVERFLOW;
 456  565
 457  566  	/* Make sure CPU is a valid possible cpu */
 458  567  	if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
 459  568  		return -ENODEV;
 460  569
 461       -	if (qsize == 0) {
      570  +	if (cpumap_value.qsize == 0) {
 462  571  		rcpu = NULL; /* Same as deleting */
 463  572  	} else {
 464  573  		/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
 465       -		rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
      574  +		rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
 466  575  		if (!rcpu)
 467  576  			return -ENOMEM;
      577  +		rcpu->cmap = cmap;
 468  578  	}
 469  579  	rcu_read_lock();
 470  580  	__cpu_map_entry_replace(cmap, key_cpu, rcpu);
  ..   ..
 475  585  static void cpu_map_free(struct bpf_map *map)
 476  586  {
 477  587  	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
 478       -	int cpu;
 479  588  	u32 i;
 480  589
 481  590  	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
  ..   ..
 490  599  	bpf_clear_redirect_map(map);
 491  600  	synchronize_rcu();
 492  601
 493       -	/* To ensure all pending flush operations have completed wait for flush
 494       -	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
 495       -	 * Because the above synchronize_rcu() ensures the map is disconnected
 496       -	 * from the program we can assume no new bits will be set.
 497       -	 */
 498       -	for_each_online_cpu(cpu) {
 499       -		unsigned long *bitmap = per_cpu_ptr(cmap->flush_needed, cpu);
 500       -
 501       -		while (!bitmap_empty(bitmap, cmap->map.max_entries))
 502       -			cond_resched();
 503       -	}
 504       -
 505  602  	/* For cpu_map the remote CPUs can still be using the entries
 506  603  	 * (struct bpf_cpu_map_entry).
 507  604  	 */
  ..   ..
 512  609  		if (!rcpu)
 513  610  			continue;
 514  611
 515       -		/* bq flush and cleanup happens after RCU graze-period */
      612  +		/* bq flush and cleanup happens after RCU grace-period */
 516  613  		__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
 517  614  	}
 518       -	free_percpu(cmap->flush_needed);
 519  615  	bpf_map_area_free(cmap->cpu_map);
 520  616  	kfree(cmap);
 521  617  }
  ..   ..
 537  633  	struct bpf_cpu_map_entry *rcpu =
 538  634  		__cpu_map_lookup_elem(map, *(u32 *)key);
 539  635
 540       -	return rcpu ? &rcpu->qsize : NULL;
      636  +	return rcpu ? &rcpu->value : NULL;
 541  637  }
 542  638
 543  639  static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
  ..   ..
 557  653  	return 0;
 558  654  }
 559  655
      656  +static int cpu_map_btf_id;
 560  657  const struct bpf_map_ops cpu_map_ops = {
      658  +	.map_meta_equal		= bpf_map_meta_equal,
 561  659  	.map_alloc		= cpu_map_alloc,
 562  660  	.map_free		= cpu_map_free,
 563  661  	.map_delete_elem	= cpu_map_delete_elem,
  ..   ..
 565  663  	.map_lookup_elem	= cpu_map_lookup_elem,
 566  664  	.map_get_next_key	= cpu_map_get_next_key,
 567  665  	.map_check_btf		= map_check_no_btf,
      666  +	.map_btf_name		= "bpf_cpu_map",
      667  +	.map_btf_id		= &cpu_map_btf_id,
 568  668  };
 569  669
 570       -static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 571       -			     struct xdp_bulk_queue *bq, bool in_napi_ctx)
      670  +static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
 572  671  {
      672  +	struct bpf_cpu_map_entry *rcpu = bq->obj;
 573  673  	unsigned int processed = 0, drops = 0;
 574  674  	const int to_cpu = rcpu->cpu;
 575  675  	struct ptr_ring *q;
 576  676  	int i;
 577  677
 578  678  	if (unlikely(!bq->count))
 579       -		return 0;
      679  +		return;
 580  680
 581  681  	q = rcpu->queue;
 582  682  	spin_lock(&q->producer_lock);
  ..   ..
 588  688  		err = __ptr_ring_produce(q, xdpf);
 589  689  		if (err) {
 590  690  			drops++;
 591       -			if (likely(in_napi_ctx))
 592       -				xdp_return_frame_rx_napi(xdpf);
 593       -			else
 594       -				xdp_return_frame(xdpf);
      691  +			xdp_return_frame_rx_napi(xdpf);
 595  692  		}
 596  693  		processed++;
 597  694  	}
 598  695  	bq->count = 0;
 599  696  	spin_unlock(&q->producer_lock);
 600  697
      698  +	__list_del_clearprev(&bq->flush_node);
      699  +
 601  700  	/* Feedback loop via tracepoints */
 602  701  	trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
 603       -	return 0;
 604  702  }
 605  703
 606  704  /* Runs under RCU-read-side, plus in softirq under NAPI protection.
 607  705   * Thus, safe percpu variable access.
 608  706   */
 609       -static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
      707  +static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 610  708  {
      709  +	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
 611  710  	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 612  711
 613  712  	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
 614       -		bq_flush_to_queue(rcpu, bq, true);
      713  +		bq_flush_to_queue(bq);
 615  714
 616  715  	/* Notice, xdp_buff/page MUST be queued here, long enough for
 617  716  	 * driver to code invoking us to finished, due to driver
  ..   ..
 623  722  	 * operation, when completing napi->poll call.
 624  723  	 */
 625  724  	bq->q[bq->count++] = xdpf;
 626       -	return 0;
      725  +
      726  +	if (!bq->flush_node.prev)
      727  +		list_add(&bq->flush_node, flush_list);
 627  728  }
 628  729
 629  730  int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
  ..   ..
 631  732  {
 632  733  	struct xdp_frame *xdpf;
 633  734
 634       -	xdpf = convert_to_xdp_frame(xdp);
      735  +	xdpf = xdp_convert_buff_to_frame(xdp);
 635  736  	if (unlikely(!xdpf))
 636  737  		return -EOVERFLOW;
 637  738
  ..   ..
 642  743  	return 0;
 643  744  }
 644  745
 645       -void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit)
      746  +void __cpu_map_flush(void)
 646  747  {
 647       -	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
 648       -	unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
      748  +	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
      749  +	struct xdp_bulk_queue *bq, *tmp;
 649  750
 650       -	__set_bit(bit, bitmap);
 651       -}
 652       -
 653       -void __cpu_map_flush(struct bpf_map *map)
 654       -{
 655       -	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
 656       -	unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
 657       -	u32 bit;
 658       -
 659       -	/* The napi->poll softirq makes sure __cpu_map_insert_ctx()
 660       -	 * and __cpu_map_flush() happen on same CPU. Thus, the percpu
 661       -	 * bitmap indicate which percpu bulkq have packets.
 662       -	 */
 663       -	for_each_set_bit(bit, bitmap, map->max_entries) {
 664       -		struct bpf_cpu_map_entry *rcpu = READ_ONCE(cmap->cpu_map[bit]);
 665       -		struct xdp_bulk_queue *bq;
 666       -
 667       -		/* This is possible if entry is removed by user space
 668       -		 * between xdp redirect and flush op.
 669       -		 */
 670       -		if (unlikely(!rcpu))
 671       -			continue;
 672       -
 673       -		__clear_bit(bit, bitmap);
 674       -
 675       -		/* Flush all frames in bulkq to real queue */
 676       -		bq = this_cpu_ptr(rcpu->bulkq);
 677       -		bq_flush_to_queue(rcpu, bq, true);
      751  +	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
      752  +		bq_flush_to_queue(bq);
 678  753
 679  754  		/* If already running, costs spin_lock_irqsave + smb_mb */
 680       -		wake_up_process(rcpu->kthread);
      755  +		wake_up_process(bq->obj->kthread);
 681  756  	}
 682  757  }
      758  +
      759  +static int __init cpu_map_init(void)
      760  +{
      761  +	int cpu;
      762  +
      763  +	for_each_possible_cpu(cpu)
      764  +		INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
      765  +	return 0;
      766  +}
      767  +
      768  +subsys_initcall(cpu_map_init);
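
Since cpu_map_lookup_elem() now returns the whole rcpu->value, with bpf_prog.id filled in by __cpu_map_load_bpf_program(), userspace can read back the per-CPU queue size and the attached program id. A small sketch, assuming a map fd obtained as in the earlier examples and names chosen purely for illustration:

    #include <stdio.h>
    #include <bpf/bpf.h>       /* bpf_map_lookup_elem() */
    #include <linux/bpf.h>     /* struct bpf_cpumap_val */

    /* Sketch: dump what the kernel reports for one cpumap slot. */
    static void cpumap_dump_entry(int cpumap_fd, __u32 cpu)
    {
    	struct bpf_cpumap_val val;

    	if (bpf_map_lookup_elem(cpumap_fd, &cpu, &val) == 0)
    		printf("cpu %u: qsize=%u prog_id=%u\n",
    		       cpu, val.qsize, val.bpf_prog.id);
    }
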