.. | ..
 | 1 | +// SPDX-License-Identifier: GPL-2.0-or-later
1 | 2 | /*
2 | 3 |  * Kernel Probes (KProbes)
3 | | - *
4 | | - * This program is free software; you can redistribute it and/or modify
5 | | - * it under the terms of the GNU General Public License as published by
6 | | - * the Free Software Foundation; either version 2 of the License, or
7 | | - * (at your option) any later version.
8 | | - *
9 | | - * This program is distributed in the hope that it will be useful,
10 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | | - * GNU General Public License for more details.
13 | | - *
14 | | - * You should have received a copy of the GNU General Public License
15 | | - * along with this program; if not, write to the Free Software
16 | | - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | 4 |  *
18 | 5 |  * Copyright (C) IBM Corporation, 2002, 2004
19 | 6 |  *
.. | ..
46 | 33 | #include <linux/hardirq.h>
47 | 34 | #include <linux/preempt.h>
48 | 35 | #include <linux/sched/debug.h>
 | 36 | +#include <linux/perf_event.h>
49 | 37 | #include <linux/extable.h>
50 | 38 | #include <linux/kdebug.h>
51 | 39 | #include <linux/kallsyms.h>
 | 40 | +#include <linux/kgdb.h>
52 | 41 | #include <linux/ftrace.h>
53 | | -#include <linux/frame.h>
54 | 42 | #include <linux/kasan.h>
55 | 43 | #include <linux/moduleloader.h>
 | 44 | +#include <linux/objtool.h>
 | 45 | +#include <linux/vmalloc.h>
 | 46 | +#include <linux/pgtable.h>
56 | 47 |
57 | 48 | #include <asm/text-patching.h>
58 | 49 | #include <asm/cacheflush.h>
59 | 50 | #include <asm/desc.h>
60 | | -#include <asm/pgtable.h>
61 | 51 | #include <linux/uaccess.h>
62 | 52 | #include <asm/alternative.h>
63 | 53 | #include <asm/insn.h>
.. | ..
69 | 59 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
70 | 60 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
71 | 61 |
72 | | -#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
 | 62 | +#define stack_addr(regs) ((unsigned long *)regs->sp)
73 | 63 |
74 | 64 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
75 | 65 | 	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
.. | ..
132 | 122 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
133 | 123 | void synthesize_reljump(void *dest, void *from, void *to)
134 | 124 | {
135 | | -	__synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);
 | 125 | +	__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
136 | 126 | }
137 | 127 | NOKPROBE_SYMBOL(synthesize_reljump);
138 | 128 |
139 | 129 | /* Insert a call instruction at address 'from', which calls address 'to'.*/
140 | 130 | void synthesize_relcall(void *dest, void *from, void *to)
141 | 131 | {
142 | | -	__synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);
 | 132 | +	__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
143 | 133 | }
144 | 134 | NOKPROBE_SYMBOL(synthesize_relcall);
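
Editor's note: both synthesizers above emit a one-byte opcode followed by a 32-bit displacement measured from the end of the 5-byte instruction. A minimal userspace sketch of that encoding (illustrative only, not the kernel's __synthesize_relative_insn()):

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Build a 5-byte "JMP rel32" (0xe9) or "CALL rel32" (0xe8) at 'from'
 * targeting 'to'. The displacement is relative to 'from' + 5. */
static void synthesize(uint8_t *buf, uint8_t opcode,
		       uint64_t from, uint64_t to)
{
	int32_t disp = (int32_t)(to - (from + 5));

	buf[0] = opcode;
	memcpy(buf + 1, &disp, 4);	/* little-endian rel32 */
}

int main(void)
{
	uint8_t insn[5];

	synthesize(insn, 0xe9, 0x1000, 0x2000);	/* jmp from 0x1000 to 0x2000 */
	printf("disp = %d\n", *(int32_t *)(insn + 1));	/* prints 4091 (0xffb) */
	return 0;
}
```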
145 | | -
146 | | -/*
147 | | - * Skip the prefixes of the instruction.
148 | | - */
149 | | -static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
150 | | -{
151 | | -	insn_attr_t attr;
152 | | -
153 | | -	attr = inat_get_opcode_attribute((insn_byte_t)*insn);
154 | | -	while (inat_is_legacy_prefix(attr)) {
155 | | -		insn++;
156 | | -		attr = inat_get_opcode_attribute((insn_byte_t)*insn);
157 | | -	}
158 | | -#ifdef CONFIG_X86_64
159 | | -	if (inat_is_rex_prefix(attr))
160 | | -		insn++;
161 | | -#endif
162 | | -	return insn;
163 | | -}
164 | | -NOKPROBE_SYMBOL(skip_prefixes);
165 | 135 |
166 | 136 | /*
167 | 137 |  * Returns non-zero if INSN is boostable.
.. | ..
195 | 165 |
196 | 166 | 	opcode = insn->opcode.bytes[0];
197 | 167 |
198 | | -	switch (opcode & 0xf0) {
199 | | -	case 0x60:
200 | | -		/* can't boost "bound" */
201 | | -		return (opcode != 0x62);
202 | | -	case 0x70:
203 | | -		return 0; /* can't boost conditional jump */
204 | | -	case 0x90:
205 | | -		return opcode != 0x9a; /* can't boost call far */
206 | | -	case 0xc0:
207 | | -		/* can't boost software-interruptions */
208 | | -		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
209 | | -	case 0xd0:
210 | | -		/* can boost AA* and XLAT */
211 | | -		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
212 | | -	case 0xe0:
213 | | -		/* can boost in/out and absolute jmps */
214 | | -		return ((opcode & 0x04) || opcode == 0xea);
215 | | -	case 0xf0:
216 | | -		/* clear and set flags are boostable */
217 | | -		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
 | 168 | +	switch (opcode) {
 | 169 | +	case 0x62:		/* bound */
 | 170 | +	case 0x70 ... 0x7f:	/* Conditional jumps */
 | 171 | +	case 0x9a:		/* Call far */
 | 172 | +	case 0xc0 ... 0xc1:	/* Grp2 */
 | 173 | +	case 0xcc ... 0xce:	/* software exceptions */
 | 174 | +	case 0xd0 ... 0xd3:	/* Grp2 */
 | 175 | +	case 0xd6:		/* (UD) */
 | 176 | +	case 0xd8 ... 0xdf:	/* ESC */
 | 177 | +	case 0xe0 ... 0xe3:	/* LOOP*, JCXZ */
 | 178 | +	case 0xe8 ... 0xe9:	/* near Call, JMP */
 | 179 | +	case 0xeb:		/* Short JMP */
 | 180 | +	case 0xf0 ... 0xf4:	/* LOCK/REP, HLT */
 | 181 | +	case 0xf6 ... 0xf7:	/* Grp3 */
 | 182 | +	case 0xfe:		/* Grp4 */
 | 183 | +		/* ... are not boostable */
 | 184 | +		return 0;
 | 185 | +	case 0xff:		/* Grp5 */
 | 186 | +		/* Only indirect jmp is boostable */
 | 187 | +		return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
218 | 188 | 	default:
219 | | -		/* call is not boostable */
220 | | -		return opcode != 0x9a;
 | 189 | +		return 1;
221 | 190 | 	}
222 | 191 | }
223 | 192 |
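
Editor's note: the new Grp5 case peeks at the ModRM byte, because opcode 0xff selects its actual operation through ModRM's reg field, and only reg == 4 (indirect JMP) is boostable. A self-contained sketch of the field extraction that the kernel's X86_MODRM_* helpers perform:

```c
#include <stdio.h>

/* ModRM layout: mod is the top two bits, reg the middle three,
 * rm the bottom three. */
#define MODRM_MOD(b) (((b) >> 6) & 0x3)
#define MODRM_REG(b) (((b) >> 3) & 0x7)
#define MODRM_RM(b)  ((b) & 0x7)

int main(void)
{
	unsigned char modrm = 0xe0;	/* "jmp *%rax" is ff e0: mod=3, reg=4, rm=0 */

	printf("mod=%u reg=%u rm=%u\n",
	       MODRM_MOD(modrm), MODRM_REG(modrm), MODRM_RM(modrm));
	/* reg == 4 is the /4 (indirect JMP) member of opcode group 5 */
	return 0;
}
```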
.. | ..
262 | 231 | 	 * Fortunately, we know that the original code is the ideal 5-byte
263 | 232 | 	 * long NOP.
264 | 233 | 	 */
265 | | -	if (probe_kernel_read(buf, (void *)addr,
 | 234 | +	if (copy_from_kernel_nofault(buf, (void *)addr,
266 | 235 | 			MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
267 | 236 | 		return 0UL;
268 | 237 |
.. | ..
303 | 272 | 	/* Decode instructions */
304 | 273 | 	addr = paddr - offset;
305 | 274 | 	while (addr < paddr) {
 | 275 | +		int ret;
 | 276 | +
306 | 277 | 		/*
307 | 278 | 		 * Check if the instruction has been modified by another
308 | 279 | 		 * kprobe, in which case we replace the breakpoint by the
.. | ..
314 | 285 | 		__addr = recover_probed_instruction(buf, addr);
315 | 286 | 		if (!__addr)
316 | 287 | 			return 0;
317 | | -		kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
318 | | -		insn_get_length(&insn);
319 | 288 |
320 | | -		/*
321 | | -		 * Another debugging subsystem might insert this breakpoint.
322 | | -		 * In that case, we can't recover it.
323 | | -		 */
324 | | -		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 | 289 | +		ret = insn_decode(&insn, (void *)__addr, MAX_INSN_SIZE, INSN_MODE_KERN);
 | 290 | +		if (ret < 0)
325 | 291 | 			return 0;
 | 292 | +
 | 293 | +#ifdef CONFIG_KGDB
 | 294 | +		/*
 | 295 | +		 * If there is a dynamically installed kgdb sw breakpoint,
 | 296 | +		 * this function should not be probed.
 | 297 | +		 */
 | 298 | +		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
 | 299 | +		    kgdb_has_hit_break(addr))
 | 300 | +			return 0;
 | 301 | +#endif
326 | 302 | 		addr += insn.length;
327 | 303 | 	}
328 | 304 |
329 | 305 | 	return (addr == paddr);
330 | | -}
331 | | -
332 | | -/*
333 | | - * Returns non-zero if opcode modifies the interrupt flag.
334 | | - */
335 | | -static int is_IF_modifier(kprobe_opcode_t *insn)
336 | | -{
337 | | -	/* Skip prefixes */
338 | | -	insn = skip_prefixes(insn);
339 | | -
340 | | -	switch (*insn) {
341 | | -	case 0xfa:	/* cli */
342 | | -	case 0xfb:	/* sti */
343 | | -	case 0xcf:	/* iret/iretd */
344 | | -	case 0x9d:	/* popf/popfd */
345 | | -		return 1;
346 | | -	}
347 | | -
348 | | -	return 0;
349 | 306 | }
350 | 307 |
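
Editor's note: can_probe() accepts an address only if decoding forward from the function entry lands exactly on it, so a probe can never be planted mid-instruction. A sketch of that boundary walk; decode_insn_len() is a hypothetical stand-in for the kernel's insn_decode(), which this example cannot use directly:

```c
#include <stddef.h>
#include <stdbool.h>

/* Hypothetical decoder stub: returns the length of the instruction
 * at 'addr', or 0 if the byte stream cannot be decoded. */
extern size_t decode_insn_len(const unsigned char *addr);

static bool on_insn_boundary(const unsigned char *func_start,
			     const unsigned char *probe_addr)
{
	const unsigned char *addr = func_start;

	while (addr < probe_addr) {
		size_t len = decode_insn_len(addr);

		if (!len)
			return false;	/* undecodable byte stream */
		addr += len;		/* hop to the next instruction */
	}
	/* Valid only if we landed exactly on the probe address */
	return addr == probe_addr;
}
```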
351 | 308 | /*
.. | ..
358 | 315 | int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
359 | 316 | {
360 | 317 | 	kprobe_opcode_t buf[MAX_INSN_SIZE];
361 | | -	unsigned long recovered_insn =
362 | | -		recover_probed_instruction(buf, (unsigned long)src);
 | 318 | +	unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
 | 319 | +	int ret;
363 | 320 |
364 | 321 | 	if (!recovered_insn || !insn)
365 | 322 | 		return 0;
366 | 323 |
367 | 324 | 	/* This can access kernel text if given address is not recovered */
368 | | -	if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
 | 325 | +	if (copy_from_kernel_nofault(dest, (void *)recovered_insn,
 | 326 | +			MAX_INSN_SIZE))
369 | 327 | 		return 0;
370 | 328 |
371 | | -	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
372 | | -	insn_get_length(insn);
 | 329 | +	ret = insn_decode(insn, dest, MAX_INSN_SIZE, INSN_MODE_KERN);
 | 330 | +	if (ret < 0)
 | 331 | +		return 0;
 | 332 | +
 | 333 | +	/* We cannot probe an instruction with a forced-emulation prefix */
 | 334 | +	if (insn_has_emulate_prefix(insn))
 | 335 | +		return 0;
373 | 336 |
374 | 337 | 	/* Another subsystem puts a breakpoint, failed to recover */
375 | | -	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 | 338 | +	if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
376 | 339 | 		return 0;
377 | 340 |
378 | 341 | 	/* We should not singlestep on the exception masking instructions */
.. | ..
409 | 372 | 	return insn->length;
410 | 373 | }
411 | 374 |
412 | | -/* Prepare reljump right after instruction to boost */
413 | | -static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
414 | | -			 struct insn *insn)
 | 375 | +/* Prepare reljump or int3 right after instruction */
 | 376 | +static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
 | 377 | +			      struct insn *insn)
415 | 378 | {
416 | 379 | 	int len = insn->length;
417 | 380 |
418 | | -	if (can_boost(insn, p->addr) &&
419 | | -	    MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {
 | 381 | +	if (!IS_ENABLED(CONFIG_PREEMPTION) &&
 | 382 | +	    !p->post_handler && can_boost(insn, p->addr) &&
 | 383 | +	    MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
420 | 384 | 		/*
421 | 385 | 		 * These instructions can be executed directly if it
422 | 386 | 		 * jumps back to correct address.
423 | 387 | 		 */
424 | 388 | 		synthesize_reljump(buf + len, p->ainsn.insn + len,
425 | 389 | 				   p->addr + insn->length);
426 | | -		len += RELATIVEJUMP_SIZE;
427 | | -		p->ainsn.boostable = true;
 | 390 | +		len += JMP32_INSN_SIZE;
 | 391 | +		p->ainsn.boostable = 1;
428 | 392 | 	} else {
429 | | -		p->ainsn.boostable = false;
 | 393 | +		/* Otherwise, put an int3 for trapping singlestep */
 | 394 | +		if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
 | 395 | +			return -ENOSPC;
 | 396 | +
 | 397 | +		buf[len] = INT3_INSN_OPCODE;
 | 398 | +		len += INT3_INSN_SIZE;
430 | 399 | 	}
431 | 400 |
432 | 401 | 	return len;
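
Editor's note: prepare_singlestep() therefore leaves the slot in one of two shapes, copied instruction + jump back (boosted) or copied instruction + int3 (trapped single-step). A simplified userspace sketch of the two layouts; the buffer handling and addresses here are invented for illustration:

```c
#include <stdint.h>
#include <string.h>

#define MAX_INSN_SIZE 15	/* longest possible x86 instruction */

/* Lay out an insn slot: the copied instruction, then either a
 * "jmp rel32" back into the original stream or a trailing int3. */
static size_t fill_slot(uint8_t *slot, const uint8_t *copy, size_t len,
			uint64_t slot_va, uint64_t next_insn_va, int boost)
{
	memcpy(slot, copy, len);
	if (boost) {
		/* displacement is relative to the end of the jump */
		int32_t disp = (int32_t)(next_insn_va - (slot_va + len + 5));

		slot[len] = 0xe9;		/* jmp rel32 */
		memcpy(slot + len + 1, &disp, 4);
		return len + 5;
	}
	slot[len] = 0xcc;			/* int3 traps after the step */
	return len + 1;
}

int main(void)
{
	uint8_t slot[MAX_INSN_SIZE + 5];
	uint8_t nop = 0x90;
	size_t n = fill_slot(slot, &nop, 1, 0xa000, 0x1001, 1);

	return (n == 6 && slot[1] == 0xe9) ? 0 : 1;
}
```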
.. | ..
441 | 410 | 	if (!page)
442 | 411 | 		return NULL;
443 | 412 |
 | 413 | +	set_vm_flush_reset_perms(page);
444 | 414 | 	/*
445 | 415 | 	 * First make the page read-only, and only then make it executable to
446 | 416 | 	 * prevent it from being W+X in between.
.. | ..
459 | 429 | /* Recover page to RW mode before releasing it */
460 | 430 | void free_insn_page(void *page)
461 | 431 | {
462 | | -	/*
463 | | -	 * First make the page non-executable, and only then make it writable to
464 | | -	 * prevent it from being W+X in between.
465 | | -	 */
466 | | -	set_memory_nx((unsigned long)page, 1);
467 | | -	set_memory_rw((unsigned long)page, 1);
468 | 432 | 	module_memfree(page);
 | 433 | +}
 | 434 | +
 | 435 | +/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
 | 436 | +
 | 437 | +static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs)
 | 438 | +{
 | 439 | +	switch (p->ainsn.opcode) {
 | 440 | +	case 0xfa:	/* cli */
 | 441 | +		regs->flags &= ~(X86_EFLAGS_IF);
 | 442 | +		break;
 | 443 | +	case 0xfb:	/* sti */
 | 444 | +		regs->flags |= X86_EFLAGS_IF;
 | 445 | +		break;
 | 446 | +	case 0x9c:	/* pushf */
 | 447 | +		int3_emulate_push(regs, regs->flags);
 | 448 | +		break;
 | 449 | +	case 0x9d:	/* popf */
 | 450 | +		regs->flags = int3_emulate_pop(regs);
 | 451 | +		break;
 | 452 | +	}
 | 453 | +	regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
 | 454 | +}
 | 455 | +NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers);
 | 456 | +
 | 457 | +static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
 | 458 | +{
 | 459 | +	int3_emulate_ret(regs);
 | 460 | +}
 | 461 | +NOKPROBE_SYMBOL(kprobe_emulate_ret);
 | 462 | +
 | 463 | +static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
 | 464 | +{
 | 465 | +	unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
 | 466 | +
 | 467 | +	func += p->ainsn.rel32;
 | 468 | +	int3_emulate_call(regs, func);
 | 469 | +}
 | 470 | +NOKPROBE_SYMBOL(kprobe_emulate_call);
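
Editor's note: the call emulation above re-bases the target on the address of the instruction following the CALL (regs->ip - INT3_INSN_SIZE + size) and pushes that same address as the return address. A toy model of the arithmetic; toy_regs and the array-backed stack are made up for the example:

```c
#include <stdint.h>
#include <stdio.h>

/* Toy machine state for illustration (not the kernel's pt_regs). */
struct toy_regs {
	uint64_t ip;
	uint64_t sp;
};

/* Emulate "call rel32": push the address after the CALL, then
 * add the displacement to it to form the target. */
static void emulate_call(struct toy_regs *r, uint64_t *stack,
			 uint64_t next_ip, int32_t rel32)
{
	r->sp -= sizeof(uint64_t);			/* reserve the return slot */
	stack[r->sp / sizeof(uint64_t)] = next_ip;	/* "push" it */
	r->ip = next_ip + rel32;			/* ip-relative target */
}

int main(void)
{
	uint64_t stack[16];
	struct toy_regs r = { .ip = 0, .sp = sizeof(stack) };

	emulate_call(&r, stack, 0x1005, 0x100);	/* 5-byte call at 0x1000, +0x100 */
	printf("ip=%#llx ret=%#llx\n",
	       (unsigned long long)r.ip,			/* 0x1105 */
	       (unsigned long long)stack[r.sp / 8]);	/* 0x1005 */
	return 0;
}
```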
 | 471 | +
 | 472 | +static nokprobe_inline
 | 473 | +void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
 | 474 | +{
 | 475 | +	unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
 | 476 | +
 | 477 | +	if (cond)
 | 478 | +		ip += p->ainsn.rel32;
 | 479 | +	int3_emulate_jmp(regs, ip);
 | 480 | +}
 | 481 | +
 | 482 | +static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
 | 483 | +{
 | 484 | +	__kprobe_emulate_jmp(p, regs, true);
 | 485 | +}
 | 486 | +NOKPROBE_SYMBOL(kprobe_emulate_jmp);
 | 487 | +
 | 488 | +static const unsigned long jcc_mask[6] = {
 | 489 | +	[0] = X86_EFLAGS_OF,
 | 490 | +	[1] = X86_EFLAGS_CF,
 | 491 | +	[2] = X86_EFLAGS_ZF,
 | 492 | +	[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
 | 493 | +	[4] = X86_EFLAGS_SF,
 | 494 | +	[5] = X86_EFLAGS_PF,
 | 495 | +};
 | 496 | +
 | 497 | +static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
 | 498 | +{
 | 499 | +	bool invert = p->ainsn.jcc.type & 1;
 | 500 | +	bool match;
 | 501 | +
 | 502 | +	if (p->ainsn.jcc.type < 0xc) {
 | 503 | +		match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
 | 504 | +	} else {
 | 505 | +		match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
 | 506 | +			((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
 | 507 | +		if (p->ainsn.jcc.type >= 0xe)
 | 508 | +			match = match || (regs->flags & X86_EFLAGS_ZF);
 | 509 | +	}
 | 510 | +	__kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
 | 511 | +}
 | 512 | +NOKPROBE_SYMBOL(kprobe_emulate_jcc);
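
Editor's note: the jcc_mask table plus the type nibble cover the whole Jcc family. Even/odd condition codes are complements, codes below 0xc test a single flag mask, and 0xc-0xf derive the signed comparisons from SF ^ OF (plus ZF for JLE/JG). A runnable re-derivation of that logic, using the architectural EFLAGS bit positions:

```c
#include <stdbool.h>
#include <stdio.h>

/* x86 EFLAGS bits consulted by conditional jumps */
#define F_CF 0x0001
#define F_PF 0x0004
#define F_ZF 0x0040
#define F_SF 0x0080
#define F_OF 0x0800

/* cc is the low nibble of the Jcc opcode; odd codes negate even ones. */
static bool jcc_taken(unsigned cc, unsigned flags)
{
	static const unsigned mask[6] = {
		F_OF, F_CF, F_ZF, F_CF | F_ZF, F_SF, F_PF,
	};
	bool invert = cc & 1;
	bool match;

	if (cc < 0xc) {
		match = flags & mask[cc >> 1];
	} else {			/* JL/JGE/JLE/JG: signed compares */
		match = !!(flags & F_SF) ^ !!(flags & F_OF);
		if (cc >= 0xe)
			match = match || (flags & F_ZF);
	}
	return match ^ invert;
}

int main(void)
{
	printf("JE  with ZF=1: %d\n", jcc_taken(0x4, F_ZF));	  /* 1 */
	printf("JNE with ZF=1: %d\n", jcc_taken(0x5, F_ZF));	  /* 0 */
	printf("JL  with SF=1,OF=0: %d\n", jcc_taken(0xc, F_SF)); /* 1 */
	return 0;
}
```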
 | 513 | +
 | 514 | +static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
 | 515 | +{
 | 516 | +	bool match;
 | 517 | +
 | 518 | +	if (p->ainsn.loop.type != 3) {	/* LOOP* */
 | 519 | +		if (p->ainsn.loop.asize == 32)
 | 520 | +			match = ((*(u32 *)&regs->cx)--) != 0;
 | 521 | +#ifdef CONFIG_X86_64
 | 522 | +		else if (p->ainsn.loop.asize == 64)
 | 523 | +			match = ((*(u64 *)&regs->cx)--) != 0;
 | 524 | +#endif
 | 525 | +		else
 | 526 | +			match = ((*(u16 *)&regs->cx)--) != 0;
 | 527 | +	} else {			/* JCXZ */
 | 528 | +		if (p->ainsn.loop.asize == 32)
 | 529 | +			match = *(u32 *)(&regs->cx) == 0;
 | 530 | +#ifdef CONFIG_X86_64
 | 531 | +		else if (p->ainsn.loop.asize == 64)
 | 532 | +			match = *(u64 *)(&regs->cx) == 0;
 | 533 | +#endif
 | 534 | +		else
 | 535 | +			match = *(u16 *)(&regs->cx) == 0;
 | 536 | +	}
 | 537 | +
 | 538 | +	if (p->ainsn.loop.type == 0)	/* LOOPNE */
 | 539 | +		match = match && !(regs->flags & X86_EFLAGS_ZF);
 | 540 | +	else if (p->ainsn.loop.type == 1)	/* LOOPE */
 | 541 | +		match = match && (regs->flags & X86_EFLAGS_ZF);
 | 542 | +
 | 543 | +	__kprobe_emulate_jmp(p, regs, match);
 | 544 | +}
 | 545 | +NOKPROBE_SYMBOL(kprobe_emulate_loop);
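
Editor's note: kprobe_emulate_loop() updates only as many low bits of regs->cx as the effective address size dictates, by writing through a narrower pointer into the same storage. A minimal demonstration of that aliasing trick; it assumes little-endian layout and uses the same type punning as the kernel code:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cx = 0x1234567800000000ULL;

	/* 32-bit address size: decrement only the low dword */
	(*(uint32_t *)&cx)--;	/* low 32 bits wrap 0 -> 0xffffffff */

	printf("cx = %#llx\n", (unsigned long long)cx);
	/* prints 0x12345678ffffffff on a little-endian machine;
	 * the upper half is untouched */
	return 0;
}
```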
 | 546 | +
 | 547 | +static const int addrmode_regoffs[] = {
 | 548 | +	offsetof(struct pt_regs, ax),
 | 549 | +	offsetof(struct pt_regs, cx),
 | 550 | +	offsetof(struct pt_regs, dx),
 | 551 | +	offsetof(struct pt_regs, bx),
 | 552 | +	offsetof(struct pt_regs, sp),
 | 553 | +	offsetof(struct pt_regs, bp),
 | 554 | +	offsetof(struct pt_regs, si),
 | 555 | +	offsetof(struct pt_regs, di),
 | 556 | +#ifdef CONFIG_X86_64
 | 557 | +	offsetof(struct pt_regs, r8),
 | 558 | +	offsetof(struct pt_regs, r9),
 | 559 | +	offsetof(struct pt_regs, r10),
 | 560 | +	offsetof(struct pt_regs, r11),
 | 561 | +	offsetof(struct pt_regs, r12),
 | 562 | +	offsetof(struct pt_regs, r13),
 | 563 | +	offsetof(struct pt_regs, r14),
 | 564 | +	offsetof(struct pt_regs, r15),
 | 565 | +#endif
 | 566 | +};
 | 567 | +
 | 568 | +static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
 | 569 | +{
 | 570 | +	unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
 | 571 | +
 | 572 | +	int3_emulate_call(regs, regs_get_register(regs, offs));
 | 573 | +}
 | 574 | +NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
 | 575 | +
 | 576 | +static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs)
 | 577 | +{
 | 578 | +	unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
 | 579 | +
 | 580 | +	int3_emulate_jmp(regs, regs_get_register(regs, offs));
 | 581 | +}
 | 582 | +NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect);
 | 583 | +
 | 584 | +static int prepare_emulation(struct kprobe *p, struct insn *insn)
 | 585 | +{
 | 586 | +	insn_byte_t opcode = insn->opcode.bytes[0];
 | 587 | +
 | 588 | +	switch (opcode) {
 | 589 | +	case 0xfa:	/* cli */
 | 590 | +	case 0xfb:	/* sti */
 | 591 | +	case 0x9c:	/* pushfl */
 | 592 | +	case 0x9d:	/* popf/popfd */
 | 593 | +		/*
 | 594 | +		 * IF modifiers must be emulated, since they would enable
 | 595 | +		 * interrupts during int3 single stepping.
 | 596 | +		 */
 | 597 | +		p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
 | 598 | +		p->ainsn.opcode = opcode;
 | 599 | +		break;
 | 600 | +	case 0xc2:	/* ret/lret */
 | 601 | +	case 0xc3:
 | 602 | +	case 0xca:
 | 603 | +	case 0xcb:
 | 604 | +		p->ainsn.emulate_op = kprobe_emulate_ret;
 | 605 | +		break;
 | 606 | +	case 0x9a:	/* far call absolute -- segment is not supported */
 | 607 | +	case 0xea:	/* far jmp absolute -- segment is not supported */
 | 608 | +	case 0xcc:	/* int3 */
 | 609 | +	case 0xcf:	/* iret -- in-kernel IRET is not supported */
 | 610 | +		return -EOPNOTSUPP;
 | 611 | +		break;
 | 612 | +	case 0xe8:	/* near call relative */
 | 613 | +		p->ainsn.emulate_op = kprobe_emulate_call;
 | 614 | +		if (insn->immediate.nbytes == 2)
 | 615 | +			p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
 | 616 | +		else
 | 617 | +			p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
 | 618 | +		break;
 | 619 | +	case 0xeb:	/* short jump relative */
 | 620 | +	case 0xe9:	/* near jump relative */
 | 621 | +		p->ainsn.emulate_op = kprobe_emulate_jmp;
 | 622 | +		if (insn->immediate.nbytes == 1)
 | 623 | +			p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
 | 624 | +		else if (insn->immediate.nbytes == 2)
 | 625 | +			p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
 | 626 | +		else
 | 627 | +			p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
 | 628 | +		break;
 | 629 | +	case 0x70 ... 0x7f:
 | 630 | +		/* 1 byte conditional jump */
 | 631 | +		p->ainsn.emulate_op = kprobe_emulate_jcc;
 | 632 | +		p->ainsn.jcc.type = opcode & 0xf;
 | 633 | +		p->ainsn.rel32 = *(char *)insn->immediate.bytes;
 | 634 | +		break;
 | 635 | +	case 0x0f:
 | 636 | +		opcode = insn->opcode.bytes[1];
 | 637 | +		if ((opcode & 0xf0) == 0x80) {
 | 638 | +			/* 2 bytes Conditional Jump */
 | 639 | +			p->ainsn.emulate_op = kprobe_emulate_jcc;
 | 640 | +			p->ainsn.jcc.type = opcode & 0xf;
 | 641 | +			if (insn->immediate.nbytes == 2)
 | 642 | +				p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
 | 643 | +			else
 | 644 | +				p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
 | 645 | +		} else if (opcode == 0x01 &&
 | 646 | +			   X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
 | 647 | +			   X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
 | 648 | +			/* VM extensions - not supported */
 | 649 | +			return -EOPNOTSUPP;
 | 650 | +		}
 | 651 | +		break;
 | 652 | +	case 0xe0:	/* Loop NZ */
 | 653 | +	case 0xe1:	/* Loop */
 | 654 | +	case 0xe2:	/* Loop */
 | 655 | +	case 0xe3:	/* J*CXZ */
 | 656 | +		p->ainsn.emulate_op = kprobe_emulate_loop;
 | 657 | +		p->ainsn.loop.type = opcode & 0x3;
 | 658 | +		p->ainsn.loop.asize = insn->addr_bytes * 8;
 | 659 | +		p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
 | 660 | +		break;
 | 661 | +	case 0xff:
 | 662 | +		/*
 | 663 | +		 * Since the 0xff is an extended group opcode, the instruction
 | 664 | +		 * is determined by the MOD/RM byte.
 | 665 | +		 */
 | 666 | +		opcode = insn->modrm.bytes[0];
 | 667 | +		if ((opcode & 0x30) == 0x10) {
 | 668 | +			if ((opcode & 0x8) == 0x8)
 | 669 | +				return -EOPNOTSUPP;	/* far call */
 | 670 | +			/* call absolute, indirect */
 | 671 | +			p->ainsn.emulate_op = kprobe_emulate_call_indirect;
 | 672 | +		} else if ((opcode & 0x30) == 0x20) {
 | 673 | +			if ((opcode & 0x8) == 0x8)
 | 674 | +				return -EOPNOTSUPP;	/* far jmp */
 | 675 | +			/* jmp near absolute indirect */
 | 676 | +			p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
 | 677 | +		} else
 | 678 | +			break;
 | 679 | +
 | 680 | +		if (insn->addr_bytes != sizeof(unsigned long))
 | 681 | +			return -EOPNOTSUPP;	/* Don't support different size */
 | 682 | +		if (X86_MODRM_MOD(opcode) != 3)
 | 683 | +			return -EOPNOTSUPP;	/* TODO: support memory addressing */
 | 684 | +
 | 685 | +		p->ainsn.indirect.reg = X86_MODRM_RM(opcode);
 | 686 | +#ifdef CONFIG_X86_64
 | 687 | +		if (X86_REX_B(insn->rex_prefix.value))
 | 688 | +			p->ainsn.indirect.reg += 8;
 | 689 | +#endif
 | 690 | +		break;
 | 691 | +	default:
 | 692 | +		break;
 | 693 | +	}
 | 694 | +	p->ainsn.size = insn->length;
 | 695 | +
 | 696 | +	return 0;
469 | 697 | }
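
Editor's note: prepare_emulation() recovers signed rel8/rel16/rel32 displacements by narrowing the decoder's immediate and letting C sign-extend it back. A value-cast equivalent of the `*(s8 *)&insn->immediate.value` pointer pattern (same result on the targets this code runs on):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int32_t imm = 0xf0;		/* immediate bytes as the decoder stores them */
	int32_t rel8  = (int8_t)imm;	/* JMP rel8: sign-extend the low byte */
	int32_t rel16 = (int16_t)imm;	/* 16-bit operand size: low word */

	printf("rel8 = %d, rel16 = %d\n", rel8, rel16);	/* -16, 240 */
	return 0;
}
```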
470 | 698 |
471 | 699 | static int arch_copy_kprobe(struct kprobe *p)
472 | 700 | {
473 | 701 | 	struct insn insn;
474 | 702 | 	kprobe_opcode_t buf[MAX_INSN_SIZE];
475 | | -	int len;
 | 703 | +	int ret, len;
476 | 704 |
477 | 705 | 	/* Copy an instruction with recovering if other optprobe modifies it.*/
478 | 706 | 	len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
479 | 707 | 	if (!len)
480 | 708 | 		return -EINVAL;
481 | 709 |
482 | | -	/*
483 | | -	 * __copy_instruction can modify the displacement of the instruction,
484 | | -	 * but it doesn't affect boostable check.
485 | | -	 */
486 | | -	len = prepare_boost(buf, p, &insn);
 | 710 | +	/* Analyze the opcode and setup emulate functions */
 | 711 | +	ret = prepare_emulation(p, &insn);
 | 712 | +	if (ret < 0)
 | 713 | +		return ret;
487 | 714 |
488 | | -	/* Check whether the instruction modifies Interrupt Flag or not */
489 | | -	p->ainsn.if_modifier = is_IF_modifier(buf);
 | 715 | +	/* Add int3 for single-step or booster jmp */
 | 716 | +	len = prepare_singlestep(buf, p, &insn);
 | 717 | +	if (len < 0)
 | 718 | +		return len;
490 | 719 |
491 | 720 | 	/* Also, displacement change doesn't affect the first byte */
492 | 721 | 	p->opcode = buf[0];
 | 722 | +
 | 723 | +	p->ainsn.tp_len = len;
 | 724 | +	perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);
493 | 725 |
494 | 726 | 	/* OK, write back the instruction(s) into ROX insn buffer */
495 | 727 | 	text_poke(p->ainsn.insn, buf, len);
.. | ..
506 | 738 |
507 | 739 | 	if (!can_probe((unsigned long)p->addr))
508 | 740 | 		return -EILSEQ;
 | 741 | +
 | 742 | +	memset(&p->ainsn, 0, sizeof(p->ainsn));
 | 743 | +
509 | 744 | 	/* insn: must be on special executable page on x86. */
510 | 745 | 	p->ainsn.insn = get_insn_slot();
511 | 746 | 	if (!p->ainsn.insn)
.. | ..
522 | 757 |
523 | 758 | void arch_arm_kprobe(struct kprobe *p)
524 | 759 | {
525 | | -	text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 | 760 | +	u8 int3 = INT3_INSN_OPCODE;
 | 761 | +
 | 762 | +	text_poke(p->addr, &int3, 1);
 | 763 | +	text_poke_sync();
 | 764 | +	perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
526 | 765 | }
527 | 766 |
528 | 767 | void arch_disarm_kprobe(struct kprobe *p)
529 | 768 | {
 | 769 | +	u8 int3 = INT3_INSN_OPCODE;
 | 770 | +
 | 771 | +	perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
530 | 772 | 	text_poke(p->addr, &p->opcode, 1);
 | 773 | +	text_poke_sync();
531 | 774 | }
532 | 775 |
533 | 776 | void arch_remove_kprobe(struct kprobe *p)
534 | 777 | {
535 | 778 | 	if (p->ainsn.insn) {
 | 779 | +		/* Record the perf event before freeing the slot */
 | 780 | +		perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
 | 781 | +				     p->ainsn.tp_len, NULL, 0);
536 | 782 | 		free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
537 | 783 | 		p->ainsn.insn = NULL;
538 | 784 | 	}
.. | ..
562 | 808 | {
563 | 809 | 	__this_cpu_write(current_kprobe, p);
564 | 810 | 	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
565 | | -		= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
566 | | -	if (p->ainsn.if_modifier)
567 | | -		kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
568 | | -}
569 | | -
570 | | -static nokprobe_inline void clear_btf(void)
571 | | -{
572 | | -	if (test_thread_flag(TIF_BLOCKSTEP)) {
573 | | -		unsigned long debugctl = get_debugctlmsr();
574 | | -
575 | | -		debugctl &= ~DEBUGCTLMSR_BTF;
576 | | -		update_debugctlmsr(debugctl);
577 | | -	}
578 | | -}
579 | | -
580 | | -static nokprobe_inline void restore_btf(void)
581 | | -{
582 | | -	if (test_thread_flag(TIF_BLOCKSTEP)) {
583 | | -		unsigned long debugctl = get_debugctlmsr();
584 | | -
585 | | -		debugctl |= DEBUGCTLMSR_BTF;
586 | | -		update_debugctlmsr(debugctl);
587 | | -	}
 | 811 | +		= (regs->flags & X86_EFLAGS_IF);
588 | 812 | }
589 | 813 |
590 | 814 | void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
.. | ..
599 | 823 | }
600 | 824 | NOKPROBE_SYMBOL(arch_prepare_kretprobe);
601 | 825 |
 | 826 | +static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
 | 827 | +				struct kprobe_ctlblk *kcb)
 | 828 | +{
 | 829 | +	/* Restore back the original saved kprobes variables and continue. */
 | 830 | +	if (kcb->kprobe_status == KPROBE_REENTER) {
 | 831 | +		/* This will restore both kcb and current_kprobe */
 | 832 | +		restore_previous_kprobe(kcb);
 | 833 | +	} else {
 | 834 | +		/*
 | 835 | +		 * Always update the kcb status because
 | 836 | +		 * reset_current_kprobe() doesn't update kcb.
 | 837 | +		 */
 | 838 | +		kcb->kprobe_status = KPROBE_HIT_SSDONE;
 | 839 | +		if (cur->post_handler)
 | 840 | +			cur->post_handler(cur, regs, 0);
 | 841 | +		reset_current_kprobe();
 | 842 | +	}
 | 843 | +}
 | 844 | +NOKPROBE_SYMBOL(kprobe_post_process);
 | 845 | +
602 | 846 | static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
603 | 847 | 			     struct kprobe_ctlblk *kcb, int reenter)
604 | 848 | {
605 | 849 | 	if (setup_detour_execution(p, regs, reenter))
606 | 850 | 		return;
607 | 851 |
608 | | -#if !defined(CONFIG_PREEMPT)
609 | | -	if (p->ainsn.boostable && !p->post_handler) {
 | 852 | +#if !defined(CONFIG_PREEMPTION)
 | 853 | +	if (p->ainsn.boostable) {
610 | 854 | 		/* Boost up -- we can execute copied instructions directly */
611 | 855 | 		if (!reenter)
612 | 856 | 			reset_current_kprobe();
.. | ..
625 | 869 | 		kcb->kprobe_status = KPROBE_REENTER;
626 | 870 | 	} else
627 | 871 | 		kcb->kprobe_status = KPROBE_HIT_SS;
628 | | -	/* Prepare real single stepping */
629 | | -	clear_btf();
630 | | -	regs->flags |= X86_EFLAGS_TF;
 | 872 | +
 | 873 | +	if (p->ainsn.emulate_op) {
 | 874 | +		p->ainsn.emulate_op(p, regs);
 | 875 | +		kprobe_post_process(p, regs, kcb);
 | 876 | +		return;
 | 877 | +	}
 | 878 | +
 | 879 | +	/* Disable interrupt, and set ip register on trampoline */
631 | 880 | 	regs->flags &= ~X86_EFLAGS_IF;
632 | | -	/* single step inline if the instruction is an int3 */
633 | | -	if (p->opcode == BREAKPOINT_INSTRUCTION)
634 | | -		regs->ip = (unsigned long)p->addr;
635 | | -	else
636 | | -		regs->ip = (unsigned long)p->ainsn.insn;
 | 881 | +	regs->ip = (unsigned long)p->ainsn.insn;
637 | 882 | }
638 | 883 | NOKPROBE_SYMBOL(setup_singlestep);
 | 884 | +
 | 885 | +/*
 | 886 | + * Called after single-stepping. p->addr is the address of the
 | 887 | + * instruction whose first byte has been replaced by the "int3"
 | 888 | + * instruction. To avoid the SMP problems that can occur when we
 | 889 | + * temporarily put back the original opcode to single-step, we
 | 890 | + * single-stepped a copy of the instruction. The address of this
 | 891 | + * copy is p->ainsn.insn. We also don't use the trap flag; instead,
 | 892 | + * another "int3" is placed right after the copied instruction.
 | 893 | + * Unlike trap-based single-stepping, "int3" single-stepping cannot
 | 894 | + * handle instructions that change the ip register, e.g. jmp,
 | 895 | + * call, and conditional jmp, nor instructions that change the IF
 | 896 | + * flag, because interrupts must be disabled around the single-stepping.
 | 897 | + * Such instructions are software emulated, but others are single-stepped
 | 898 | + * using "int3".
 | 899 | + *
 | 900 | + * When the 2nd "int3" is handled, regs->ip and regs->flags need to
 | 901 | + * be adjusted, so that we can resume execution on correct code.
 | 902 | + */
 | 903 | +static void resume_singlestep(struct kprobe *p, struct pt_regs *regs,
 | 904 | +			      struct kprobe_ctlblk *kcb)
 | 905 | +{
 | 906 | +	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
 | 907 | +	unsigned long orig_ip = (unsigned long)p->addr;
 | 908 | +
 | 909 | +	/* Restore saved interrupt flag and ip register */
 | 910 | +	regs->flags |= kcb->kprobe_saved_flags;
 | 911 | +	/* Note that regs->ip points past the executed int3, so step back */
 | 912 | +	regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
 | 913 | +}
 | 914 | +NOKPROBE_SYMBOL(resume_singlestep);
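
Editor's note: the fixup in resume_singlestep() is worth working through once. After the trailing int3 fires, regs->ip sits one byte past the end of the copied instruction; adding (orig_ip - copy_ip) - INT3_INSN_SIZE re-bases it to just after the original instruction. A small check of the arithmetic (the addresses are made up):

```c
#include <stdio.h>

int main(void)
{
	unsigned long orig_ip  = 0xffffffff81000000UL;	/* probed address */
	unsigned long copy_ip  = 0xffffffffa0000000UL;	/* insn slot */
	unsigned long insn_len = 5;			/* copied insn length */
	unsigned long regs_ip  = copy_ip + insn_len + 1; /* past slot's int3 */

	regs_ip += (orig_ip - copy_ip) - 1;	/* INT3_INSN_SIZE == 1 */
	printf("%d\n", regs_ip == orig_ip + insn_len);	/* prints 1 */
	return 0;
}
```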
639 | 915 |
640 | 916 | /*
641 | 917 |  * We have reentered the kprobe_handler(), since another probe was hit while
.. | ..
671 | 947 | 	return 1;
672 | 948 | }
673 | 949 | NOKPROBE_SYMBOL(reenter_kprobe);
 | 950 | +
 | 951 | +static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb)
 | 952 | +{
 | 953 | +	return (kcb->kprobe_status == KPROBE_HIT_SS ||
 | 954 | +		kcb->kprobe_status == KPROBE_REENTER);
 | 955 | +}
674 | 956 |
675 | 957 | /*
676 | 958 |  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
.. | ..
716 | 998 | 			reset_current_kprobe();
717 | 999 | 			return 1;
718 | 1000 | 		}
719 | | -	} else if (*addr != BREAKPOINT_INSTRUCTION) {
 | 1001 | +	} else if (kprobe_is_ss(kcb)) {
 | 1002 | +		p = kprobe_running();
 | 1003 | +		if ((unsigned long)p->ainsn.insn < regs->ip &&
 | 1004 | +		    (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) {
 | 1005 | +			/* Most probably this is the second int3 for singlestep */
 | 1006 | +			resume_singlestep(p, regs, kcb);
 | 1007 | +			kprobe_post_process(p, regs, kcb);
 | 1008 | +			return 1;
 | 1009 | +		}
 | 1010 | +	}
 | 1011 | +
 | 1012 | +	if (*addr != INT3_INSN_OPCODE) {
720 | 1013 | 		/*
721 | 1014 | 		 * The breakpoint instruction was removed right
722 | 1015 | 		 * after we hit it. Another cpu has removed
.. | ..
739 | 1032 |  * calls trampoline_handler() runs, which calls the kretprobe's handler.
740 | 1033 |  */
741 | 1034 | asm(
 | 1035 | +	".text\n"
742 | 1036 | 	".global kretprobe_trampoline\n"
743 | 1037 | 	".type kretprobe_trampoline, @function\n"
744 | 1038 | 	"kretprobe_trampoline:\n"
745 | | -#ifdef CONFIG_X86_64
746 | 1039 | 	/* We don't bother saving the ss register */
 | 1040 | +#ifdef CONFIG_X86_64
747 | 1041 | 	"	pushq %rsp\n"
748 | 1042 | 	"	pushfq\n"
749 | 1043 | 	SAVE_REGS_STRING
750 | 1044 | 	"	movq %rsp, %rdi\n"
751 | 1045 | 	"	call trampoline_handler\n"
752 | 1046 | 	/* Replace saved sp with true return address. */
753 | | -	"	movq %rax, 152(%rsp)\n"
 | 1047 | +	"	movq %rax, 19*8(%rsp)\n"
754 | 1048 | 	RESTORE_REGS_STRING
755 | 1049 | 	"	popfq\n"
756 | 1050 | #else
757 | | -	"	pushf\n"
 | 1051 | +	"	pushl %esp\n"
 | 1052 | +	"	pushfl\n"
758 | 1053 | 	SAVE_REGS_STRING
759 | 1054 | 	"	movl %esp, %eax\n"
760 | 1055 | 	"	call trampoline_handler\n"
761 | | -	/* Move flags to cs */
762 | | -	"	movl 56(%esp), %edx\n"
763 | | -	"	movl %edx, 52(%esp)\n"
764 | | -	/* Replace saved flags with true return address. */
765 | | -	"	movl %eax, 56(%esp)\n"
 | 1056 | +	/* Replace saved sp with true return address. */
 | 1057 | +	"	movl %eax, 15*4(%esp)\n"
766 | 1058 | 	RESTORE_REGS_STRING
767 | | -	"	popf\n"
 | 1059 | +	"	popfl\n"
768 | 1060 | #endif
769 | | -	"	ret\n"
 | 1061 | +	ASM_RET
770 | 1062 | 	".size kretprobe_trampoline, .-kretprobe_trampoline\n"
771 | 1063 | );
772 | 1064 | NOKPROBE_SYMBOL(kretprobe_trampoline);
773 | 1065 | STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
774 | 1066 |
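
Editor's note: the 19*8(%rsp) store is less magic than it looks. After `pushq %rsp`, `pushfq`, and SAVE_REGS_STRING, the stack mirrors struct pt_regs (minus ss, which the comment above notes is not saved), and sp is the 20th quadword. A compile-time check against a mock of the assumed x86-64 layout:

```c
#include <stddef.h>

/* Field order assumed to match the x86-64 struct pt_regs. */
struct mock_pt_regs {
	unsigned long r15, r14, r13, r12, bp, bx;
	unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
	unsigned long orig_ax, ip, cs, flags, sp, ss;
};

/* 19 quadwords precede sp, so 19*8(%rsp) is the saved-sp slot;
 * the old literal "152(%rsp)" was the same offset spelled out. */
_Static_assert(offsetof(struct mock_pt_regs, sp) == 19 * 8,
	       "saved sp lives 19 quadwords into the frame");
```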
 | 1067 | +
775 | 1068 | /*
776 | 1069 |  * Called from kretprobe_trampoline
777 | 1070 |  */
778 | | -__visible __used void *trampoline_handler(struct pt_regs *regs)
 | 1071 | +__used __visible void *trampoline_handler(struct pt_regs *regs)
779 | 1072 | {
780 | | -	struct kretprobe_instance *ri = NULL;
781 | | -	struct hlist_head *head, empty_rp;
782 | | -	struct hlist_node *tmp;
783 | | -	unsigned long flags, orig_ret_address = 0;
784 | | -	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
785 | | -	kprobe_opcode_t *correct_ret_addr = NULL;
786 | | -	void *frame_pointer;
787 | | -	bool skipped = false;
788 | | -
789 | | -	/*
790 | | -	 * Set a dummy kprobe for avoiding kretprobe recursion.
791 | | -	 * Since kretprobe never run in kprobe handler, kprobe must not
792 | | -	 * be running at this point.
793 | | -	 */
794 | | -	kprobe_busy_begin();
795 | | -
796 | | -	INIT_HLIST_HEAD(&empty_rp);
797 | | -	kretprobe_hash_lock(current, &head, &flags);
798 | 1073 | 	/* fixup registers */
799 | | -#ifdef CONFIG_X86_64
800 | 1074 | 	regs->cs = __KERNEL_CS;
801 | | -	/* On x86-64, we use pt_regs->sp for return address holder. */
802 | | -	frame_pointer = &regs->sp;
803 | | -#else
804 | | -	regs->cs = __KERNEL_CS | get_kernel_rpl();
 | 1075 | +#ifdef CONFIG_X86_32
805 | 1076 | 	regs->gs = 0;
806 | | -	/* On x86-32, we use pt_regs->flags for return address holder. */
807 | | -	frame_pointer = &regs->flags;
808 | 1077 | #endif
809 | | -	regs->ip = trampoline_address;
 | 1078 | +	regs->ip = (unsigned long)&kretprobe_trampoline;
810 | 1079 | 	regs->orig_ax = ~0UL;
811 | 1080 |
812 | | -	/*
813 | | -	 * It is possible to have multiple instances associated with a given
814 | | -	 * task either because multiple functions in the call path have
815 | | -	 * return probes installed on them, and/or more than one
816 | | -	 * return probe was registered for a target function.
817 | | -	 *
818 | | -	 * We can handle this because:
819 | | -	 * - instances are always pushed into the head of the list
820 | | -	 * - when multiple return probes are registered for the same
821 | | -	 *   function, the (chronologically) first instance's ret_addr
822 | | -	 *   will be the real return address, and all the rest will
823 | | -	 *   point to kretprobe_trampoline.
824 | | -	 */
825 | | -	hlist_for_each_entry(ri, head, hlist) {
826 | | -		if (ri->task != current)
827 | | -			/* another task is sharing our hash bucket */
828 | | -			continue;
829 | | -		/*
830 | | -		 * Return probes must be pushed on this hash list correct
831 | | -		 * order (same as return order) so that it can be poped
832 | | -		 * correctly. However, if we find it is pushed it incorrect
833 | | -		 * order, this means we find a function which should not be
834 | | -		 * probed, because the wrong order entry is pushed on the
835 | | -		 * path of processing other kretprobe itself.
836 | | -		 */
837 | | -		if (ri->fp != frame_pointer) {
838 | | -			if (!skipped)
839 | | -				pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
840 | | -			skipped = true;
841 | | -			continue;
842 | | -		}
843 | | -
844 | | -		orig_ret_address = (unsigned long)ri->ret_addr;
845 | | -		if (skipped)
846 | | -			pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
847 | | -				ri->rp->kp.addr);
848 | | -
849 | | -		if (orig_ret_address != trampoline_address)
850 | | -			/*
851 | | -			 * This is the real return address. Any other
852 | | -			 * instances associated with this task are for
853 | | -			 * other calls deeper on the call stack
854 | | -			 */
855 | | -			break;
856 | | -	}
857 | | -
858 | | -	kretprobe_assert(ri, orig_ret_address, trampoline_address);
859 | | -
860 | | -	correct_ret_addr = ri->ret_addr;
861 | | -	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
862 | | -		if (ri->task != current)
863 | | -			/* another task is sharing our hash bucket */
864 | | -			continue;
865 | | -		if (ri->fp != frame_pointer)
866 | | -			continue;
867 | | -
868 | | -		orig_ret_address = (unsigned long)ri->ret_addr;
869 | | -		if (ri->rp && ri->rp->handler) {
870 | | -			__this_cpu_write(current_kprobe, &ri->rp->kp);
871 | | -			ri->ret_addr = correct_ret_addr;
872 | | -			ri->rp->handler(ri, regs);
873 | | -			__this_cpu_write(current_kprobe, &kprobe_busy);
874 | | -		}
875 | | -
876 | | -		recycle_rp_inst(ri, &empty_rp);
877 | | -
878 | | -		if (orig_ret_address != trampoline_address)
879 | | -			/*
880 | | -			 * This is the real return address. Any other
881 | | -			 * instances associated with this task are for
882 | | -			 * other calls deeper on the call stack
883 | | -			 */
884 | | -			break;
885 | | -	}
886 | | -
887 | | -	kretprobe_hash_unlock(current, &flags);
888 | | -
889 | | -	kprobe_busy_end();
890 | | -
891 | | -	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
892 | | -		hlist_del(&ri->hlist);
893 | | -		kfree(ri);
894 | | -	}
895 | | -	return (void *)orig_ret_address;
 | 1081 | +	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, &regs->sp);
896 | 1082 | }
897 | 1083 | NOKPROBE_SYMBOL(trampoline_handler);
898 | | -
899 | | -/*
900 | | - * Called after single-stepping.  p->addr is the address of the
901 | | - * instruction whose first byte has been replaced by the "int 3"
902 | | - * instruction.  To avoid the SMP problems that can occur when we
903 | | - * temporarily put back the original opcode to single-step, we
904 | | - * single-stepped a copy of the instruction.  The address of this
905 | | - * copy is p->ainsn.insn.
906 | | - *
907 | | - * This function prepares to return from the post-single-step
908 | | - * interrupt.  We have to fix up the stack as follows:
909 | | - *
910 | | - * 0) Except in the case of absolute or indirect jump or call instructions,
911 | | - * the new ip is relative to the copied instruction.  We need to make
912 | | - * it relative to the original instruction.
913 | | - *
914 | | - * 1) If the single-stepped instruction was pushfl, then the TF and IF
915 | | - * flags are set in the just-pushed flags, and may need to be cleared.
916 | | - *
917 | | - * 2) If the single-stepped instruction was a call, the return address
918 | | - * that is atop the stack is the address following the copied instruction.
919 | | - * We need to make it the address following the original instruction.
920 | | - *
921 | | - * If this is the first time we've single-stepped the instruction at
922 | | - * this probepoint, and the instruction is boostable, boost it: add a
923 | | - * jump instruction after the copied instruction, that jumps to the next
924 | | - * instruction after the probepoint.
925 | | - */
926 | | -static void resume_execution(struct kprobe *p, struct pt_regs *regs,
927 | | -			     struct kprobe_ctlblk *kcb)
928 | | -{
929 | | -	unsigned long *tos = stack_addr(regs);
930 | | -	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
931 | | -	unsigned long orig_ip = (unsigned long)p->addr;
932 | | -	kprobe_opcode_t *insn = p->ainsn.insn;
933 | | -
934 | | -	/* Skip prefixes */
935 | | -	insn = skip_prefixes(insn);
936 | | -
937 | | -	regs->flags &= ~X86_EFLAGS_TF;
938 | | -	switch (*insn) {
939 | | -	case 0x9c:	/* pushfl */
940 | | -		*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
941 | | -		*tos |= kcb->kprobe_old_flags;
942 | | -		break;
943 | | -	case 0xc2:	/* iret/ret/lret */
944 | | -	case 0xc3:
945 | | -	case 0xca:
946 | | -	case 0xcb:
947 | | -	case 0xcf:
948 | | -	case 0xea:	/* jmp absolute -- ip is correct */
949 | | -		/* ip is already adjusted, no more changes required */
950 | | -		p->ainsn.boostable = true;
951 | | -		goto no_change;
952 | | -	case 0xe8:	/* call relative - Fix return addr */
953 | | -		*tos = orig_ip + (*tos - copy_ip);
954 | | -		break;
955 | | -#ifdef CONFIG_X86_32
956 | | -	case 0x9a:	/* call absolute -- same as call absolute, indirect */
957 | | -		*tos = orig_ip + (*tos - copy_ip);
958 | | -		goto no_change;
959 | | -#endif
960 | | -	case 0xff:
961 | | -		if ((insn[1] & 0x30) == 0x10) {
962 | | -			/*
963 | | -			 * call absolute, indirect
964 | | -			 * Fix return addr; ip is correct.
965 | | -			 * But this is not boostable
966 | | -			 */
967 | | -			*tos = orig_ip + (*tos - copy_ip);
968 | | -			goto no_change;
969 | | -		} else if (((insn[1] & 0x31) == 0x20) ||
970 | | -			   ((insn[1] & 0x31) == 0x21)) {
971 | | -			/*
972 | | -			 * jmp near and far, absolute indirect
973 | | -			 * ip is correct. And this is boostable
974 | | -			 */
975 | | -			p->ainsn.boostable = true;
976 | | -			goto no_change;
977 | | -		}
978 | | -	default:
979 | | -		break;
980 | | -	}
981 | | -
982 | | -	regs->ip += orig_ip - copy_ip;
983 | | -
984 | | -no_change:
985 | | -	restore_btf();
986 | | -}
987 | | -NOKPROBE_SYMBOL(resume_execution);
988 | | -
989 | | -/*
990 | | - * Interrupts are disabled on entry as trap1 is an interrupt gate and they
991 | | - * remain disabled throughout this function.
992 | | - */
993 | | -int kprobe_debug_handler(struct pt_regs *regs)
994 | | -{
995 | | -	struct kprobe *cur = kprobe_running();
996 | | -	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
997 | | -
998 | | -	if (!cur)
999 | | -		return 0;
1000 | | -
1001 | | -	resume_execution(cur, regs, kcb);
1002 | | -	regs->flags |= kcb->kprobe_saved_flags;
1003 | | -
1004 | | -	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
1005 | | -		kcb->kprobe_status = KPROBE_HIT_SSDONE;
1006 | | -		cur->post_handler(cur, regs, 0);
1007 | | -	}
1008 | | -
1009 | | -	/* Restore back the original saved kprobes variables and continue. */
1010 | | -	if (kcb->kprobe_status == KPROBE_REENTER) {
1011 | | -		restore_previous_kprobe(kcb);
1012 | | -		goto out;
1013 | | -	}
1014 | | -	reset_current_kprobe();
1015 | | -out:
1016 | | -	/*
1017 | | -	 * if somebody else is singlestepping across a probe point, flags
1018 | | -	 * will have TF set, in which case, continue the remaining processing
1019 | | -	 * of do_debug, as if this is not a probe hit.
1020 | | -	 */
1021 | | -	if (regs->flags & X86_EFLAGS_TF)
1022 | | -		return 0;
1023 | | -
1024 | | -	return 1;
1025 | | -}
1026 | | -NOKPROBE_SYMBOL(kprobe_debug_handler);
1027 | 1084 |
1028 | 1085 | int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
1029 | 1086 | {
.. | ..
1042 | 1099 | 		 * normal page fault.
1043 | 1100 | 		 */
1044 | 1101 | 		regs->ip = (unsigned long)cur->addr;
1045 | | -		/*
1046 | | -		 * Trap flag (TF) has been set here because this fault
1047 | | -		 * happened where the single stepping will be done.
1048 | | -		 * So clear it by resetting the current kprobe:
1049 | | -		 */
1050 | | -		regs->flags &= ~X86_EFLAGS_TF;
1051 | | -		/*
1052 | | -		 * Since the single step (trap) has been cancelled,
1053 | | -		 * we need to restore BTF here.
1054 | | -		 */
1055 | | -		restore_btf();
1056 | 1102 |
1057 | 1103 | 		/*
1058 | | -		 * If the TF flag was set before the kprobe hit,
 | 1104 | +		 * If the IF flag was set before the kprobe hit,
1059 | 1105 | 		 * don't touch it:
1060 | 1106 | 		 */
1061 | 1107 | 		regs->flags |= kcb->kprobe_old_flags;
.. | ..
1082 | 1128 | 		 */
1083 | 1129 | 		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
1084 | 1130 | 			return 1;
1085 | | -
1086 | | -		/*
1087 | | -		 * In case the user-specified fault handler returned
1088 | | -		 * zero, try to fix up.
1089 | | -		 */
1090 | | -		if (fixup_exception(regs, trapnr))
1091 | | -			return 1;
1092 | | -
1093 | | -		/*
1094 | | -		 * fixup routine could not handle it,
1095 | | -		 * Let do_page_fault() fix it.
1096 | | -		 */
1097 | 1131 | 	}
1098 | 1132 |
1099 | 1133 | 	return 0;
1100 | 1134 | }
1101 | 1135 | NOKPROBE_SYMBOL(kprobe_fault_handler);
1102 | | -
1103 | | -/*
1104 | | - * Wrapper routine for handling exceptions.
1105 | | - */
1106 | | -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
1107 | | -			     void *data)
1108 | | -{
1109 | | -	struct die_args *args = data;
1110 | | -	int ret = NOTIFY_DONE;
1111 | | -
1112 | | -	if (args->regs && user_mode(args->regs))
1113 | | -		return ret;
1114 | | -
1115 | | -	if (val == DIE_GPF) {
1116 | | -		/*
1117 | | -		 * To be potentially processing a kprobe fault and to
1118 | | -		 * trust the result from kprobe_running(), we have
1119 | | -		 * be non-preemptible.
1120 | | -		 */
1121 | | -		if (!preemptible() && kprobe_running() &&
1122 | | -		    kprobe_fault_handler(args->regs, args->trapnr))
1123 | | -			ret = NOTIFY_STOP;
1124 | | -	}
1125 | | -	return ret;
1126 | | -}
1127 | | -NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1128 | | -
1129 | | -bool arch_within_kprobe_blacklist(unsigned long addr)
1130 | | -{
1131 | | -	bool is_in_entry_trampoline_section = false;
1132 | | -
1133 | | -#ifdef CONFIG_X86_64
1134 | | -	is_in_entry_trampoline_section =
1135 | | -		(addr >= (unsigned long)__entry_trampoline_start &&
1136 | | -		 addr < (unsigned long)__entry_trampoline_end);
1137 | | -#endif
1138 | | -	return (addr >= (unsigned long)__kprobes_text_start &&
1139 | | -		addr < (unsigned long)__kprobes_text_end) ||
1140 | | -	       (addr >= (unsigned long)__entry_text_start &&
1141 | | -		addr < (unsigned long)__entry_text_end) ||
1142 | | -	       is_in_entry_trampoline_section;
1143 | | -}
1144 | 1136 |
1145 | 1137 | int __init arch_populate_kprobe_blacklist(void)
1146 | 1138 | {