hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/kernel/kprobes/core.c
....@@ -37,6 +37,7 @@
3737 #include <linux/extable.h>
3838 #include <linux/kdebug.h>
3939 #include <linux/kallsyms.h>
40
+#include <linux/kgdb.h>
4041 #include <linux/ftrace.h>
4142 #include <linux/kasan.h>
4243 #include <linux/moduleloader.h>
....@@ -133,26 +134,6 @@
133134 NOKPROBE_SYMBOL(synthesize_relcall);
134135
135136 /*
136
- * Skip the prefixes of the instruction.
137
- */
138
-static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
139
-{
140
- insn_attr_t attr;
141
-
142
- attr = inat_get_opcode_attribute((insn_byte_t)*insn);
143
- while (inat_is_legacy_prefix(attr)) {
144
- insn++;
145
- attr = inat_get_opcode_attribute((insn_byte_t)*insn);
146
- }
147
-#ifdef CONFIG_X86_64
148
- if (inat_is_rex_prefix(attr))
149
- insn++;
150
-#endif
151
- return insn;
152
-}
153
-NOKPROBE_SYMBOL(skip_prefixes);
154
-
155
-/*
156137 * Returns non-zero if INSN is boostable.
157138 * RIP relative instructions are adjusted at copying time in 64 bits mode
158139 */
....@@ -184,29 +165,28 @@
184165
185166 opcode = insn->opcode.bytes[0];
186167
187
- switch (opcode & 0xf0) {
188
- case 0x60:
189
- /* can't boost "bound" */
190
- return (opcode != 0x62);
191
- case 0x70:
192
- return 0; /* can't boost conditional jump */
193
- case 0x90:
194
- return opcode != 0x9a; /* can't boost call far */
195
- case 0xc0:
196
- /* can't boost software-interruptions */
197
- return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
198
- case 0xd0:
199
- /* can boost AA* and XLAT */
200
- return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
201
- case 0xe0:
202
- /* can boost in/out and absolute jmps */
203
- return ((opcode & 0x04) || opcode == 0xea);
204
- case 0xf0:
205
- /* clear and set flags are boostable */
206
- return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
168
+ switch (opcode) {
169
+ case 0x62: /* bound */
170
+ case 0x70 ... 0x7f: /* Conditional jumps */
171
+ case 0x9a: /* Call far */
172
+ case 0xc0 ... 0xc1: /* Grp2 */
173
+ case 0xcc ... 0xce: /* software exceptions */
174
+ case 0xd0 ... 0xd3: /* Grp2 */
175
+ case 0xd6: /* (UD) */
176
+ case 0xd8 ... 0xdf: /* ESC */
177
+ case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
178
+ case 0xe8 ... 0xe9: /* near Call, JMP */
179
+ case 0xeb: /* Short JMP */
180
+ case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
181
+ case 0xf6 ... 0xf7: /* Grp3 */
182
+ case 0xfe: /* Grp4 */
183
+ /* ... are not boostable */
184
+ return 0;
185
+ case 0xff: /* Grp5 */
186
+ /* Only indirect jmp is boostable */
187
+ return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
207188 default:
208
- /* call is not boostable */
209
- return opcode != 0x9a;
189
+ return 1;
210190 }
211191 }
212192
....@@ -292,6 +272,8 @@
292272 /* Decode instructions */
293273 addr = paddr - offset;
294274 while (addr < paddr) {
275
+ int ret;
276
+
295277 /*
296278 * Check if the instruction has been modified by another
297279 * kprobe, in which case we replace the breakpoint by the
....@@ -303,38 +285,24 @@
303285 __addr = recover_probed_instruction(buf, addr);
304286 if (!__addr)
305287 return 0;
306
- kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
307
- insn_get_length(&insn);
308288
309
- /*
310
- * Another debugging subsystem might insert this breakpoint.
311
- * In that case, we can't recover it.
312
- */
313
- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
289
+ ret = insn_decode(&insn, (void *)__addr, MAX_INSN_SIZE, INSN_MODE_KERN);
290
+ if (ret < 0)
314291 return 0;
292
+
293
+#ifdef CONFIG_KGDB
294
+ /*
295
+ * If there is a dynamically installed kgdb sw breakpoint,
296
+ * this function should not be probed.
297
+ */
298
+ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
299
+ kgdb_has_hit_break(addr))
300
+ return 0;
301
+#endif
315302 addr += insn.length;
316303 }
317304
318305 return (addr == paddr);
319
-}
320
-
321
-/*
322
- * Returns non-zero if opcode modifies the interrupt flag.
323
- */
324
-static int is_IF_modifier(kprobe_opcode_t *insn)
325
-{
326
- /* Skip prefixes */
327
- insn = skip_prefixes(insn);
328
-
329
- switch (*insn) {
330
- case 0xfa: /* cli */
331
- case 0xfb: /* sti */
332
- case 0xcf: /* iret/iretd */
333
- case 0x9d: /* popf/popfd */
334
- return 1;
335
- }
336
-
337
- return 0;
338306 }
339307
340308 /*
....@@ -347,8 +315,8 @@
347315 int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
348316 {
349317 kprobe_opcode_t buf[MAX_INSN_SIZE];
350
- unsigned long recovered_insn =
351
- recover_probed_instruction(buf, (unsigned long)src);
318
+ unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
319
+ int ret;
352320
353321 if (!recovered_insn || !insn)
354322 return 0;
....@@ -358,8 +326,9 @@
358326 MAX_INSN_SIZE))
359327 return 0;
360328
361
- kernel_insn_init(insn, dest, MAX_INSN_SIZE);
362
- insn_get_length(insn);
329
+ ret = insn_decode(insn, dest, MAX_INSN_SIZE, INSN_MODE_KERN);
330
+ if (ret < 0)
331
+ return 0;
363332
364333 /* We can not probe force emulate prefixed instruction */
365334 if (insn_has_emulate_prefix(insn))
....@@ -403,13 +372,14 @@
403372 return insn->length;
404373 }
405374
406
-/* Prepare reljump right after instruction to boost */
407
-static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
408
- struct insn *insn)
375
+/* Prepare reljump or int3 right after instruction */
376
+static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
377
+ struct insn *insn)
409378 {
410379 int len = insn->length;
411380
412
- if (can_boost(insn, p->addr) &&
381
+ if (!IS_ENABLED(CONFIG_PREEMPTION) &&
382
+ !p->post_handler && can_boost(insn, p->addr) &&
413383 MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
414384 /*
415385 * These instructions can be executed directly if it
....@@ -418,9 +388,14 @@
418388 synthesize_reljump(buf + len, p->ainsn.insn + len,
419389 p->addr + insn->length);
420390 len += JMP32_INSN_SIZE;
421
- p->ainsn.boostable = true;
391
+ p->ainsn.boostable = 1;
422392 } else {
423
- p->ainsn.boostable = false;
393
+ /* Otherwise, put an int3 for trapping singlestep */
394
+ if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
395
+ return -ENOSPC;
396
+
397
+ buf[len] = INT3_INSN_OPCODE;
398
+ len += INT3_INSN_SIZE;
424399 }
425400
426401 return len;
....@@ -457,25 +432,290 @@
457432 module_memfree(page);
458433 }
459434
435
+/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
436
+
437
+static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs)
438
+{
439
+ switch (p->ainsn.opcode) {
440
+ case 0xfa: /* cli */
441
+ regs->flags &= ~(X86_EFLAGS_IF);
442
+ break;
443
+ case 0xfb: /* sti */
444
+ regs->flags |= X86_EFLAGS_IF;
445
+ break;
446
+ case 0x9c: /* pushf */
447
+ int3_emulate_push(regs, regs->flags);
448
+ break;
449
+ case 0x9d: /* popf */
450
+ regs->flags = int3_emulate_pop(regs);
451
+ break;
452
+ }
453
+ regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
454
+}
455
+NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers);
456
+
457
+static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
458
+{
459
+ int3_emulate_ret(regs);
460
+}
461
+NOKPROBE_SYMBOL(kprobe_emulate_ret);
462
+
463
+static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
464
+{
465
+ unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
466
+
467
+ func += p->ainsn.rel32;
468
+ int3_emulate_call(regs, func);
469
+}
470
+NOKPROBE_SYMBOL(kprobe_emulate_call);
471
+
472
+static nokprobe_inline
473
+void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
474
+{
475
+ unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
476
+
477
+ if (cond)
478
+ ip += p->ainsn.rel32;
479
+ int3_emulate_jmp(regs, ip);
480
+}
481
+
482
+static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
483
+{
484
+ __kprobe_emulate_jmp(p, regs, true);
485
+}
486
+NOKPROBE_SYMBOL(kprobe_emulate_jmp);
487
+
488
+static const unsigned long jcc_mask[6] = {
489
+ [0] = X86_EFLAGS_OF,
490
+ [1] = X86_EFLAGS_CF,
491
+ [2] = X86_EFLAGS_ZF,
492
+ [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
493
+ [4] = X86_EFLAGS_SF,
494
+ [5] = X86_EFLAGS_PF,
495
+};
496
+
497
+static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
498
+{
499
+ bool invert = p->ainsn.jcc.type & 1;
500
+ bool match;
501
+
502
+ if (p->ainsn.jcc.type < 0xc) {
503
+ match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
504
+ } else {
505
+ match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
506
+ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
507
+ if (p->ainsn.jcc.type >= 0xe)
508
+ match = match || (regs->flags & X86_EFLAGS_ZF);
509
+ }
510
+ __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
511
+}
512
+NOKPROBE_SYMBOL(kprobe_emulate_jcc);
513
+
514
+static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
515
+{
516
+ bool match;
517
+
518
+ if (p->ainsn.loop.type != 3) { /* LOOP* */
519
+ if (p->ainsn.loop.asize == 32)
520
+ match = ((*(u32 *)&regs->cx)--) != 0;
521
+#ifdef CONFIG_X86_64
522
+ else if (p->ainsn.loop.asize == 64)
523
+ match = ((*(u64 *)&regs->cx)--) != 0;
524
+#endif
525
+ else
526
+ match = ((*(u16 *)&regs->cx)--) != 0;
527
+ } else { /* JCXZ */
528
+ if (p->ainsn.loop.asize == 32)
529
+ match = *(u32 *)(&regs->cx) == 0;
530
+#ifdef CONFIG_X86_64
531
+ else if (p->ainsn.loop.asize == 64)
532
+ match = *(u64 *)(&regs->cx) == 0;
533
+#endif
534
+ else
535
+ match = *(u16 *)(&regs->cx) == 0;
536
+ }
537
+
538
+ if (p->ainsn.loop.type == 0) /* LOOPNE */
539
+ match = match && !(regs->flags & X86_EFLAGS_ZF);
540
+ else if (p->ainsn.loop.type == 1) /* LOOPE */
541
+ match = match && (regs->flags & X86_EFLAGS_ZF);
542
+
543
+ __kprobe_emulate_jmp(p, regs, match);
544
+}
545
+NOKPROBE_SYMBOL(kprobe_emulate_loop);
546
+
547
+static const int addrmode_regoffs[] = {
548
+ offsetof(struct pt_regs, ax),
549
+ offsetof(struct pt_regs, cx),
550
+ offsetof(struct pt_regs, dx),
551
+ offsetof(struct pt_regs, bx),
552
+ offsetof(struct pt_regs, sp),
553
+ offsetof(struct pt_regs, bp),
554
+ offsetof(struct pt_regs, si),
555
+ offsetof(struct pt_regs, di),
556
+#ifdef CONFIG_X86_64
557
+ offsetof(struct pt_regs, r8),
558
+ offsetof(struct pt_regs, r9),
559
+ offsetof(struct pt_regs, r10),
560
+ offsetof(struct pt_regs, r11),
561
+ offsetof(struct pt_regs, r12),
562
+ offsetof(struct pt_regs, r13),
563
+ offsetof(struct pt_regs, r14),
564
+ offsetof(struct pt_regs, r15),
565
+#endif
566
+};
567
+
568
+static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
569
+{
570
+ unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
571
+
572
+ int3_emulate_call(regs, regs_get_register(regs, offs));
573
+}
574
+NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
575
+
576
+static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs)
577
+{
578
+ unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
579
+
580
+ int3_emulate_jmp(regs, regs_get_register(regs, offs));
581
+}
582
+NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect);
583
+
584
+static int prepare_emulation(struct kprobe *p, struct insn *insn)
585
+{
586
+ insn_byte_t opcode = insn->opcode.bytes[0];
587
+
588
+ switch (opcode) {
589
+ case 0xfa: /* cli */
590
+ case 0xfb: /* sti */
591
+ case 0x9c: /* pushfl */
592
+ case 0x9d: /* popf/popfd */
593
+ /*
594
+ * IF modifiers must be emulated since they will enable interrupts while
595
+ * int3 single stepping.
596
+ */
597
+ p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
598
+ p->ainsn.opcode = opcode;
599
+ break;
600
+ case 0xc2: /* ret/lret */
601
+ case 0xc3:
602
+ case 0xca:
603
+ case 0xcb:
604
+ p->ainsn.emulate_op = kprobe_emulate_ret;
605
+ break;
606
+ case 0x9a: /* far call absolute -- segment is not supported */
607
+ case 0xea: /* far jmp absolute -- segment is not supported */
608
+ case 0xcc: /* int3 */
609
+ case 0xcf: /* iret -- in-kernel IRET is not supported */
610
+ return -EOPNOTSUPP;
611
+ break;
612
+ case 0xe8: /* near call relative */
613
+ p->ainsn.emulate_op = kprobe_emulate_call;
614
+ if (insn->immediate.nbytes == 2)
615
+ p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
616
+ else
617
+ p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
618
+ break;
619
+ case 0xeb: /* short jump relative */
620
+ case 0xe9: /* near jump relative */
621
+ p->ainsn.emulate_op = kprobe_emulate_jmp;
622
+ if (insn->immediate.nbytes == 1)
623
+ p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
624
+ else if (insn->immediate.nbytes == 2)
625
+ p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
626
+ else
627
+ p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
628
+ break;
629
+ case 0x70 ... 0x7f:
630
+ /* 1 byte conditional jump */
631
+ p->ainsn.emulate_op = kprobe_emulate_jcc;
632
+ p->ainsn.jcc.type = opcode & 0xf;
633
+ p->ainsn.rel32 = *(char *)insn->immediate.bytes;
634
+ break;
635
+ case 0x0f:
636
+ opcode = insn->opcode.bytes[1];
637
+ if ((opcode & 0xf0) == 0x80) {
638
+ /* 2 bytes Conditional Jump */
639
+ p->ainsn.emulate_op = kprobe_emulate_jcc;
640
+ p->ainsn.jcc.type = opcode & 0xf;
641
+ if (insn->immediate.nbytes == 2)
642
+ p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
643
+ else
644
+ p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
645
+ } else if (opcode == 0x01 &&
646
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
647
+ X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
648
+ /* VM extensions - not supported */
649
+ return -EOPNOTSUPP;
650
+ }
651
+ break;
652
+ case 0xe0: /* Loop NZ */
653
+ case 0xe1: /* Loop Z */
654
+ case 0xe2: /* Loop */
655
+ case 0xe3: /* J*CXZ */
656
+ p->ainsn.emulate_op = kprobe_emulate_loop;
657
+ p->ainsn.loop.type = opcode & 0x3;
658
+ p->ainsn.loop.asize = insn->addr_bytes * 8;
659
+ p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
660
+ break;
661
+ case 0xff:
662
+ /*
663
+ * Since the 0xff is an extended group opcode, the instruction
664
+ * is determined by the MOD/RM byte.
665
+ */
666
+ opcode = insn->modrm.bytes[0];
667
+ if ((opcode & 0x30) == 0x10) {
668
+ if ((opcode & 0x8) == 0x8)
669
+ return -EOPNOTSUPP; /* far call */
670
+ /* call absolute, indirect */
671
+ p->ainsn.emulate_op = kprobe_emulate_call_indirect;
672
+ } else if ((opcode & 0x30) == 0x20) {
673
+ if ((opcode & 0x8) == 0x8)
674
+ return -EOPNOTSUPP; /* far jmp */
675
+ /* jmp near absolute indirect */
676
+ p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
677
+ } else
678
+ break;
679
+
680
+ if (insn->addr_bytes != sizeof(unsigned long))
681
+ return -EOPNOTSUPP; /* Don't support different size */
682
+ if (X86_MODRM_MOD(opcode) != 3)
683
+ return -EOPNOTSUPP; /* TODO: support memory addressing */
684
+
685
+ p->ainsn.indirect.reg = X86_MODRM_RM(opcode);
686
+#ifdef CONFIG_X86_64
687
+ if (X86_REX_B(insn->rex_prefix.value))
688
+ p->ainsn.indirect.reg += 8;
689
+#endif
690
+ break;
691
+ default:
692
+ break;
693
+ }
694
+ p->ainsn.size = insn->length;
695
+
696
+ return 0;
697
+}
698
+
460699 static int arch_copy_kprobe(struct kprobe *p)
461700 {
462701 struct insn insn;
463702 kprobe_opcode_t buf[MAX_INSN_SIZE];
464
- int len;
703
+ int ret, len;
465704
466705 /* Copy an instruction with recovering if other optprobe modifies it.*/
467706 len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
468707 if (!len)
469708 return -EINVAL;
470709
471
- /*
472
- * __copy_instruction can modify the displacement of the instruction,
473
- * but it doesn't affect boostable check.
474
- */
475
- len = prepare_boost(buf, p, &insn);
710
+ /* Analyze the opcode and setup emulate functions */
711
+ ret = prepare_emulation(p, &insn);
712
+ if (ret < 0)
713
+ return ret;
476714
477
- /* Check whether the instruction modifies Interrupt Flag or not */
478
- p->ainsn.if_modifier = is_IF_modifier(buf);
715
+ /* Add int3 for single-step or booster jmp */
716
+ len = prepare_singlestep(buf, p, &insn);
717
+ if (len < 0)
718
+ return len;
479719
480720 /* Also, displacement change doesn't affect the first byte */
481721 p->opcode = buf[0];
....@@ -498,6 +738,9 @@
498738
499739 if (!can_probe((unsigned long)p->addr))
500740 return -EILSEQ;
741
+
742
+ memset(&p->ainsn, 0, sizeof(p->ainsn));
743
+
501744 /* insn: must be on special executable page on x86. */
502745 p->ainsn.insn = get_insn_slot();
503746 if (!p->ainsn.insn)
....@@ -565,29 +808,7 @@
565808 {
566809 __this_cpu_write(current_kprobe, p);
567810 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
568
- = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
569
- if (p->ainsn.if_modifier)
570
- kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
571
-}
572
-
573
-static nokprobe_inline void clear_btf(void)
574
-{
575
- if (test_thread_flag(TIF_BLOCKSTEP)) {
576
- unsigned long debugctl = get_debugctlmsr();
577
-
578
- debugctl &= ~DEBUGCTLMSR_BTF;
579
- update_debugctlmsr(debugctl);
580
- }
581
-}
582
-
583
-static nokprobe_inline void restore_btf(void)
584
-{
585
- if (test_thread_flag(TIF_BLOCKSTEP)) {
586
- unsigned long debugctl = get_debugctlmsr();
587
-
588
- debugctl |= DEBUGCTLMSR_BTF;
589
- update_debugctlmsr(debugctl);
590
- }
811
+ = (regs->flags & X86_EFLAGS_IF);
591812 }
592813
593814 void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
....@@ -602,6 +823,26 @@
602823 }
603824 NOKPROBE_SYMBOL(arch_prepare_kretprobe);
604825
826
+static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
827
+ struct kprobe_ctlblk *kcb)
828
+{
829
+ /* Restore back the original saved kprobes variables and continue. */
830
+ if (kcb->kprobe_status == KPROBE_REENTER) {
831
+ /* This will restore both kcb and current_kprobe */
832
+ restore_previous_kprobe(kcb);
833
+ } else {
834
+ /*
835
+ * Always update the kcb status because
836
+ * reset_curent_kprobe() doesn't update kcb.
837
+ */
838
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
839
+ if (cur->post_handler)
840
+ cur->post_handler(cur, regs, 0);
841
+ reset_current_kprobe();
842
+ }
843
+}
844
+NOKPROBE_SYMBOL(kprobe_post_process);
845
+
605846 static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
606847 struct kprobe_ctlblk *kcb, int reenter)
607848 {
....@@ -609,7 +850,7 @@
609850 return;
610851
611852 #if !defined(CONFIG_PREEMPTION)
612
- if (p->ainsn.boostable && !p->post_handler) {
853
+ if (p->ainsn.boostable) {
613854 /* Boost up -- we can execute copied instructions directly */
614855 if (!reenter)
615856 reset_current_kprobe();
....@@ -628,17 +869,49 @@
628869 kcb->kprobe_status = KPROBE_REENTER;
629870 } else
630871 kcb->kprobe_status = KPROBE_HIT_SS;
631
- /* Prepare real single stepping */
632
- clear_btf();
633
- regs->flags |= X86_EFLAGS_TF;
872
+
873
+ if (p->ainsn.emulate_op) {
874
+ p->ainsn.emulate_op(p, regs);
875
+ kprobe_post_process(p, regs, kcb);
876
+ return;
877
+ }
878
+
879
+ /* Disable interrupt, and set ip register on trampoline */
634880 regs->flags &= ~X86_EFLAGS_IF;
635
- /* single step inline if the instruction is an int3 */
636
- if (p->opcode == INT3_INSN_OPCODE)
637
- regs->ip = (unsigned long)p->addr;
638
- else
639
- regs->ip = (unsigned long)p->ainsn.insn;
881
+ regs->ip = (unsigned long)p->ainsn.insn;
640882 }
641883 NOKPROBE_SYMBOL(setup_singlestep);
884
+
885
+/*
886
+ * Called after single-stepping. p->addr is the address of the
887
+ * instruction whose first byte has been replaced by the "int3"
888
+ * instruction. To avoid the SMP problems that can occur when we
889
+ * temporarily put back the original opcode to single-step, we
890
+ * single-stepped a copy of the instruction. The address of this
891
+ * copy is p->ainsn.insn. We also don't use a trap, but an "int3" again
892
+ * right after the copied instruction.
893
+ * Different from the trap single-step, "int3" single-step can not
894
+ * handle the instruction which changes the ip register, e.g. jmp,
895
+ * call, conditional jmp, and the instructions which change the IF
896
+ * flags because interrupt must be disabled around the single-stepping.
897
+ * Such instructions are software emulated, but others are single-stepped
898
+ * using "int3".
899
+ *
900
+ * When the 2nd "int3" is handled, the regs->ip and regs->flags need to
901
+ * be adjusted, so that we can resume execution on correct code.
902
+ */
903
+static void resume_singlestep(struct kprobe *p, struct pt_regs *regs,
904
+ struct kprobe_ctlblk *kcb)
905
+{
906
+ unsigned long copy_ip = (unsigned long)p->ainsn.insn;
907
+ unsigned long orig_ip = (unsigned long)p->addr;
908
+
909
+ /* Restore saved interrupt flag and ip register */
910
+ regs->flags |= kcb->kprobe_saved_flags;
911
+ /* Note that regs->ip is past the executed int3, so it must step back */
912
+ regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
913
+}
914
+NOKPROBE_SYMBOL(resume_singlestep);
642915
643916 /*
644917 * We have reentered the kprobe_handler(), since another probe was hit while
....@@ -674,6 +947,12 @@
674947 return 1;
675948 }
676949 NOKPROBE_SYMBOL(reenter_kprobe);
950
+
951
+static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb)
952
+{
953
+ return (kcb->kprobe_status == KPROBE_HIT_SS ||
954
+ kcb->kprobe_status == KPROBE_REENTER);
955
+}
677956
678957 /*
679958 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
....@@ -719,7 +998,18 @@
719998 reset_current_kprobe();
720999 return 1;
7211000 }
722
- } else if (*addr != INT3_INSN_OPCODE) {
1001
+ } else if (kprobe_is_ss(kcb)) {
1002
+ p = kprobe_running();
1003
+ if ((unsigned long)p->ainsn.insn < regs->ip &&
1004
+ (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) {
1005
+ /* Most probably this is the second int3 for singlestep */
1006
+ resume_singlestep(p, regs, kcb);
1007
+ kprobe_post_process(p, regs, kcb);
1008
+ return 1;
1009
+ }
1010
+ }
1011
+
1012
+ if (*addr != INT3_INSN_OPCODE) {
7231013 /*
7241014 * The breakpoint instruction was removed right
7251015 * after we hit it. Another cpu has removed
....@@ -792,135 +1082,6 @@
7921082 }
7931083 NOKPROBE_SYMBOL(trampoline_handler);
7941084
795
-/*
796
- * Called after single-stepping. p->addr is the address of the
797
- * instruction whose first byte has been replaced by the "int 3"
798
- * instruction. To avoid the SMP problems that can occur when we
799
- * temporarily put back the original opcode to single-step, we
800
- * single-stepped a copy of the instruction. The address of this
801
- * copy is p->ainsn.insn.
802
- *
803
- * This function prepares to return from the post-single-step
804
- * interrupt. We have to fix up the stack as follows:
805
- *
806
- * 0) Except in the case of absolute or indirect jump or call instructions,
807
- * the new ip is relative to the copied instruction. We need to make
808
- * it relative to the original instruction.
809
- *
810
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
811
- * flags are set in the just-pushed flags, and may need to be cleared.
812
- *
813
- * 2) If the single-stepped instruction was a call, the return address
814
- * that is atop the stack is the address following the copied instruction.
815
- * We need to make it the address following the original instruction.
816
- *
817
- * If this is the first time we've single-stepped the instruction at
818
- * this probepoint, and the instruction is boostable, boost it: add a
819
- * jump instruction after the copied instruction, that jumps to the next
820
- * instruction after the probepoint.
821
- */
822
-static void resume_execution(struct kprobe *p, struct pt_regs *regs,
823
- struct kprobe_ctlblk *kcb)
824
-{
825
- unsigned long *tos = stack_addr(regs);
826
- unsigned long copy_ip = (unsigned long)p->ainsn.insn;
827
- unsigned long orig_ip = (unsigned long)p->addr;
828
- kprobe_opcode_t *insn = p->ainsn.insn;
829
-
830
- /* Skip prefixes */
831
- insn = skip_prefixes(insn);
832
-
833
- regs->flags &= ~X86_EFLAGS_TF;
834
- switch (*insn) {
835
- case 0x9c: /* pushfl */
836
- *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
837
- *tos |= kcb->kprobe_old_flags;
838
- break;
839
- case 0xc2: /* iret/ret/lret */
840
- case 0xc3:
841
- case 0xca:
842
- case 0xcb:
843
- case 0xcf:
844
- case 0xea: /* jmp absolute -- ip is correct */
845
- /* ip is already adjusted, no more changes required */
846
- p->ainsn.boostable = true;
847
- goto no_change;
848
- case 0xe8: /* call relative - Fix return addr */
849
- *tos = orig_ip + (*tos - copy_ip);
850
- break;
851
-#ifdef CONFIG_X86_32
852
- case 0x9a: /* call absolute -- same as call absolute, indirect */
853
- *tos = orig_ip + (*tos - copy_ip);
854
- goto no_change;
855
-#endif
856
- case 0xff:
857
- if ((insn[1] & 0x30) == 0x10) {
858
- /*
859
- * call absolute, indirect
860
- * Fix return addr; ip is correct.
861
- * But this is not boostable
862
- */
863
- *tos = orig_ip + (*tos - copy_ip);
864
- goto no_change;
865
- } else if (((insn[1] & 0x31) == 0x20) ||
866
- ((insn[1] & 0x31) == 0x21)) {
867
- /*
868
- * jmp near and far, absolute indirect
869
- * ip is correct. And this is boostable
870
- */
871
- p->ainsn.boostable = true;
872
- goto no_change;
873
- }
874
- default:
875
- break;
876
- }
877
-
878
- regs->ip += orig_ip - copy_ip;
879
-
880
-no_change:
881
- restore_btf();
882
-}
883
-NOKPROBE_SYMBOL(resume_execution);
884
-
885
-/*
886
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
887
- * remain disabled throughout this function.
888
- */
889
-int kprobe_debug_handler(struct pt_regs *regs)
890
-{
891
- struct kprobe *cur = kprobe_running();
892
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
893
-
894
- if (!cur)
895
- return 0;
896
-
897
- resume_execution(cur, regs, kcb);
898
- regs->flags |= kcb->kprobe_saved_flags;
899
-
900
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
901
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
902
- cur->post_handler(cur, regs, 0);
903
- }
904
-
905
- /* Restore back the original saved kprobes variables and continue. */
906
- if (kcb->kprobe_status == KPROBE_REENTER) {
907
- restore_previous_kprobe(kcb);
908
- goto out;
909
- }
910
- reset_current_kprobe();
911
-out:
912
- /*
913
- * if somebody else is singlestepping across a probe point, flags
914
- * will have TF set, in which case, continue the remaining processing
915
- * of do_debug, as if this is not a probe hit.
916
- */
917
- if (regs->flags & X86_EFLAGS_TF)
918
- return 0;
919
-
920
- return 1;
921
-}
922
-NOKPROBE_SYMBOL(kprobe_debug_handler);
923
-
9241085 int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
9251086 {
9261087 struct kprobe *cur = kprobe_running();
....@@ -938,20 +1099,9 @@
9381099 * normal page fault.
9391100 */
9401101 regs->ip = (unsigned long)cur->addr;
941
- /*
942
- * Trap flag (TF) has been set here because this fault
943
- * happened where the single stepping will be done.
944
- * So clear it by resetting the current kprobe:
945
- */
946
- regs->flags &= ~X86_EFLAGS_TF;
947
- /*
948
- * Since the single step (trap) has been cancelled,
949
- * we need to restore BTF here.
950
- */
951
- restore_btf();
9521102
9531103 /*
954
- * If the TF flag was set before the kprobe hit,
1104
+ * If the IF flag was set before the kprobe hit,
9551105 * don't touch it:
9561106 */
9571107 regs->flags |= kcb->kprobe_old_flags;