2023-12-09 958e46acc8e900e8569dd467c1af9b8d2d019394
kernel/fs/coredump.c
@@ -7,6 +7,7 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/swap.h>
+#include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
@@ -40,6 +41,7 @@
 #include <linux/fs.h>
 #include <linux/path.h>
 #include <linux/timekeeping.h>
+#include <linux/elf.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -152,10 +154,10 @@
         return ret;
 }
 
-static int cn_print_exe_file(struct core_name *cn)
+static int cn_print_exe_file(struct core_name *cn, bool name_only)
 {
         struct file *exe_file;
-        char *pathbuf, *path;
+        char *pathbuf, *path, *ptr;
         int ret;
 
         exe_file = get_mm_exe_file(current->mm);
@@ -174,6 +176,11 @@
                 goto free_buf;
         }
 
+        if (name_only) {
+                ptr = strrchr(path, '/');
+                if (ptr)
+                        path = ptr + 1;
+        }
         ret = cn_esc_printf(cn, "%s", path);
 
 free_buf:
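The name_only branch is the classic strrchr() basename idiom. A minimal userspace sketch of the same logic (the function name and test paths here are illustrative, not part of the patch):

```c
#include <stdio.h>
#include <string.h>

/* Keep everything after the last '/', or the whole string if the
 * path contains no '/' at all - same logic as the name_only branch. */
static const char *exe_basename(const char *path)
{
        const char *ptr = strrchr(path, '/');

        return ptr ? ptr + 1 : path;
}

int main(void)
{
        printf("%s\n", exe_basename("/usr/bin/python3")); /* python3 */
        printf("%s\n", exe_basename("busybox"));          /* busybox */
        return 0;
}
```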
@@ -187,11 +194,13 @@
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(struct core_name *cn, struct coredump_params *cprm)
+static int format_corename(struct core_name *cn, struct coredump_params *cprm,
+                           size_t **argv, int *argc)
 {
         const struct cred *cred = current_cred();
         const char *pat_ptr = core_pattern;
         int ispipe = (*pat_ptr == '|');
+        bool was_space = false;
         int pid_in_pattern = 0;
         int err = 0;
 
@@ -201,12 +210,38 @@
                 return -ENOMEM;
         cn->corename[0] = '\0';
 
-        if (ispipe)
+        if (ispipe) {
+                int argvs = sizeof(core_pattern) / 2;
+                (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
+                if (!(*argv))
+                        return -ENOMEM;
+                (*argv)[(*argc)++] = 0;
                 ++pat_ptr;
+                if (!(*pat_ptr))
+                        return -ENOMEM;
+        }
 
         /* Repeat as long as we have more pattern to process and more output
            space */
         while (*pat_ptr) {
+                /*
+                 * Split on spaces before doing template expansion so that
+                 * %e and %E don't get split if they have spaces in them
+                 */
+                if (ispipe) {
+                        if (isspace(*pat_ptr)) {
+                                if (cn->used != 0)
+                                        was_space = true;
+                                pat_ptr++;
+                                continue;
+                        } else if (was_space) {
+                                was_space = false;
+                                err = cn_printf(cn, "%c", '\0');
+                                if (err)
+                                        return err;
+                                (*argv)[(*argc)++] = cn->used;
+                        }
+                }
                 if (*pat_ptr != '%') {
                         err = cn_printf(cn, "%c", *pat_ptr++);
                 } else {
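Note that the loop records each argument's start as a byte offset into cn->corename rather than as a pointer: cn_printf() can reallocate the buffer as it grows, which would invalidate pointers. A userspace model of just this splitting step, leaving template expansion out (all names here are illustrative):

```c
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Model of the loop above: copy `pat` into `out`, replacing each run
 * of whitespace with a single '\0' and recording the offset where the
 * next argument starts. Returns the argument count.
 */
static int split_pattern(const char *pat, char *out, size_t *argv_off)
{
        size_t used = 0;
        int argc = 0;
        int was_space = 0;

        argv_off[argc++] = 0;
        for (; *pat; pat++) {
                if (isspace((unsigned char)*pat)) {
                        if (used != 0)
                                was_space = 1;
                        continue;
                }
                if (was_space) {
                        was_space = 0;
                        out[used++] = '\0';
                        argv_off[argc++] = used;
                }
                out[used++] = *pat;
        }
        out[used] = '\0';
        return argc;
}

int main(void)
{
        char buf[128];
        size_t off[16];
        int argc = split_pattern("/usr/bin/dumper %p   %e", buf, off);

        for (int i = 0; i < argc; i++)
                printf("argv[%d] = \"%s\"\n", i, buf + off[i]);
        return 0;
}
```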
@@ -273,12 +308,16 @@
                                               utsname()->nodename);
                         up_read(&uts_sem);
                         break;
-                /* executable */
+                /* executable, could be changed by prctl PR_SET_NAME etc */
                 case 'e':
                         err = cn_esc_printf(cn, "%s", current->comm);
                         break;
+                /* file name of executable */
+                case 'f':
+                        err = cn_print_exe_file(cn, true);
+                        break;
                 case 'E':
-                        err = cn_print_exe_file(cn);
+                        err = cn_print_exe_file(cn, false);
                         break;
                 /* core limit size */
                 case 'c':
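The new %f specifier differs from %e exactly as the updated comment says: %e reports current->comm, which PR_SET_NAME can change, while %f always reports the executable's on-disk file name. A small demo of the difference (run with, e.g., core_pattern set to core.%e.%f and a nonzero core rlimit):

```c
#include <stdlib.h>
#include <sys/prctl.h>

int main(void)
{
        /* After this, %e expands to "renamed" while %f still expands
         * to this binary's on-disk file name. */
        prctl(PR_SET_NAME, "renamed", 0, 0, 0);
        abort();        /* raise SIGABRT to trigger the core dump */
}
```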
@@ -365,7 +404,7 @@
  * of ->siglock provides a memory barrier.
  *
  * do_exit:
- *	The caller holds mm->mmap_sem. This means that the task which
+ *	The caller holds mm->mmap_lock. This means that the task which
  *	uses this mm can't pass exit_mm(), so it can't exit or clear
  *	its ->mm.
  *
@@ -373,7 +412,7 @@
  *	It does list_replace_rcu(&leader->tasks, &current->tasks),
  *	we must see either old or new leader, this does not matter.
  *	However, it can change p->sighand, so lock_task_sighand(p)
- *	must be used. Since p->mm != NULL and we hold ->mmap_sem
+ *	must be used. Since p->mm != NULL and we hold ->mmap_lock
  *	it can't fail.
 *
 *	Note also that "g" can be the old leader with ->mm == NULL
@@ -417,12 +456,12 @@
         core_state->dumper.task = tsk;
         core_state->dumper.next = NULL;
 
-        if (down_write_killable(&mm->mmap_sem))
+        if (mmap_write_lock_killable(mm))
                 return -EINTR;
 
         if (!mm->core_state)
                 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
-        up_write(&mm->mmap_sem);
+        mmap_write_unlock(mm);
 
         if (core_waiters > 0) {
                 struct core_thread *ptr;
@@ -481,7 +520,7 @@
          * but then we need to teach dump_write() to restart and clear
          * TIF_SIGPENDING.
          */
-        return signal_pending(current);
+        return fatal_signal_pending(current) || freezing(current);
 }
 
 static void wait_for_dump_helpers(struct file *file)
@@ -491,7 +530,7 @@
         pipe_lock(pipe);
         pipe->readers++;
         pipe->writers--;
-        wake_up_interruptible_sync(&pipe->wait);
+        wake_up_interruptible_sync(&pipe->rd_wait);
         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
         pipe_unlock(pipe);
 
@@ -499,7 +538,7 @@
          * We actually want wait_event_freezable() but then we need
          * to clear TIF_SIGPENDING and improve dump_interrupted().
          */
-        wait_event_interruptible(pipe->wait, pipe->readers == 1);
+        wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);
 
         pipe_lock(pipe);
         pipe->readers--;
@@ -536,7 +575,7 @@
         return err;
 }
 
-void do_coredump(const siginfo_t *siginfo)
+void do_coredump(const kernel_siginfo_t *siginfo)
 {
         struct core_state core_state;
         struct core_name cn;
@@ -546,6 +585,8 @@
         struct cred *cred;
         int retval = 0;
         int ispipe;
+        size_t *argv = NULL;
+        int argc = 0;
         struct files_struct *displaced;
         /* require nonrelative corefile path and be extra careful */
         bool need_suid_safe = false;
@@ -592,9 +633,10 @@
 
         old_cred = override_creds(cred);
 
-        ispipe = format_corename(&cn, &cprm);
+        ispipe = format_corename(&cn, &cprm, &argv, &argc);
 
         if (ispipe) {
+                int argi;
                 int dump_count;
                 char **helper_argv;
                 struct subprocess_info *sub_info;
@@ -637,12 +679,16 @@
                         goto fail_dropcount;
                 }
 
-                helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
+                helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
+                                            GFP_KERNEL);
                 if (!helper_argv) {
                         printk(KERN_WARNING "%s failed to allocate memory\n",
                                __func__);
                         goto fail_dropcount;
                 }
+                for (argi = 0; argi < argc; argi++)
+                        helper_argv[argi] = cn.corename + argv[argi];
+                helper_argv[argi] = NULL;
 
                 retval = -ENOMEM;
                 sub_info = call_usermodehelper_setup(helper_argv[0],
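Only here, after format_corename() has stopped growing cn.corename, is it safe to turn the recorded offsets into pointers. The same fix-up in isolation, as a hedged userspace sketch (names are illustrative):

```c
#include <stdlib.h>

/* Build the NULL-terminated vector an execve()-style helper expects
 * from a buffer of NUL-separated strings plus the offset table that
 * was recorded while the buffer was still being reallocated. */
static char **build_helper_argv(char *corename, const size_t *off, int argc)
{
        char **argv = malloc((argc + 1) * sizeof(*argv));

        if (!argv)
                return NULL;
        for (int i = 0; i < argc; i++)
                argv[i] = corename + off[i];
        argv[argc] = NULL;
        return argv;
}
```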
@@ -652,7 +698,7 @@
                         retval = call_usermodehelper_exec(sub_info,
                                                           UMH_WAIT_EXEC);
 
-                argv_free(helper_argv);
+                kfree(helper_argv);
                 if (retval) {
                         printk(KERN_INFO "Core dump to |%s pipe failed\n",
                                cn.corename);
@@ -742,7 +788,7 @@
                         goto close_fail;
                 if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
                         goto close_fail;
-                if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file))
+                if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
                         goto close_fail;
         }
 
@@ -774,6 +820,7 @@
         if (ispipe)
                 atomic_dec(&core_dump_count);
 fail_unlock:
+        kfree(argv);
         kfree(cn.corename);
         coredump_finish(mm, core_dumped);
         revert_creds(old_cred);
@@ -795,17 +842,17 @@
         ssize_t n;
         if (cprm->written + nr > cprm->limit)
                 return 0;
-        while (nr) {
-                if (dump_interrupted())
-                        return 0;
-                n = __kernel_write(file, addr, nr, &pos);
-                if (n <= 0)
-                        return 0;
-                file->f_pos = pos;
-                cprm->written += n;
-                cprm->pos += n;
-                nr -= n;
-        }
+
+
+        if (dump_interrupted())
+                return 0;
+        n = __kernel_write(file, addr, nr, &pos);
+        if (n != nr)
+                return 0;
+        file->f_pos = pos;
+        cprm->written += n;
+        cprm->pos += n;
+
         return 1;
 }
 EXPORT_SYMBOL(dump_emit);
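Note the semantic change in dump_emit(): the old loop retried short writes until the buffer was drained, while the new code issues a single __kernel_write() and treats any short count (n != nr) as failure. For contrast, the retry idiom the removed loop implemented, as a userspace sketch:

```c
#include <unistd.h>

/* The retry idiom the removed loop implemented: keep writing until
 * the whole buffer is out or an error occurs. */
static int write_all(int fd, const char *buf, size_t nr)
{
        while (nr) {
                ssize_t n = write(fd, buf, nr);

                if (n <= 0)
                        return 0;
                buf += n;
                nr -= n;
        }
        return 1;
}
```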
@@ -830,6 +877,40 @@
         }
 }
 EXPORT_SYMBOL(dump_skip);
+
+#ifdef CONFIG_ELF_CORE
+int dump_user_range(struct coredump_params *cprm, unsigned long start,
+                    unsigned long len)
+{
+        unsigned long addr;
+
+        for (addr = start; addr < start + len; addr += PAGE_SIZE) {
+                struct page *page;
+                int stop;
+
+                /*
+                 * To avoid having to allocate page tables for virtual address
+                 * ranges that have never been used yet, and also to make it
+                 * easy to generate sparse core files, use a helper that returns
+                 * NULL when encountering an empty page table entry that would
+                 * otherwise have been filled with the zero page.
+                 */
+                page = get_dump_page(addr);
+                if (page) {
+                        void *kaddr = kmap(page);
+
+                        stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
+                        kunmap(page);
+                        put_user_page(page);
+                } else {
+                        stop = !dump_skip(cprm, PAGE_SIZE);
+                }
+                if (stop)
+                        return 0;
+        }
+        return 1;
+}
+#endif
 
 int dump_align(struct coredump_params *cprm, int align)
 {
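As the comment in dump_user_range() says, skipping absent pages with dump_skip() is what keeps core files sparse: seeking past a hole instead of writing zeroes leaves the blocks unallocated. The same effect is visible from userspace, as a minimal sketch:

```c
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("sparse.bin", O_CREAT | O_TRUNC | O_WRONLY, 0644);

        if (fd < 0)
                return 1;
        /* Seek 1 MiB forward without writing: the filesystem leaves a
         * hole, much as dump_skip() does for pages that
         * get_dump_page() reports as absent. */
        lseek(fd, 1 << 20, SEEK_CUR);
        (void)write(fd, "end", 3);
        close(fd);
        return 0;       /* compare: du -h sparse.bin vs ls -lh sparse.bin */
}
```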
@@ -857,3 +938,213 @@
         }
 }
 EXPORT_SYMBOL(dump_truncate);
+
+/*
+ * The purpose of always_dump_vma() is to make sure that special kernel mappings
+ * that are useful for post-mortem analysis are included in every core dump.
+ * In that way we ensure that the core dump is fully interpretable later
+ * without matching up the same kernel and hardware config to see what PC values
+ * meant. These special mappings include - vDSO, vsyscall, and other
+ * architecture specific mappings
+ */
+static bool always_dump_vma(struct vm_area_struct *vma)
+{
+        /* Any vsyscall mappings? */
+        if (vma == get_gate_vma(vma->vm_mm))
+                return true;
+
+        /*
+         * Assume that all vmas with a .name op should always be dumped.
+         * If this changes, a new vm_ops field can easily be added.
+         */
+        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
+                return true;
+
+        /*
+         * arch_vma_name() returns non-NULL for special architecture mappings,
+         * such as vDSO sections.
+         */
+        if (arch_vma_name(vma))
+                return true;
+
+        return false;
+}
+
+#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
+
+/*
+ * Decide how much of @vma's contents should be included in a core dump.
+ */
+static unsigned long vma_dump_size(struct vm_area_struct *vma,
+                                   unsigned long mm_flags)
+{
+#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
+
+        /* always dump the vdso and vsyscall sections */
+        if (always_dump_vma(vma))
+                goto whole;
+
+        if (vma->vm_flags & VM_DONTDUMP)
+                return 0;
+
+        /* support for DAX */
+        if (vma_is_dax(vma)) {
+                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
+                        goto whole;
+                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
+                        goto whole;
+                return 0;
+        }
+
+        /* Hugetlb memory check */
+        if (is_vm_hugetlb_page(vma)) {
+                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+                        goto whole;
+                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+                        goto whole;
+                return 0;
+        }
+
+        /* Do not dump I/O mapped devices or special mappings */
+        if (vma->vm_flags & VM_IO)
+                return 0;
+
+        /* By default, dump shared memory if mapped from an anonymous file. */
+        if (vma->vm_flags & VM_SHARED) {
+                if (file_inode(vma->vm_file)->i_nlink == 0 ?
+                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
+                        goto whole;
+                return 0;
+        }
+
+        /* Dump segments that have been written to. */
+        if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
+                goto whole;
+        if (vma->vm_file == NULL)
+                return 0;
+
+        if (FILTER(MAPPED_PRIVATE))
+                goto whole;
+
+        /*
+         * If this is the beginning of an executable file mapping,
+         * dump the first page to aid in determining what was mapped here.
+         */
+        if (FILTER(ELF_HEADERS) &&
+            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
+                if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
+                        return PAGE_SIZE;
+
+                /*
+                 * ELF libraries aren't always executable.
+                 * We'll want to check whether the mapping starts with the ELF
+                 * magic, but not now - we're holding the mmap lock,
+                 * so copy_from_user() doesn't work here.
+                 * Use a placeholder instead, and fix it up later in
+                 * dump_vma_snapshot().
+                 */
+                return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
+        }
+
+#undef FILTER
+
+        return 0;
+
+whole:
+        return vma->vm_end - vma->vm_start;
+}
+
+static struct vm_area_struct *first_vma(struct task_struct *tsk,
+                                        struct vm_area_struct *gate_vma)
+{
+        struct vm_area_struct *ret = tsk->mm->mmap;
+
+        if (ret)
+                return ret;
+        return gate_vma;
+}
+
+/*
+ * Helper function for iterating across a vma list. It ensures that the caller
+ * will visit `gate_vma' prior to terminating the search.
+ */
+static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+                                       struct vm_area_struct *gate_vma)
+{
+        struct vm_area_struct *ret;
+
+        ret = this_vma->vm_next;
+        if (ret)
+                return ret;
+        if (this_vma == gate_vma)
+                return NULL;
+        return gate_vma;
+}
+
+/*
+ * Under the mmap_lock, take a snapshot of relevant information about the
+ * task's VMAs.
+ */
+int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
+                      struct core_vma_metadata **vma_meta,
+                      size_t *vma_data_size_ptr)
+{
+        struct vm_area_struct *vma, *gate_vma;
+        struct mm_struct *mm = current->mm;
+        int i;
+        size_t vma_data_size = 0;
+
+        /*
+         * Once the stack expansion code is fixed to not change VMA bounds
+         * under mmap_lock in read mode, this can be changed to take the
+         * mmap_lock in read mode.
+         */
+        if (mmap_write_lock_killable(mm))
+                return -EINTR;
+
+        gate_vma = get_gate_vma(mm);
+        *vma_count = mm->map_count + (gate_vma ? 1 : 0);
+
+        *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
+        if (!*vma_meta) {
+                mmap_write_unlock(mm);
+                return -ENOMEM;
+        }
+
+        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+             vma = next_vma(vma, gate_vma), i++) {
+                struct core_vma_metadata *m = (*vma_meta) + i;
+
+                m->start = vma->vm_start;
+                m->end = vma->vm_end;
+                m->flags = vma->vm_flags;
+                m->dump_size = vma_dump_size(vma, cprm->mm_flags);
+        }
+
+        mmap_write_unlock(mm);
+
+        if (WARN_ON(i != *vma_count)) {
+                kvfree(*vma_meta);
+                return -EFAULT;
+        }
+
+        for (i = 0; i < *vma_count; i++) {
+                struct core_vma_metadata *m = (*vma_meta) + i;
+
+                if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
+                        char elfmag[SELFMAG];
+
+                        if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) ||
+                                        memcmp(elfmag, ELFMAG, SELFMAG) != 0) {
+                                m->dump_size = 0;
+                        } else {
+                                m->dump_size = PAGE_SIZE;
+                        }
+                }
+
+                vma_data_size += m->dump_size;
+        }
+
+        *vma_data_size_ptr = vma_data_size;
+        return 0;
+}
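The placeholder fix-up at the end of dump_vma_snapshot() depends only on the four ELF magic bytes, and it runs after the mmap lock is dropped because copy_from_user() may fault. An equivalent userspace check, using the same ELFMAG/SELFMAG constants from <elf.h>:

```c
#include <elf.h>
#include <stdio.h>
#include <string.h>

/* Return 1 if the file starts with "\177ELF" - the same test
 * dump_vma_snapshot() applies to the first bytes of a mapping. */
static int is_elf(const char *path)
{
        char magic[SELFMAG];
        FILE *f = fopen(path, "rb");
        int ok;

        if (!f)
                return 0;
        ok = fread(magic, 1, SELFMAG, f) == SELFMAG &&
             memcmp(magic, ELFMAG, SELFMAG) == 0;
        fclose(f);
        return ok;
}

int main(void)
{
        printf("/bin/ls is %s\n", is_elf("/bin/ls") ? "ELF" : "not ELF");
        return 0;
}
```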