From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/coredump.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 320 insertions(+), 29 deletions(-) diff --git a/kernel/fs/coredump.c b/kernel/fs/coredump.c index 6250eaf..7c5edad 100644 --- a/kernel/fs/coredump.c +++ b/kernel/fs/coredump.c @@ -7,6 +7,7 @@ #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> +#include <linux/ctype.h> #include <linux/string.h> #include <linux/init.h> #include <linux/pagemap.h> @@ -40,6 +41,7 @@ #include <linux/fs.h> #include <linux/path.h> #include <linux/timekeeping.h> +#include <linux/elf.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -152,10 +154,10 @@ return ret; } -static int cn_print_exe_file(struct core_name *cn) +static int cn_print_exe_file(struct core_name *cn, bool name_only) { struct file *exe_file; - char *pathbuf, *path; + char *pathbuf, *path, *ptr; int ret; exe_file = get_mm_exe_file(current->mm); @@ -174,6 +176,11 @@ goto free_buf; } + if (name_only) { + ptr = strrchr(path, '/'); + if (ptr) + path = ptr + 1; + } ret = cn_esc_printf(cn, "%s", path); free_buf: @@ -187,11 +194,13 @@ * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static int format_corename(struct core_name *cn, struct coredump_params *cprm) +static int format_corename(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); + bool was_space = false; int pid_in_pattern = 0; int err = 0; @@ -201,12 +210,38 @@ return -ENOMEM; cn->corename[0] = '\0'; - if (ispipe) + if (ispipe) { + int argvs = sizeof(core_pattern) / 2; + (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); + if (!(*argv)) + return -ENOMEM; + (*argv)[(*argc)++] = 0; ++pat_ptr; + if (!(*pat_ptr)) + return -ENOMEM; + } /* Repeat as long as we have more pattern to process and more output space */ while (*pat_ptr) { + /* + * Split on spaces before doing template expansion so that + * %e and %E don't get split if they have spaces in them + */ + if (ispipe) { + if (isspace(*pat_ptr)) { + if (cn->used != 0) + was_space = true; + pat_ptr++; + continue; + } else if (was_space) { + was_space = false; + err = cn_printf(cn, "%c", '\0'); + if (err) + return err; + (*argv)[(*argc)++] = cn->used; + } + } if (*pat_ptr != '%') { err = cn_printf(cn, "%c", *pat_ptr++); } else { @@ -273,12 +308,16 @@ utsname()->nodename); up_read(&uts_sem); break; - /* executable */ + /* executable, could be changed by prctl PR_SET_NAME etc */ case 'e': err = cn_esc_printf(cn, "%s", current->comm); break; + /* file name of executable */ + case 'f': + err = cn_print_exe_file(cn, true); + break; case 'E': - err = cn_print_exe_file(cn); + err = cn_print_exe_file(cn, false); break; /* core limit size */ case 'c': @@ -365,7 +404,7 @@ * of ->siglock provides a memory barrier. * * do_exit: - * The caller holds mm->mmap_sem. This means that the task which + * The caller holds mm->mmap_lock. This means that the task which * uses this mm can't pass exit_mm(), so it can't exit or clear * its ->mm. * @@ -373,7 +412,7 @@ * It does list_replace_rcu(&leader->tasks, ¤t->tasks), * we must see either old or new leader, this does not matter. * However, it can change p->sighand, so lock_task_sighand(p) - * must be used. Since p->mm != NULL and we hold ->mmap_sem + * must be used. Since p->mm != NULL and we hold ->mmap_lock * it can't fail. * * Note also that "g" can be the old leader with ->mm == NULL @@ -417,12 +456,12 @@ core_state->dumper.task = tsk; core_state->dumper.next = NULL; - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; if (!mm->core_state) core_waiters = zap_threads(tsk, mm, core_state, exit_code); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); if (core_waiters > 0) { struct core_thread *ptr; @@ -481,7 +520,7 @@ * but then we need to teach dump_write() to restart and clear * TIF_SIGPENDING. */ - return signal_pending(current); + return fatal_signal_pending(current) || freezing(current); } static void wait_for_dump_helpers(struct file *file) @@ -491,7 +530,7 @@ pipe_lock(pipe); pipe->readers++; pipe->writers--; - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); pipe_unlock(pipe); @@ -499,7 +538,7 @@ * We actually want wait_event_freezable() but then we need * to clear TIF_SIGPENDING and improve dump_interrupted(). */ - wait_event_interruptible(pipe->wait, pipe->readers == 1); + wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); pipe_lock(pipe); pipe->readers--; @@ -536,7 +575,7 @@ return err; } -void do_coredump(const siginfo_t *siginfo) +void do_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; @@ -546,6 +585,8 @@ struct cred *cred; int retval = 0; int ispipe; + size_t *argv = NULL; + int argc = 0; struct files_struct *displaced; /* require nonrelative corefile path and be extra careful */ bool need_suid_safe = false; @@ -592,9 +633,10 @@ old_cred = override_creds(cred); - ispipe = format_corename(&cn, &cprm); + ispipe = format_corename(&cn, &cprm, &argv, &argc); if (ispipe) { + int argi; int dump_count; char **helper_argv; struct subprocess_info *sub_info; @@ -637,12 +679,16 @@ goto fail_dropcount; } - helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); + helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), + GFP_KERNEL); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); goto fail_dropcount; } + for (argi = 0; argi < argc; argi++) + helper_argv[argi] = cn.corename + argv[argi]; + helper_argv[argi] = NULL; retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], @@ -652,7 +698,7 @@ retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); - argv_free(helper_argv); + kfree(helper_argv); if (retval) { printk(KERN_INFO "Core dump to |%s pipe failed\n", cn.corename); @@ -742,7 +788,7 @@ goto close_fail; if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; - if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file)) + if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; } @@ -774,6 +820,7 @@ if (ispipe) atomic_dec(&core_dump_count); fail_unlock: + kfree(argv); kfree(cn.corename); coredump_finish(mm, core_dumped); revert_creds(old_cred); @@ -795,17 +842,17 @@ ssize_t n; if (cprm->written + nr > cprm->limit) return 0; - while (nr) { - if (dump_interrupted()) - return 0; - n = __kernel_write(file, addr, nr, &pos); - if (n <= 0) - return 0; - file->f_pos = pos; - cprm->written += n; - cprm->pos += n; - nr -= n; - } + + + if (dump_interrupted()) + return 0; + n = __kernel_write(file, addr, nr, &pos); + if (n != nr) + return 0; + file->f_pos = pos; + cprm->written += n; + cprm->pos += n; + return 1; } EXPORT_SYMBOL(dump_emit); @@ -830,6 +877,40 @@ } } EXPORT_SYMBOL(dump_skip); + +#ifdef CONFIG_ELF_CORE +int dump_user_range(struct coredump_params *cprm, unsigned long start, + unsigned long len) +{ + unsigned long addr; + + for (addr = start; addr < start + len; addr += PAGE_SIZE) { + struct page *page; + int stop; + + /* + * To avoid having to allocate page tables for virtual address + * ranges that have never been used yet, and also to make it + * easy to generate sparse core files, use a helper that returns + * NULL when encountering an empty page table entry that would + * otherwise have been filled with the zero page. + */ + page = get_dump_page(addr); + if (page) { + void *kaddr = kmap(page); + + stop = !dump_emit(cprm, kaddr, PAGE_SIZE); + kunmap(page); + put_user_page(page); + } else { + stop = !dump_skip(cprm, PAGE_SIZE); + } + if (stop) + return 0; + } + return 1; +} +#endif int dump_align(struct coredump_params *cprm, int align) { @@ -857,3 +938,213 @@ } } EXPORT_SYMBOL(dump_truncate); + +/* + * The purpose of always_dump_vma() is to make sure that special kernel mappings + * that are useful for post-mortem analysis are included in every core dump. + * In that way we ensure that the core dump is fully interpretable later + * without matching up the same kernel and hardware config to see what PC values + * meant. These special mappings include - vDSO, vsyscall, and other + * architecture specific mappings + */ +static bool always_dump_vma(struct vm_area_struct *vma) +{ + /* Any vsyscall mappings? */ + if (vma == get_gate_vma(vma->vm_mm)) + return true; + + /* + * Assume that all vmas with a .name op should always be dumped. + * If this changes, a new vm_ops field can easily be added. + */ + if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) + return true; + + /* + * arch_vma_name() returns non-NULL for special architecture mappings, + * such as vDSO sections. + */ + if (arch_vma_name(vma)) + return true; + + return false; +} + +#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1 + +/* + * Decide how much of @vma's contents should be included in a core dump. + */ +static unsigned long vma_dump_size(struct vm_area_struct *vma, + unsigned long mm_flags) +{ +#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) + + /* always dump the vdso and vsyscall sections */ + if (always_dump_vma(vma)) + goto whole; + + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + /* support for DAX */ + if (vma_is_dax(vma)) { + if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) + goto whole; + if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) + goto whole; + return 0; + } + + /* Hugetlb memory check */ + if (is_vm_hugetlb_page(vma)) { + if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) + goto whole; + if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) + goto whole; + return 0; + } + + /* Do not dump I/O mapped devices or special mappings */ + if (vma->vm_flags & VM_IO) + return 0; + + /* By default, dump shared memory if mapped from an anonymous file. */ + if (vma->vm_flags & VM_SHARED) { + if (file_inode(vma->vm_file)->i_nlink == 0 ? + FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) + goto whole; + return 0; + } + + /* Dump segments that have been written to. */ + if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE)) + goto whole; + if (vma->vm_file == NULL) + return 0; + + if (FILTER(MAPPED_PRIVATE)) + goto whole; + + /* + * If this is the beginning of an executable file mapping, + * dump the first page to aid in determining what was mapped here. + */ + if (FILTER(ELF_HEADERS) && + vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { + if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) + return PAGE_SIZE; + + /* + * ELF libraries aren't always executable. + * We'll want to check whether the mapping starts with the ELF + * magic, but not now - we're holding the mmap lock, + * so copy_from_user() doesn't work here. + * Use a placeholder instead, and fix it up later in + * dump_vma_snapshot(). + */ + return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER; + } + +#undef FILTER + + return 0; + +whole: + return vma->vm_end - vma->vm_start; +} + +static struct vm_area_struct *first_vma(struct task_struct *tsk, + struct vm_area_struct *gate_vma) +{ + struct vm_area_struct *ret = tsk->mm->mmap; + + if (ret) + return ret; + return gate_vma; +} + +/* + * Helper function for iterating across a vma list. It ensures that the caller + * will visit `gate_vma' prior to terminating the search. + */ +static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, + struct vm_area_struct *gate_vma) +{ + struct vm_area_struct *ret; + + ret = this_vma->vm_next; + if (ret) + return ret; + if (this_vma == gate_vma) + return NULL; + return gate_vma; +} + +/* + * Under the mmap_lock, take a snapshot of relevant information about the task's + * VMAs. + */ +int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, + struct core_vma_metadata **vma_meta, + size_t *vma_data_size_ptr) +{ + struct vm_area_struct *vma, *gate_vma; + struct mm_struct *mm = current->mm; + int i; + size_t vma_data_size = 0; + + /* + * Once the stack expansion code is fixed to not change VMA bounds + * under mmap_lock in read mode, this can be changed to take the + * mmap_lock in read mode. + */ + if (mmap_write_lock_killable(mm)) + return -EINTR; + + gate_vma = get_gate_vma(mm); + *vma_count = mm->map_count + (gate_vma ? 1 : 0); + + *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL); + if (!*vma_meta) { + mmap_write_unlock(mm); + return -ENOMEM; + } + + for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma), i++) { + struct core_vma_metadata *m = (*vma_meta) + i; + + m->start = vma->vm_start; + m->end = vma->vm_end; + m->flags = vma->vm_flags; + m->dump_size = vma_dump_size(vma, cprm->mm_flags); + } + + mmap_write_unlock(mm); + + if (WARN_ON(i != *vma_count)) { + kvfree(*vma_meta); + return -EFAULT; + } + + for (i = 0; i < *vma_count; i++) { + struct core_vma_metadata *m = (*vma_meta) + i; + + if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) { + char elfmag[SELFMAG]; + + if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) || + memcmp(elfmag, ELFMAG, SELFMAG) != 0) { + m->dump_size = 0; + } else { + m->dump_size = PAGE_SIZE; + } + } + + vma_data_size += m->dump_size; + } + + *vma_data_size_ptr = vma_data_size; + return 0; +} -- Gitblit v1.6.2