From 7fa4d2b5ce9d335d1de2b597ee82131ad079b309 Mon Sep 17 00:00:00 2001 From: Daniel Maslowski Date: Fri, 24 Mar 2023 19:50:24 +0100 Subject: [PATCH 01/27] riscv: add VisionFive 2 cpu defconfig Signed-off-by: Daniel Maslowski --- arch/riscv/configs/vf2-cpu-defconfig | 233 +++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 arch/riscv/configs/vf2-cpu-defconfig diff --git a/arch/riscv/configs/vf2-cpu-defconfig b/arch/riscv/configs/vf2-cpu-defconfig new file mode 100644 index 0000000000000..fcfac931a2076 --- /dev/null +++ b/arch/riscv/configs/vf2-cpu-defconfig @@ -0,0 +1,233 @@ +CONFIG_DEFAULT_HOSTNAME="(cyrevolt)" +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_UNPRIV_DEFAULT_OFF=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="init.cpio.xz" +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_EXPERT=y +# CONFIG_SYSFS_SYSCALL is not set +CONFIG_PROFILING=y +CONFIG_SOC_MICROCHIP_POLARFIRE=y +CONFIG_SOC_STARFIVE=y +CONFIG_SOC_STARFIVE_JH7110=y +CONFIG_SOC_VIRT=y +CONFIG_SMP=y +CONFIG_KEXEC=y +# CONFIG_EFI is not set +CONFIG_CPU_IDLE=y +CONFIG_RISCV_SBI_CPUIDLE=y +CONFIG_JUMP_LABEL=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_INET_ESP=m +# CONFIG_IPV6 is not set +CONFIG_NETLINK_DIAG=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_CGROUP_NET_CLASSID=y +# CONFIG_WIRELESS is not set +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCIE_XILINX=y +CONFIG_PCIE_PLDA=y +CONFIG_PCIE_FU740=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FW_LOADER is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_NVME=y +CONFIG_NVME_MULTIPATH=y +CONFIG_NVME_HWMON=y +CONFIG_NVME_FC=y +CONFIG_NVME_TARGET=y +CONFIG_NVME_TARGET_PASSTHRU=y +CONFIG_NVME_TARGET_LOOP=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_BROCADE is not set +CONFIG_MACB=y +# CONFIG_NET_VENDOR_CAVIUM is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_GOOGLE is not set +# CONFIG_NET_VENDOR_HUAWEI is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_LITEX is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MELLANOX is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MICROCHIP is not set +# CONFIG_NET_VENDOR_MICROSEMI is not set +# CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETERION is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_VENDOR_PACKET_ENGINES is not set +# CONFIG_NET_VENDOR_PENSANDO is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_REALTEK is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set +CONFIG_STMMAC_ETH=y +CONFIG_DWMAC_STARFIVE_PLAT=y +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set +CONFIG_MICROSEMI_PHY=y +CONFIG_MOTORCOMM_PHY=y +# CONFIG_WLAN is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HW_RANDOM_STARFIVE=y +CONFIG_SPI=y +CONFIG_SPI_PL022_STARFIVE=y +CONFIG_SPI_SIFIVE=y +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_PINCTRL=y +CONFIG_PINCTRL_STARFIVE=y +CONFIG_PINCTRL_STARFIVE_JH7110=y +CONFIG_GPIO_SIFIVE=y +CONFIG_WATCHDOG=y +CONFIG_STARFIVE_WATCHDOG=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +# CONFIG_VGA_CONSOLE is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_STARFIVE=y +CONFIG_DMADEVICES=y +# CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set +CONFIG_MAILBOX=y +CONFIG_STARFIVE_MBOX=y +CONFIG_STARFIVE_MBOX_TEST=y +CONFIG_SIFIVE_L2_FLUSH_START=0x40000000 +CONFIG_SIFIVE_L2_FLUSH_SIZE=0x400000000 +CONFIG_STARFIVE_PMU=y +CONFIG_PWM=y +CONFIG_PWM_STARFIVE_PTC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_9P_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_CRYPTO_AUTHENC=y +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_LIBCRC32C=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_MISC is not set +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_VM_PGFLAGS=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_WQ_WATCHDOG=y +CONFIG_DEBUG_TIMEKEEPING=y +CONFIG_DEBUG_RWSEMS=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_STACKTRACE=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PLIST=y +CONFIG_DEBUG_SG=y +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_EQS_DEBUG=y +# CONFIG_FTRACE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set +CONFIG_MEMTEST=y From 0cbcebc2e3cc5d216f7458aa15f118599e1499b9 Mon Sep 17 00:00:00 2001 From: Daniel Maslowski Date: Sun, 26 Mar 2023 19:04:36 +0200 Subject: [PATCH 02/27] drivers/pci/controller/pcie-plda: rework a bit Signed-off-by: Daniel Maslowski --- drivers/pci/controller/pcie-plda.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-plda.c b/drivers/pci/controller/pcie-plda.c index d94afd33697c6..e1d6421c4ddd4 100644 --- a/drivers/pci/controller/pcie-plda.c +++ b/drivers/pci/controller/pcie-plda.c @@ -348,6 +348,7 @@ static void plda_pcie_isr(struct irq_desc *desc) chained_irq_enter(chip, desc); pcie = irq_desc_get_handler_data(desc); + // msleep(100); status = plda_readl(pcie, ISTATUS_LOCAL); while ((status = (plda_readl(pcie, ISTATUS_LOCAL) & INT_MASK))) { if (status & INT_INTX_MASK) @@ -860,6 +861,10 @@ static void plda_pcie_hw_init(struct plda_pcie *pcie) } } +/* Parameters for the waiting for PCIe PHY PLL to lock */ +#define PHY_PLL_LOCK_WAIT_USLEEP_MAX 10 * 1000 +#define PHY_PLL_LOCK_WAIT_TIMEOUT (40 * PHY_PLL_LOCK_WAIT_USLEEP_MAX) + static int plda_pcie_is_link_up(struct plda_pcie *pcie) { struct device *dev = &pcie->pdev->dev; @@ -871,7 +876,8 @@ static int plda_pcie_is_link_up(struct plda_pcie *pcie) pcie->stg_lnksta, stg_reg_val, stg_reg_val & PLDA_DATA_LINK_ACTIVE, - 10 * 1000, 100 * 1000); + PHY_PLL_LOCK_WAIT_USLEEP_MAX, + PHY_PLL_LOCK_WAIT_TIMEOUT); /* If the link is down (no device in slot), then exit. */ if (ret == -ETIMEDOUT) { From 518c5356bea0f0aeca9131abd4035f4405440898 Mon Sep 17 00:00:00 2001 From: Daniel Maslowski Date: Sun, 26 Mar 2023 19:05:30 +0200 Subject: [PATCH 03/27] riscv: WIP: format JH7110 DSTI Signed-off-by: Daniel Maslowski --- arch/riscv/boot/dts/starfive/jh7110.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi index b6a6386b95510..ccf64f13fc6e9 100644 --- a/arch/riscv/boot/dts/starfive/jh7110.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -1662,7 +1662,7 @@ status = "disabled"; }; - mipi_dsi: mipi@295d0000 { + mipi_dsi: mipi@295d0000 { compatible = "starfive,jh7110-mipi_dsi","cdns,dsi"; reg = <0x0 0x295d0000 0x0 0x10000>; interrupts = <98>; From 0520dff7c3284bc48ee397b94ffd2732ff9d9376 Mon Sep 17 00:00:00 2001 From: Daniel Maslowski Date: Sun, 26 Mar 2023 19:06:04 +0200 Subject: [PATCH 04/27] riscv: WIP: kexec file/ELF + purgatory Signed-off-by: Daniel Maslowski --- arch/riscv/Kconfig | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6fe5d24bcf30e..68b5b1027c000 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -405,6 +405,25 @@ config KEXEC The name comes from the similarity to the exec system call. +config KEXEC_FILE + bool "kexec file based systmem call" + depends on 64BIT && MMU + select HAVE_IMA_KEXEC if IMA + select KEXEC_CORE + select KEXEC_ELF + help + This is new version of kexec system call. This system call is + file based and takes file descriptors as system call argument + for kernel and initramfs as opposed to list of segments as + accepted by previous system call. + + If you don't know what to do here, say Y. + +config ARCH_HAS_KEXEC_PURGATORY + def_bool KEXEC_FILE + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + config CRASH_DUMP bool "Build kdump crash kernel" help From 4fce84761d9cb99d912ef3d3ec227e55c98c3da9 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Fri, 8 Apr 2022 18:09:11 +0800 Subject: [PATCH 05/27] RISC-V: Add kexec_file support This patch adds support for kexec_file on RISC-V. I tested it on riscv64 QEMU with busybear-linux and single core along with the OpenSBI firmware fw_jump.bin for generic platform. On SMP system, it depends on CONFIG_{HOTPLUG_CPU, RISCV_SBI} to resume/stop hart through OpenSBI firmware, it also needs a OpenSBI that support the HSM extension. Signed-off-by: Liao Chang Signed-off-by: Li Zhengyu Link: https://lore.kernel.org/r/20220408100914.150110-4-lizhengyu3@huawei.com [Palmer: Make 64-bit only] Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 4 + arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/elf_kexec.c | 180 +++++++++++++++++++++++++ arch/riscv/kernel/machine_kexec_file.c | 14 ++ 4 files changed, 199 insertions(+) create mode 100644 arch/riscv/kernel/elf_kexec.c create mode 100644 arch/riscv/kernel/machine_kexec_file.c diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index e4e291d40759f..206217b23301f 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -53,4 +53,8 @@ typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, extern riscv_kexec_method riscv_kexec_norelocate; +#ifdef CONFIG_KEXEC_FILE +extern const struct kexec_file_ops elf_kexec_ops; +#endif + #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 9163091d2b006..834d3deec49e6 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -61,6 +61,7 @@ endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c new file mode 100644 index 0000000000000..2d442a8498714 --- /dev/null +++ b/arch/riscv/kernel/elf_kexec.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + * + * Based on kexec-tools' kexec-elf-riscv.c, heavily modified + * for kernel. + */ + +#define pr_fmt(fmt) "kexec_image: " fmt + +#include +#include +#include +#include +#include +#include + +static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, + struct kexec_elf_info *elf_info, unsigned long old_pbase, + unsigned long new_pbase) +{ + int i; + int ret = 0; + size_t size; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + + kbuf.image = image; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.buf_align = phdr->p_align; + kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; + kbuf.memsz = phdr->p_memsz; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + break; + } + + return ret; +} + +/* + * Go through the available phsyical memory regions and find one that hold + * an image of the specified size. + */ +static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + unsigned long *old_pbase, unsigned long *new_pbase) +{ + int i; + int ret; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + unsigned long lowest_paddr = ULONG_MAX; + unsigned long lowest_vaddr = ULONG_MAX; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + if (lowest_paddr > phdr->p_paddr) + lowest_paddr = phdr->p_paddr; + + if (lowest_vaddr > phdr->p_vaddr) + lowest_vaddr = phdr->p_vaddr; + } + + kbuf.image = image; + kbuf.buf_min = lowest_paddr; + kbuf.buf_max = ULONG_MAX; + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.top_down = false; + ret = arch_kexec_locate_mem_hole(&kbuf); + if (!ret) { + *old_pbase = lowest_paddr; + *new_pbase = kbuf.mem; + image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; + } + return ret; +} + +static void *elf_kexec_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned long old_kernel_pbase = ULONG_MAX; + unsigned long new_kernel_pbase = 0UL; + unsigned long initrd_pbase = 0UL; + void *fdt; + struct elfhdr ehdr; + struct kexec_buf kbuf; + struct kexec_elf_info elf_info; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + return ERR_PTR(ret); + + ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, + &old_kernel_pbase, &new_kernel_pbase); + if (ret) + goto out; + pr_notice("The entry point of kernel at 0x%lx\n", image->start); + + /* Add the kernel binary to the image */ + ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, + old_kernel_pbase, new_kernel_pbase); + if (ret) + goto out; + + kbuf.image = image; + kbuf.buf_min = new_kernel_pbase + kernel_len; + kbuf.buf_max = ULONG_MAX; + /* Add the initrd to the image */ + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_pbase = kbuf.mem; + pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase); + } + + /* Add the DTB to the image */ + fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, + initrd_len, cmdline, 0); + if (!fdt) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + fdt_pack(fdt); + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add DTB kbuf ret=%d\n", ret); + goto out_free_fdt; + } + pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); + goto out; + +out_free_fdt: + kvfree(fdt); +out: + kexec_free_elf_info(&elf_info); + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops elf_kexec_ops = { + .probe = kexec_elf_probe, + .load = elf_kexec_load, +}; diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c new file mode 100644 index 0000000000000..b0bf8c1722c0c --- /dev/null +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kexec_file for riscv, use vmlinux as the dump-capture kernel image. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + */ +#include + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &elf_kexec_ops, + NULL +}; From fb4a8bc3a0f9b18ee2742b91b1978778e9a12cb6 Mon Sep 17 00:00:00 2001 From: Li Zhengyu Date: Fri, 8 Apr 2022 18:09:12 +0800 Subject: [PATCH 06/27] RISC-V: Support for kexec_file on panic This patch adds support for loading a kexec on panic (kdump) kernel. It has been tested with vmcore-dmesg on riscv64 QEMU on both an smp and a non-smp system. Signed-off-by: Li Zhengyu Link: https://lore.kernel.org/r/20220408100914.150110-5-lizhengyu3@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 119 +++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 2d442a8498714..911d65d5a123c 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_elf_info *elf_info, unsigned long old_pbase, @@ -97,6 +99,79 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, return ret; } +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) +{ + unsigned int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) +{ + struct crash_mem *cmem = arg; + + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; + + return 0; +} + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + + nr_ranges = 1; /* For exclusion of crashkernel region */ + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->elf_load_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr= exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + /* Ensure it's nul terminated */ + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; +} + static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, unsigned long initrd_len, char *cmdline, @@ -106,10 +181,12 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long old_kernel_pbase = ULONG_MAX; unsigned long new_kernel_pbase = 0UL; unsigned long initrd_pbase = 0UL; - void *fdt; + unsigned long headers_sz; + void *fdt, *headers; struct elfhdr ehdr; struct kexec_buf kbuf; struct kexec_elf_info elf_info; + char *modified_cmdline = NULL; ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); if (ret) @@ -130,6 +207,45 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, kbuf.image = image; kbuf.buf_min = new_kernel_pbase + kernel_len; kbuf.buf_max = ULONG_MAX; + + /* Add elfcorehdr */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } + /* Add the initrd to the image */ if (initrd != NULL) { kbuf.buffer = initrd; @@ -170,6 +286,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, out_free_fdt: kvfree(fdt); out: + kfree(modified_cmdline); kexec_free_elf_info(&elf_info); return ret ? ERR_PTR(ret) : NULL; } From f2e826cf89f0966a7e0c17d8c9ff50db7bc9fe4e Mon Sep 17 00:00:00 2001 From: Li Zhengyu Date: Fri, 8 Apr 2022 18:09:13 +0800 Subject: [PATCH 07/27] RISC-V: Add purgatory This patch adds purgatory, the name and concept have been taken from kexec-tools. Purgatory runs between two kernels, and do verify sha256 hash to ensure the kernel to jump to is fine and has not been corrupted after loading. Makefile is modified based on x86 platform. Signed-off-by: Li Zhengyu Link: https://lore.kernel.org/r/20220408100914.150110-6-lizhengyu3@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kbuild | 4 ++ arch/riscv/Kconfig | 1 + arch/riscv/purgatory/.gitignore | 4 ++ arch/riscv/purgatory/Makefile | 95 ++++++++++++++++++++++++++++++++ arch/riscv/purgatory/entry.S | 47 ++++++++++++++++ arch/riscv/purgatory/purgatory.c | 45 +++++++++++++++ 6 files changed, 196 insertions(+) create mode 100644 arch/riscv/purgatory/.gitignore create mode 100644 arch/riscv/purgatory/Makefile create mode 100644 arch/riscv/purgatory/entry.S create mode 100644 arch/riscv/purgatory/purgatory.c diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index 4614c01ba5b32..98be8dcf20466 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -2,3 +2,7 @@ obj-y += kernel/ mm/ net/ obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ + +# for cleaning +subdir- += boot diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 68b5b1027c000..d62ce491edd30 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -421,6 +421,7 @@ config KEXEC_FILE config ARCH_HAS_KEXEC_PURGATORY def_bool KEXEC_FILE + select BUILD_BIN2C depends on CRYPTO=y depends on CRYPTO_SHA256=y diff --git a/arch/riscv/purgatory/.gitignore b/arch/riscv/purgatory/.gitignore new file mode 100644 index 0000000000000..38d7d1bda4d7c --- /dev/null +++ b/arch/riscv/purgatory/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +purgatory.chk +purgatory.ro +kexec-purgatory.c diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile new file mode 100644 index 0000000000000..d4df200f7edf9 --- /dev/null +++ b/arch/riscv/purgatory/Makefile @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0 +OBJECT_FILES_NON_STANDARD := y + +purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o + +targets += $(purgatory-y) +PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) + +$(obj)/string.o: $(srctree)/lib/string.c FORCE + $(call if_changed_rule,cc_o_c) + +$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE + $(call if_changed_rule,cc_o_c) + +$(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE + $(call if_changed_rule,cc_o_c) + +CFLAGS_sha256.o := -D__DISABLE_EXPORTS +CFLAGS_string.o := -D__DISABLE_EXPORTS +CFLAGS_ctype.o := -D__DISABLE_EXPORTS + +# When linking purgatory.ro with -r unresolved symbols are not checked, +# also link a purgatory.chk binary without -r to check for unresolved symbols. +PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib +LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS) +LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS) +targets += purgatory.ro purgatory.chk + +# Sanitizer, etc. runtimes are unavailable and cannot be linked here. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +# These are adjustments to the compiler flags used for objects that +# make up the standalone purgatory.ro + +PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel +PURGATORY_CFLAGS := -mcmodel=medany -ffreestanding -fno-zero-initialized-in-bss +PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING +PURGATORY_CFLAGS += -fno-stack-protector -g0 + +# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That +# in turn leaves some undefined symbols like __fentry__ in purgatory and not +# sure how to relocate those. +ifdef CONFIG_FUNCTION_TRACER +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE) +endif + +ifdef CONFIG_STACKPROTECTOR +PURGATORY_CFLAGS_REMOVE += -fstack-protector +endif + +ifdef CONFIG_STACKPROTECTOR_STRONG +PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong +endif + +CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_sha256.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_string.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_ctype.o += $(PURGATORY_CFLAGS) + +AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 + +$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + +$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE + $(call if_changed,ld) + +targets += kexec-purgatory.c + +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ + +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE + $(call if_changed,bin2c) + +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S new file mode 100644 index 0000000000000..0194f4554130a --- /dev/null +++ b/arch/riscv/purgatory/entry.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * purgatory: Runs between two kernels + * + * Copyright (C) 2022 Huawei Technologies Co, Ltd. + * + * Author: Li Zhengyu (lizhengyu3@huawei.com) + * + */ + +.macro size, sym:req + .size \sym, . - \sym +.endm + +.text + +.globl purgatory_start +purgatory_start: + + lla sp, .Lstack + mv s0, a0 /* The hartid of the current hart */ + mv s1, a1 /* Phys address of the FDT image */ + + jal purgatory + + /* Start new image. */ + mv a0, s0 + mv a1, s1 + ld a2, riscv_kernel_entry + jr a2 + +size purgatory_start + +.align 4 + .rept 256 + .quad 0 + .endr +.Lstack: + +.data + +.globl riscv_kernel_entry +riscv_kernel_entry: + .quad 0 +size riscv_kernel_entry + +.end diff --git a/arch/riscv/purgatory/purgatory.c b/arch/riscv/purgatory/purgatory.c new file mode 100644 index 0000000000000..80596ab5fb622 --- /dev/null +++ b/arch/riscv/purgatory/purgatory.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * purgatory: Runs between two kernels + * + * Copyright (C) 2022 Huawei Technologies Co, Ltd. + * + * Author: Li Zhengyu (lizhengyu3@huawei.com) + * + */ + +#include +#include +#include +#include + +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory"); + +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(".kexec-purgatory"); + +static int verify_sha256_digest(void) +{ + struct kexec_sha_region *ptr, *end; + struct sha256_state ss; + u8 digest[SHA256_DIGEST_SIZE]; + + sha256_init(&ss); + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + for (ptr = purgatory_sha_regions; ptr < end; ptr++) + sha256_update(&ss, (uint8_t *)(ptr->start), ptr->len); + sha256_final(&ss, digest); + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)) != 0) + return 1; + return 0; +} + +/* workaround for a warning with -Wmissing-prototypes */ +void purgatory(void); + +void purgatory(void) +{ + if (verify_sha256_digest()) + for (;;) + /* loop forever */ + ; +} From 3b91f0a097cab510ccc1b5d2e9f78c0d7f7a813c Mon Sep 17 00:00:00 2001 From: Li Zhengyu Date: Fri, 8 Apr 2022 18:09:14 +0800 Subject: [PATCH 08/27] RISC-V: Load purgatory in kexec_file This patch supports kexec_file to load and relocate purgatory. It works well on riscv64 QEMU, being tested with devmem. Signed-off-by: Li Zhengyu Link: https://lore.kernel.org/r/20220408100914.150110-7-lizhengyu3@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 151 ++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 911d65d5a123c..9cb85095fd459 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -182,6 +182,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long new_kernel_pbase = 0UL; unsigned long initrd_pbase = 0UL; unsigned long headers_sz; + unsigned long kernel_start; void *fdt, *headers; struct elfhdr ehdr; struct kexec_buf kbuf; @@ -196,6 +197,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, &old_kernel_pbase, &new_kernel_pbase); if (ret) goto out; + kernel_start = image->start; pr_notice("The entry point of kernel at 0x%lx\n", image->start); /* Add the kernel binary to the image */ @@ -246,6 +248,22 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, cmdline = modified_cmdline; } +#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY + /* Add purgatory to the image */ + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_load_purgatory(image, &kbuf); + if (ret) { + pr_err("Error loading purgatory ret=%d\n", ret); + goto out; + } + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", + &kernel_start, + sizeof(kernel_start), 0); + if (ret) + pr_err("Error update purgatory ret=%d\n", ret); +#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */ + /* Add the initrd to the image */ if (initrd != NULL) { kbuf.buffer = initrd; @@ -291,6 +309,139 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, return ret ? ERR_PTR(ret) : NULL; } +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RISCV_IMM_BITS 12 +#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) +#define RISCV_CONST_HIGH_PART(x) \ + (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) +#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) + +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_BTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ + (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_JTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ + (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) +#define ENCODE_CBTYPE_IMM(x) \ + ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) +#define ENCODE_CJTYPE_IMM(x) \ + ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ + (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) +#define ENCODE_UJTYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ + (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) +#define ENCODE_UITYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) + +#define CLEAN_IMM(type, x) \ + ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf_Rela *relas; + int i, r_type; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + unsigned long sec_base; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= pi->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; + + val = sym->st_value; + val += sec_base; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + switch (r_type) { + case R_RISCV_BRANCH: + *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | + ENCODE_BTYPE_IMM(val - addr); + break; + case R_RISCV_JAL: + *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | + ENCODE_JTYPE_IMM(val - addr); + break; + /* + * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I + * sym is expected to be next to R_RISCV_PCREL_HI20 + * in purgatory relsec. Handle it like R_RISCV_CALL + * sym, instead of searching the whole relsec. + */ + case R_RISCV_PCREL_HI20: + case R_RISCV_CALL: + *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | + ENCODE_UJTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_BRANCH: + *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | + ENCODE_CBTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_JUMP: + *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | + ENCODE_CJTYPE_IMM(val - addr); + break; + case R_RISCV_ADD32: + *(u32 *)loc += val; + break; + case R_RISCV_SUB32: + *(u32 *)loc -= val; + break; + /* It has been applied by R_RISCV_PCREL_HI20 sym */ + case R_RISCV_PCREL_LO12_I: + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + break; + default: + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + const struct kexec_file_ops elf_kexec_ops = { .probe = kexec_elf_probe, .load = elf_kexec_load, From 8995930f5e2c2733b2504c676fcc11b2a2bdfdad Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Fri, 26 Nov 2021 20:04:10 +0200 Subject: [PATCH 09/27] riscv: use hart id instead of cpu id on machine_kexec raw_smp_processor_id() doesn't return the hart id as stated in arch/riscv/include/asm/smp.h, use smp_processor_id() instead to get the cpu id, and cpuid_to_hartid_map() to pass the hart id to the next kernel. This fixes kexec on HiFive Unleashed/Unmatched where cpu ids and hart ids don't match (on qemu-virt they match). Fixes: fba8a8674f68 ("RISC-V: Add kexec support") Signed-off-by: Nick Kossifidis Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/machine_kexec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index e6eca271a4d60..cbef0fc73afa8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -169,7 +169,8 @@ machine_kexec(struct kimage *image) struct kimage_arch *internal = &image->arch; unsigned long jump_addr = (unsigned long) image->start; unsigned long first_ind_entry = (unsigned long) &image->head; - unsigned long this_hart_id = raw_smp_processor_id(); + unsigned long this_cpu_id = smp_processor_id(); + unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id); unsigned long fdt_addr = internal->fdt_addr; void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; From 619a416956a24a22d924141e68ed81072434968a Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Fri, 26 Nov 2021 20:04:09 +0200 Subject: [PATCH 10/27] riscv: Don't use va_pa_offset on kdump On kdump instead of using an intermediate step to relocate the kernel, that lives in a "control buffer" outside the current kernel's mapping, we jump to the crash kernel directly by calling riscv_kexec_norelocate(). The current implementation uses va_pa_offset while switching to physical addressing, however since we moved the kernel outside the linear mapping this won't work anymore since riscv_kexec_norelocate() is part of the kernel mapping and we should use kernel_map.va_kernel_pa_offset, and also take XIP kernel into account. We don't really need to use va_pa_offset on riscv_kexec_norelocate, we can just set STVEC to the physical address of the new kernel instead and let the hart jump to the new kernel on the next instruction after setting SATP to zero. This fixes kdump and is also simpler/cleaner. I tested this on the latest qemu and HiFive Unmatched and works as expected. Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Nick Kossifidis Reviewed-by: Alexandre Ghiti Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/kexec_relocate.S | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index a80b52a74f58c..059c5e216ae75 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -159,25 +159,15 @@ SYM_CODE_START(riscv_kexec_norelocate) * s0: (const) Phys address to jump to * s1: (const) Phys address of the FDT image * s2: (const) The hartid of the current hart - * s3: (const) kernel_map.va_pa_offset, used when switching MMU off */ mv s0, a1 mv s1, a2 mv s2, a3 - mv s3, a4 /* Disable / cleanup interrupts */ csrw CSR_SIE, zero csrw CSR_SIP, zero - /* Switch to physical addressing */ - la s4, 1f - sub s4, s4, s3 - csrw CSR_STVEC, s4 - csrw CSR_SATP, zero - -.align 2 -1: /* Pass the arguments to the next kernel / Cleanup*/ mv a0, s2 mv a1, s1 @@ -214,7 +204,15 @@ SYM_CODE_START(riscv_kexec_norelocate) csrw CSR_SCAUSE, zero csrw CSR_SSCRATCH, zero - jalr zero, a2, 0 + /* + * Switch to physical addressing + * This will also trigger a jump to CSR_STVEC + * which in this case is the address of the new + * kernel. + */ + csrw CSR_STVEC, a2 + csrw CSR_SATP, zero + SYM_CODE_END(riscv_kexec_norelocate) .section ".rodata" From 273e70e6338718855196486d5dc7a66a39fc20a3 Mon Sep 17 00:00:00 2001 From: Li Zhengyu Date: Fri, 8 Apr 2022 18:09:13 +0800 Subject: [PATCH 11/27] RISC-V: Add purgatory This patch adds purgatory, the name and concept have been taken from kexec-tools. Purgatory runs between two kernels, and do verify sha256 hash to ensure the kernel to jump to is fine and has not been corrupted after loading. Makefile is modified based on x86 platform. Signed-off-by: Li Zhengyu Link: https://lore.kernel.org/r/20220408100914.150110-6-lizhengyu3@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kbuild | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild index 98be8dcf20466..55a6542877688 100644 --- a/arch/riscv/Kbuild +++ b/arch/riscv/Kbuild @@ -4,5 +4,7 @@ obj-y += kernel/ mm/ net/ obj-$(CONFIG_BUILTIN_DTB) += boot/dts/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ + # for cleaning subdir- += boot From f60808e12689665e58d44363b7cd6dbd1d065345 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 26 Jun 2022 07:34:36 +0900 Subject: [PATCH 12/27] riscv/purgatory: hard-code obj-y in Makefile The purgatory/ directory is entirely guarded in arch/riscv/Kbuild. CONFIG_ARCH_HAS_KEXEC_PURGATORY is bool type. $(CONFIG_ARCH_HAS_KEXEC_PURGATORY) is always 'y' when Kbuild visits this Makefile for building. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20220625223438.835408-1-masahiroy@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/purgatory/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index d4df200f7edf9..c2d14e2f345db 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -92,4 +92,4 @@ quiet_cmd_bin2c = BIN2C $@ $(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE $(call if_changed,bin2c) -obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o +obj-y += kexec-purgatory.o From 80ece3820a3669ab6c38f2fe09f81eeb9cd66404 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 26 Jun 2022 07:34:37 +0900 Subject: [PATCH 13/27] riscv/purgatory: Omit use of bin2c The .incbin assembler directive is much faster than bin2c + $(CC). Do similar refactoring as in commit 4c0f032d4963 ("s390/purgatory: Omit use of bin2c"). Please note the .quad directive matches to size_t in C (both 8 byte) because the purgatory is compiled only for the 64-bit kernel. (KEXEC_FILE depends on 64BIT). Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20220625223438.835408-2-masahiroy@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - arch/riscv/purgatory/.gitignore | 1 - arch/riscv/purgatory/Makefile | 8 +------- arch/riscv/purgatory/kexec-purgatory.S | 14 ++++++++++++++ scripts/remove-stale-files | 6 ++++++ 5 files changed, 21 insertions(+), 9 deletions(-) create mode 100644 arch/riscv/purgatory/kexec-purgatory.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d62ce491edd30..68b5b1027c000 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -421,7 +421,6 @@ config KEXEC_FILE config ARCH_HAS_KEXEC_PURGATORY def_bool KEXEC_FILE - select BUILD_BIN2C depends on CRYPTO=y depends on CRYPTO_SHA256=y diff --git a/arch/riscv/purgatory/.gitignore b/arch/riscv/purgatory/.gitignore index 38d7d1bda4d7c..6e4dfb024ad2b 100644 --- a/arch/riscv/purgatory/.gitignore +++ b/arch/riscv/purgatory/.gitignore @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only purgatory.chk purgatory.ro -kexec-purgatory.c diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index c2d14e2f345db..dd58e1d993972 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -84,12 +84,6 @@ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE $(call if_changed,ld) -targets += kexec-purgatory.c - -quiet_cmd_bin2c = BIN2C $@ - cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ - -$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE - $(call if_changed,bin2c) +$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro $(obj)/purgatory.chk obj-y += kexec-purgatory.o diff --git a/arch/riscv/purgatory/kexec-purgatory.S b/arch/riscv/purgatory/kexec-purgatory.S new file mode 100644 index 0000000000000..0e91888157187 --- /dev/null +++ b/arch/riscv/purgatory/kexec-purgatory.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + + .section .rodata, "a" + + .align 8 +kexec_purgatory: + .globl kexec_purgatory + .incbin "arch/riscv/purgatory/purgatory.ro" +.Lkexec_purgatroy_end: + + .align 8 +kexec_purgatory_size: + .globl kexec_purgatory_size + .quad .Lkexec_purgatroy_end - kexec_purgatory diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files index c3eb81c3f7de1..36fd1568eaadd 100755 --- a/scripts/remove-stale-files +++ b/scripts/remove-stale-files @@ -29,3 +29,9 @@ if [ -n "${building_out_of_srctree}" ]; then rm -f arch/arm/boot/compressed/${f} done fi + +rm -f arch/riscv/purgatory/kexec-purgatory.c + +rm -f scripts/extract-cert + +rm -f arch/x86/purgatory/kexec-purgatory.c From 655617a2b5dedad09aadb7c92c380afb87f7b9b3 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 13 Jan 2023 22:23:00 +0100 Subject: [PATCH 14/27] RISC-V: add infrastructure to allow different str* implementations Depending on supported extensions on specific RISC-V cores, optimized str* functions might make sense. This adds basic infrastructure to allow patching the function calls via alternatives later on. The Linux kernel provides standard implementations for string functions but when architectures want to extend them, they need to provide their own. The added generic string functions are done in assembler (taken from disassembling the main-kernel functions for now) to allow us to control the used registers and extend them with optimized variants. This doesn't override the compiler's use of builtin replacements. So still first of all the compiler will select if a builtin will be better suitable i.e. for known strings. For all regular cases we will want to later select possible optimized variants and in the worst case fall back to the generic implemention added with this change. Reviewed-by: Andrew Jones Signed-off-by: Heiko Stuebner Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230113212301.3534711-2-heiko@sntech.de Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/string.h | 10 ++++++++ arch/riscv/kernel/riscv_ksyms.c | 3 +++ arch/riscv/lib/Makefile | 3 +++ arch/riscv/lib/strcmp.S | 36 +++++++++++++++++++++++++++++ arch/riscv/lib/strlen.S | 28 ++++++++++++++++++++++ arch/riscv/lib/strncmp.S | 41 +++++++++++++++++++++++++++++++++ arch/riscv/purgatory/Makefile | 13 +++++++++++ 7 files changed, 134 insertions(+) create mode 100644 arch/riscv/lib/strcmp.S create mode 100644 arch/riscv/lib/strlen.S create mode 100644 arch/riscv/lib/strncmp.S diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 9090493665555..a96b1fea24fe4 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMMOVE extern asmlinkage void *memmove(void *, const void *, size_t); extern asmlinkage void *__memmove(void *, const void *, size_t); + +#define __HAVE_ARCH_STRCMP +extern asmlinkage int strcmp(const char *cs, const char *ct); + +#define __HAVE_ARCH_STRLEN +extern asmlinkage __kernel_size_t strlen(const char *); + +#define __HAVE_ARCH_STRNCMP +extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); + /* For those files which don't want to check by kasan. */ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) #define memcpy(dst, src, len) __memcpy(dst, src, len) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 5ab1c7e1a6ed5..a72879b4249a5 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,6 +12,9 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 25d5c9664e57e..6c74b0bedd60d 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,9 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += memmove.o +lib-y += strcmp.o +lib-y += strlen.o +lib-y += strncmp.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S new file mode 100644 index 0000000000000..8babd712b9587 --- /dev/null +++ b/arch/riscv/lib/strcmp.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +/* int strcmp(const char *cs, const char *ct) */ +SYM_FUNC_START(strcmp) + /* + * Returns + * a0 - comparison result, value like strcmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * + * Clobbers + * t0, t1 + */ +1: + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 2f + bnez t0, 1b + li a0, 0 + ret +2: + /* + * strcmp only needs to return (< 0, 0, > 0) values + * not necessarily -1, 0, +1 + */ + sub a0, t0, t1 + ret +SYM_FUNC_END(strcmp) diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S new file mode 100644 index 0000000000000..0a3b11853efdb --- /dev/null +++ b/arch/riscv/lib/strlen.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +/* int strlen(const char *s) */ +SYM_FUNC_START(strlen) + /* + * Returns + * a0 - string length + * + * Parameters + * a0 - String to measure + * + * Clobbers: + * t0, t1 + */ + mv t1, a0 +1: + lbu t0, 0(t1) + beqz t0, 2f + addi t1, t1, 1 + j 1b +2: + sub a0, t1, a0 + ret +SYM_FUNC_END(strlen) diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S new file mode 100644 index 0000000000000..1f644d0a93f68 --- /dev/null +++ b/arch/riscv/lib/strncmp.S @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +/* int strncmp(const char *cs, const char *ct, size_t count) */ +SYM_FUNC_START(strncmp) + /* + * Returns + * a0 - comparison result, value like strncmp + * + * Parameters + * a0 - string1 + * a1 - string2 + * a2 - number of characters to compare + * + * Clobbers + * t0, t1, t2 + */ + li t2, 0 +1: + beq a2, t2, 2f + lbu t0, 0(a0) + lbu t1, 0(a1) + addi a0, a0, 1 + addi a1, a1, 1 + bne t0, t1, 3f + addi t2, t2, 1 + bnez t0, 1b +2: + li a0, 0 + ret +3: + /* + * strncmp only needs to return (< 0, 0, > 0) values + * not necessarily -1, 0, +1 + */ + sub a0, t0, t1 + ret +SYM_FUNC_END(strncmp) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index dd58e1d993972..d16bf715a586b 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -2,6 +2,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o +purgatory-y += strcmp.o strlen.o strncmp.o targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) @@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE $(call if_changed_rule,as_o_S) +$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE + $(call if_changed_rule,as_o_S) + $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) @@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 +AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) From 78f7fe21dfc0a653f7a80f17e8dc3b67975e9e8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 30 May 2022 09:42:02 +0200 Subject: [PATCH 15/27] RISC-V: Prepare dropping week attribute from arch_kexec_apply_relocations[_add] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this change arch/riscv/kernel/elf_kexec.c fails to compile once commit 233c1e6c319c ("kexec_file: drop weak attribute from arch_kexec_apply_relocations[_add]") is also contained in the tree. This currently happens in next-20220527. Prepare the RISC-V similar to the s390 adaption done in 233c1e6c319c. This is safe to do on top of the riscv change even without the change to arch_kexec_apply_relocations. Fixes: 838b3e28488f ("RISC-V: Load purgatory in kexec_file") Looks-good-to: liaochang (A) Signed-off-by: Uwe Kleine-König Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 206217b23301f..eee260e8ab308 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -55,6 +55,13 @@ extern riscv_kexec_method riscv_kexec_norelocate; #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops elf_kexec_ops; + +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add #endif #endif From d9700bd2a19544bf00b251c08bf087d2d3403625 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:57 +0800 Subject: [PATCH 16/27] RISC-V: kexec: Fix memory leak of fdt buffer This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store fdt. While it's not freed back to system when kexec kernel is reloaded or unloaded. Then memory leak is caused. Fix it by introducing riscv specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Reviewed-by: Liao Chang Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 5 +++++ arch/riscv/kernel/elf_kexec.c | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index eee260e8ab308..2b56769cb530c 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -39,6 +39,7 @@ crash_setup_regs(struct pt_regs *newregs, #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + void *fdt; /* For CONFIG_KEXEC_FILE */ unsigned long fdt_addr; }; @@ -62,6 +63,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +struct kimage; +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 9cb85095fd459..c839b1b302216 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -21,6 +21,14 @@ #include #include +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + return kexec_image_post_load_cleanup_default(image); +} + static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_elf_info *elf_info, unsigned long old_pbase, unsigned long new_pbase) @@ -298,6 +306,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, pr_err("Error add DTB kbuf ret=%d\n", ret); goto out_free_fdt; } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); goto out; From e041aaf3dd1cc05172eecd775f6c61f928d401e3 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:58 +0800 Subject: [PATCH 17/27] RISC-V: kexec: Fix memory leak of elf header buffer This is reported by kmemleak detector: unreferenced object 0xff2000000403d000 (size 4096): comm "kexec", pid 146, jiffies 4294900633 (age 64.792s) hex dump (first 32 bytes): 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............ 04 00 f3 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000566ca97c>] kmemleak_vmalloc+0x3c/0xbe [<00000000979283d8>] __vmalloc_node_range+0x3ac/0x560 [<00000000b4b3712a>] __vmalloc_node+0x56/0x62 [<00000000854f75e2>] vzalloc+0x2c/0x34 [<00000000e9a00db9>] crash_prepare_elf64_headers+0x80/0x30c [<0000000067e8bf48>] elf_kexec_load+0x3e8/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via vzalloc() to store elf headers. While it's not freed back to system when kdump kernel is reloaded or unloaded, or when image->elf_header is successfully set and then fails to load kdump kernel for some reason. Fix it by freeing the buffer in arch_kimage_file_post_load_cleanup(). Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221104095658.141222-2-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index c839b1b302216..4405061a2b1f9 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -26,6 +26,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) kvfree(image->arch.fdt); image->arch.fdt = NULL; + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); } From 308d67d788e560040dbd0c445e2772cb973cb8c9 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:36 -0500 Subject: [PATCH 18/27] riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument The 'retp' is a pointer to the return address on the stack, so we must pass the current return address pointer as the 'retp' argument to ftrace_push_return_trace(). Not parent function's return address on the stack. Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 315db3d0229bf..d8580c5e9e499 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -59,7 +59,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } else { fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, - (unsigned long *)(fp - 8)); + &frame->ra); } } From 8d73996c0722ded1999d5a029d3b629c4f24f9d8 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:37 -0500 Subject: [PATCH 19/27] riscv: stacktrace: Make walk_stackframe cross pt_regs frame The current walk_stackframe with FRAME_POINTER would stop unwinding at ret_from_exception: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [] walk_stackframe+0x0/0xee [] show_stack+0x32/0x4a [] dump_stack_lvl+0x72/0x8e [] dump_stack+0x14/0x1c [] ___might_sleep+0x12e/0x138 [] __might_sleep+0x10/0x18 [] down_read+0x22/0xa4 [] do_page_fault+0xb0/0x2fe [] ret_from_exception+0x0/0xc The optimization would help walk_stackframe cross the pt_regs frame and get more backtrace of debug info: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [] walk_stackframe+0x0/0xee [] show_stack+0x32/0x4a [] dump_stack_lvl+0x72/0x8e [] dump_stack+0x14/0x1c [] ___might_sleep+0x12e/0x138 [] __might_sleep+0x10/0x18 [] down_read+0x22/0xa4 [] do_page_fault+0xb0/0x2fe [] ret_from_exception+0x0/0xc [] riscv_intc_irq+0x1a/0x72 [] ret_from_exception+0x0/0xc [] vma_link+0x54/0x160 [] mmap_region+0x2cc/0x4d0 [] do_mmap+0x2d8/0x3ac [] vm_mmap_pgoff+0x70/0xb8 [] vm_mmap+0x2a/0x36 [] elf_map+0x72/0x84 [] load_elf_binary+0x69a/0xec8 [] bprm_execve+0x246/0x53a [] kernel_execve+0xe8/0x124 [] run_init_process+0xfa/0x10c [] try_to_run_init_process+0x12/0x3c [] kernel_init+0xb4/0xf8 [] ret_from_exception+0x0/0xc Here is the error injection test code for the above output: drivers/irqchip/irq-riscv-intc.c: static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + u32 tmp; __get_user(tmp, (u32 *)0); Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-3-guoren@kernel.org [Palmer: use SYM_CODE_*] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 3 ++- arch/riscv/kernel/stacktrace.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 98f502654edd3..466a61ebdf22e 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -231,7 +231,7 @@ ret_from_syscall_rejected: andi t0, t0, _TIF_SYSCALL_WORK bnez t0, handle_syscall_trace_exit -ret_from_exception: +SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS @@ -245,6 +245,7 @@ ret_from_exception: andi s0, s0, SR_SPP #endif bnez s0, resume_kernel +SYM_CODE_END(ret_from_exception) resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index d8580c5e9e499..2f4061fff8d3e 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -18,6 +18,8 @@ register unsigned long sp_in_global __asm__("sp"); #ifdef CONFIG_FRAME_POINTER +extern asmlinkage void ret_from_exception(void); + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { @@ -60,6 +62,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, &frame->ra); + if (pc == (unsigned long)ret_from_exception) { + if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + break; + + pc = ((struct pt_regs *)sp)->epc; + fp = ((struct pt_regs *)sp)->s0; + } } } From 2ef2a3afe14eb8ade9a845d6cd09a82674ce1d3d Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 29 Aug 2022 23:52:19 +0300 Subject: [PATCH 20/27] riscv: mm: notify remote harts about mmu cache updates Current implementation of update_mmu_cache function performs local TLB flush. It does not take into account ASID information. Besides, it does not take into account other harts currently running the same mm context or possible migration of the running context to other harts. Meanwhile TLB flush is not performed for every context switch if ASID support is enabled. Patch [1] proposed to add ASID support to update_mmu_cache to avoid flushing local TLB entirely. This patch takes into account other harts currently running the same mm context as well as possible migration of this context to other harts. For this purpose the approach from flush_icache_mm is reused. Remote harts currently running the same mm context are informed via SBI calls that they need to flush their local TLBs. All the other harts are marked as needing a deferred TLB flush when this mm context runs on them. [1] https://lore.kernel.org/linux-riscv/20220821013926.8968-1-tjytimi@163.com/ Signed-off-by: Sergey Matyukevich Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/#t Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 2 ++ arch/riscv/include/asm/pgtable.h | 2 +- arch/riscv/include/asm/tlbflush.h | 18 ++++++++++++++++++ arch/riscv/mm/context.c | 10 ++++++++++ arch/riscv/mm/tlbflush.c | 28 +++++++++++----------------- 5 files changed, 42 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 0099dc1161683..5ff1f19fd45c2 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,8 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; + /* A local tlb flush is needed before user execution can resume. */ + cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 39b550310ec64..799c16e065253 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -386,7 +386,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. */ - local_flush_tlb_page(address); + flush_tlb_page(vma, address); } static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea3..907b9efd39a87 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ee3459cb6750b..cc4a47bda82a0 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -196,6 +196,16 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) if (need_flush_tlb) local_flush_tlb_all(); +#ifdef CONFIG_SMP + else { + cpumask_t *mask = &mm->context.tlb_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + local_flush_tlb_all_asid(cntx & asid_mask); + } + } +#endif } static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 64f8201237c24..efefc3986c48c 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,23 +5,7 @@ #include #include #include - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); -} +#include void flush_tlb_all(void) { @@ -31,6 +15,7 @@ void flush_tlb_all(void) static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { + struct cpumask *pmask = &mm->context.tlb_stale_mask; struct cpumask *cmask = mm_cpumask(mm); struct cpumask hmask; unsigned int cpuid; @@ -45,6 +30,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, if (static_branch_unlikely(&use_asid_allocator)) { unsigned long asid = atomic_long_read(&mm->context.id); + /* + * TLB will be immediately flushed on harts concurrently + * executing this MM context. TLB flush on other harts + * is deferred until this MM context migrates there. + */ + cpumask_setall(pmask); + cpumask_clear_cpu(cpuid, pmask); + cpumask_andnot(pmask, pmask, cmask); + if (broadcast) { riscv_cpuid_to_hartid_mask(cmask, &hmask); sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), From 64d2f8277845f923d002f10ad6e87ba63f22bb4f Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Wed, 1 Sep 2021 03:20:25 +0000 Subject: [PATCH 21/27] riscv/vdso: Add support for time namespaces Implement generic vdso time namespace support which also enables time namespaces for riscv. This is quite similar to what arm64 does. selftest/timens test result: 1..10 ok 1 Passed for CLOCK_BOOTTIME (syscall) ok 2 Passed for CLOCK_BOOTTIME (vdso) ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported ok 5 Passed for CLOCK_MONOTONIC (syscall) ok 6 Passed for CLOCK_MONOTONIC (vdso) ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall) ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso) ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall) ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso) # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0 Signed-off-by: Tong Tiangen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/vdso.h | 2 +- arch/riscv/include/asm/vdso/gettimeofday.h | 7 + arch/riscv/kernel/vdso.c | 250 ++++++++++++++++----- arch/riscv/kernel/vdso/vdso.lds.S | 3 + 5 files changed, 209 insertions(+), 54 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 68b5b1027c000..fb80a77bbfe1c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -63,6 +63,7 @@ config RISCV select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 208e31bc5d1c2..e310b22fe3f32 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -22,7 +22,7 @@ */ #ifdef CONFIG_MMU -#define __VVAR_PAGES 1 +#define __VVAR_PAGES 2 #ifndef __ASSEMBLY__ #include diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index f839f16e0d2a9..77d9c2f721c46 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return _vdso_data; } +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return _timens_data; +} +#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index b70956d804081..a9436a65161a4 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_TIME_VSYSCALL #include @@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[]; enum vvar_pages { VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, VVAR_NR_PAGES, }; #define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) -static unsigned int vdso_pages __ro_after_init; -static struct page **vdso_pagelist __ro_after_init; - /* * The vDSO data page. */ @@ -42,83 +41,228 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = &vdso_data_store.data; -static int __init vdso_init(void) +struct __vdso_info { + const char *name; + const char *vdso_code_start; + const char *vdso_code_end; + unsigned long vdso_pages; + /* Data Mapping */ + struct vm_special_mapping *dm; + /* Code Mapping */ + struct vm_special_mapping *cm; +}; + +static struct __vdso_info vdso_info __ro_after_init = { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, +}; + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + +static int __init __vdso_init(void) { unsigned int i; + struct page **vdso_pagelist; + unsigned long pfn; - vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - vdso_pagelist = - kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL); - if (unlikely(vdso_pagelist == NULL)) { - pr_err("vdso: pagelist allocation failed\n"); - return -ENOMEM; + if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; } - for (i = 0; i < vdso_pages; i++) { - struct page *pg; + vdso_info.vdso_pages = ( + vdso_info.vdso_code_end - + vdso_info.vdso_code_start) >> + PAGE_SHIFT; + + vdso_pagelist = kcalloc(vdso_info.vdso_pages, + sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + return -ENOMEM; + + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(vdso_info.vdso_code_start); + + for (i = 0; i < vdso_info.vdso_pages; i++) + vdso_pagelist[i] = pfn_to_page(pfn + i); + + vdso_info.cm->pages = vdso_pagelist; + + return 0; +} + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); - pg = virt_to_page(vdso_start + (i << PAGE_SHIFT)); - vdso_pagelist[i] = pg; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, vdso_info.dm)) + zap_page_range(vma, vma->vm_start, size); } - vdso_pagelist[i] = virt_to_page(vdso_data); + mmap_read_unlock(mm); return 0; } + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = sym_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +enum rv_vdso_map { + RV_VDSO_MAP_VVAR, + RV_VDSO_MAP_VDSO, +}; + +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { + [RV_VDSO_MAP_VVAR] = { + .name = "[vvar]", + .fault = vvar_fault, + }, + [RV_VDSO_MAP_VDSO] = { + .name = "[vdso]", + .mremap = vdso_mremap, + }, +}; + +static int __init vdso_init(void) +{ + vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR]; + vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO]; + + return __vdso_init(); +} arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +static int __setup_additional_pages(struct mm_struct *mm, + struct linux_binprm *bprm, + int uses_interp) { - struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_len; - int ret; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); - vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT; + vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + VVAR_SIZE; - if (mmap_write_lock_killable(mm)) - return -EINTR; - - vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = vdso_base; - goto end; + ret = ERR_PTR(vdso_base); + goto up_fail; } - mm->context.vdso = NULL; - ret = install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); - if (unlikely(ret)) - goto end; + ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, + (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm); + if (IS_ERR(ret)) + goto up_fail; + vdso_base += VVAR_SIZE; + mm->context.vdso = (void *)vdso_base; ret = - install_special_mapping(mm, vdso_base + VVAR_SIZE, - vdso_pages << PAGE_SHIFT, + _install_special_mapping(mm, vdso_base, vdso_text_len, (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), - vdso_pagelist); + vdso_info.cm); - if (unlikely(ret)) - goto end; + if (IS_ERR(ret)) + goto up_fail; - /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). - */ - mm->context.vdso = (void *)vdso_base + VVAR_SIZE; + return 0; -end: - mmap_write_unlock(mm); - return ret; +up_fail: + mm->context.vdso = NULL; + return PTR_ERR(ret); } -const char *arch_vma_name(struct vm_area_struct *vma) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso)) - return "[vdso]"; - if (vma->vm_mm && (vma->vm_start == - (long)vma->vm_mm->context.vdso - VVAR_SIZE)) - return "[vdso_data]"; - return NULL; + struct mm_struct *mm = current->mm; + int ret; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + ret = __setup_additional_pages(mm, bprm, uses_interp); + mmap_write_unlock(mm); + + return ret; } diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index e9111f700af08..01d94aae5bf51 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv) SECTIONS { PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From eb1f42cdf95216953113569d87557ea9b094c731 Mon Sep 17 00:00:00 2001 From: Daniel Maslowski Date: Sun, 26 Mar 2023 20:00:21 +0200 Subject: [PATCH 22/27] riscv: WIP: purge purgatory :-) Signed-off-by: Daniel Maslowski --- arch/riscv/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fb80a77bbfe1c..f0ddcf0ecd0c1 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -420,11 +420,6 @@ config KEXEC_FILE If you don't know what to do here, say Y. -config ARCH_HAS_KEXEC_PURGATORY - def_bool KEXEC_FILE - depends on CRYPTO=y - depends on CRYPTO_SHA256=y - config CRASH_DUMP bool "Build kdump crash kernel" help From ab594fb937d75a0bf27636a19fd15cbf6699dcef Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Fri, 8 Apr 2022 18:09:09 +0800 Subject: [PATCH 23/27] kexec_file: Fix kexec_file.c build error for riscv platform When CONFIG_KEXEC_FILE is set for riscv platform, the compilation of kernel/kexec_file.c generate build error: kernel/kexec_file.c: In function 'crash_prepare_elf64_headers': ./arch/riscv/include/asm/page.h:110:71: error: request for member 'virt_addr' in something not a structure or union 110 | ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr)) | ^ ./arch/riscv/include/asm/page.h:131:2: note: in expansion of macro 'is_linear_mapping' 131 | is_linear_mapping(_x) ? \ | ^~~~~~~~~~~~~~~~~ ./arch/riscv/include/asm/page.h:140:31: note: in expansion of macro '__va_to_pa_nodebug' 140 | #define __phys_addr_symbol(x) __va_to_pa_nodebug(x) | ^~~~~~~~~~~~~~~~~~ ./arch/riscv/include/asm/page.h:143:24: note: in expansion of macro '__phys_addr_symbol' 143 | #define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) | ^~~~~~~~~~~~~~~~~~ kernel/kexec_file.c:1327:36: note: in expansion of macro '__pa_symbol' 1327 | phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); This occurs is because the "kernel_map" referenced in macro is_linear_mapping() is suppose to be the one of struct kernel_mapping defined in arch/riscv/mm/init.c, but the 2nd argument of crash_prepare_elf64_header() has same symbol name, in expansion of macro is_linear_mapping in function crash_prepare_elf64_header(), "kernel_map" actually is the local variable. Signed-off-by: Liao Chang Link: https://lore.kernel.org/r/20220408100914.150110-2-lizhengyu3@huawei.com Signed-off-by: Palmer Dabbelt --- include/linux/kexec.h | 2 +- kernel/kexec_file.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0c994ae37729e..25b91cca80770 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -221,7 +221,7 @@ struct crash_mem { extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mstart, unsigned long long mend); -extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, +extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz); #endif /* CONFIG_KEXEC_FILE */ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 33400ff051a84..0837d8c0e7929 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -1255,7 +1255,7 @@ int crash_exclude_mem_range(struct crash_mem *mem, return 0; } -int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, +int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { Elf64_Ehdr *ehdr; @@ -1319,7 +1319,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, phdr++; /* Prepare PT_LOAD type program header for kernel text region */ - if (kernel_map) { + if (need_kernel_map) { phdr->p_type = PT_LOAD; phdr->p_flags = PF_R|PF_W|PF_X; phdr->p_vaddr = (unsigned long) _text; From 4448329afd4884173a1d5c8fc84a025a70789c72 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 May 2022 14:42:37 +0530 Subject: [PATCH 24/27] kexec_file: drop weak attribute from arch_kexec_apply_relocations[_add] Since commit d1bcae833b32f1 ("ELF: Don't generate unused section symbols") [1], binutils (v2.36+) started dropping section symbols that it thought were unused. This isn't an issue in general, but with kexec_file.c, gcc is placing kexec_arch_apply_relocations[_add] into a separate .text.unlikely section and the section symbol ".text.unlikely" is being dropped. Due to this, recordmcount is unable to find a non-weak symbol in .text.unlikely to generate a relocation record against. Address this by dropping the weak attribute from these functions. Instead, follow the existing pattern of having architectures #define the name of the function they want to override in their headers. [1] https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d1bcae833b32f1 [akpm@linux-foundation.org: arch/s390/include/asm/kexec.h needs linux/module.h] Link: https://lkml.kernel.org/r/20220519091237.676736-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Michael Ellerman Signed-off-by: Naveen N. Rao Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton --- arch/s390/include/asm/kexec.h | 10 ++++++++ arch/x86/include/asm/kexec.h | 8 ++++++ include/linux/kexec.h | 46 +++++++++++++++++++++++++++++------ kernel/kexec_file.c | 34 -------------------------- 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index ea398a05f6432..9241846d302ef 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -9,6 +9,8 @@ #ifndef _S390_KEXEC_H #define _S390_KEXEC_H +#include + #include #include #include @@ -77,4 +79,12 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif /*_S390_KEXEC_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 0a6e34b070175..c7c924e15011d 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -186,6 +186,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif typedef void crash_vmclear_fn(void); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 25b91cca80770..79398c0b64f94 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -187,14 +187,6 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len); void *arch_kexec_kernel_image_load(struct kimage *image); -int arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); -int arch_kexec_apply_relocations(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); int arch_kimage_file_post_load_cleanup(struct kimage *image); #ifdef CONFIG_KEXEC_SIG int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, @@ -223,6 +215,44 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz); + +#ifndef arch_kexec_apply_relocations_add +/* + * arch_kexec_apply_relocations_add - apply relocations of type RELA + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELAs. + * @symtab: Corresponding symtab. + * + * Return: 0 on success, negative errno on error. + */ +static inline int +arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, + const Elf_Shdr *relsec, const Elf_Shdr *symtab) +{ + pr_err("RELA relocation unsupported.\n"); + return -ENOEXEC; +} +#endif + +#ifndef arch_kexec_apply_relocations +/* + * arch_kexec_apply_relocations - apply relocations of type REL + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELs. + * @symtab: Corresponding symtab. + * + * Return: 0 on success, negative errno on error. + */ +static inline int +arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, + const Elf_Shdr *relsec, const Elf_Shdr *symtab) +{ + pr_err("REL relocation unsupported.\n"); + return -ENOEXEC; +} +#endif #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_KEXEC_ELF diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 0837d8c0e7929..a1ecaede921e0 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -108,40 +108,6 @@ int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, } #endif -/* - * arch_kexec_apply_relocations_add - apply relocations of type RELA - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELAs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("RELA relocation unsupported.\n"); - return -ENOEXEC; -} - -/* - * arch_kexec_apply_relocations - apply relocations of type REL - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("REL relocation unsupported.\n"); - return -ENOEXEC; -} - /* * Free up memory used by kernel, initrd, and command line. This is temporary * memory allocation which is not needed any more after these buffers have From 4fadd245b353e9e99026a255878dfbfb8d7db8a9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 1 Jul 2022 13:04:04 +0530 Subject: [PATCH 25/27] kexec_file: drop weak attribute from functions As requested (http://lkml.kernel.org/r/87ee0q7b92.fsf@email.froward.int.ebiederm.org), this series converts weak functions in kexec to use the #ifdef approach. Quoting the 3e35142ef99fe ("kexec_file: drop weak attribute from arch_kexec_apply_relocations[_add]") changelog: : Since commit d1bcae833b32f1 ("ELF: Don't generate unused section symbols") : [1], binutils (v2.36+) started dropping section symbols that it thought : were unused. This isn't an issue in general, but with kexec_file.c, gcc : is placing kexec_arch_apply_relocations[_add] into a separate : .text.unlikely section and the section symbol ".text.unlikely" is being : dropped. Due to this, recordmcount is unable to find a non-weak symbol in : .text.unlikely to generate a relocation record against. This patch (of 2); Drop __weak attribute from functions in kexec_file.c: - arch_kexec_kernel_image_probe() - arch_kimage_file_post_load_cleanup() - arch_kexec_kernel_image_load() - arch_kexec_locate_mem_hole() - arch_kexec_kernel_verify_sig() arch_kexec_kernel_image_load() calls into kexec_image_load_default(), so drop the static attribute for the latter. arch_kexec_kernel_verify_sig() is not overridden by any architecture, so drop the __weak attribute. Link: https://lkml.kernel.org/r/cover.1656659357.git.naveen.n.rao@linux.vnet.ibm.com Link: https://lkml.kernel.org/r/2cd7ca1fe4d6bb6ca38e3283c717878388ed6788.1656659357.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Naveen N. Rao Suggested-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Mimi Zohar --- arch/arm64/include/asm/kexec.h | 4 ++- arch/powerpc/include/asm/kexec.h | 9 +++++++ arch/s390/include/asm/kexec.h | 3 +++ arch/x86/include/asm/kexec.h | 6 +++++ include/linux/kexec.h | 44 +++++++++++++++++++++++++++----- kernel/kexec_file.c | 35 ++----------------------- 6 files changed, 61 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 00dbcc71aeb29..91d81824f8693 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -103,7 +103,9 @@ extern const struct kexec_file_ops kexec_image_ops; struct kimage; -extern int arch_kimage_file_post_load_cleanup(struct kimage *image); +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 88d0d7cf3a790..6152fa2200546 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -119,6 +119,15 @@ int setup_purgatory(struct kimage *image, const void *slave_code, #ifdef CONFIG_PPC64 struct kexec_buf; +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len); +#define arch_kexec_kernel_image_probe arch_kexec_kernel_image_probe + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); +#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole + int load_crashdump_segments_ppc64(struct kimage *image, struct kexec_buf *kbuf); int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 9241846d302ef..2a3a931e28b50 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -86,5 +86,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif /*_S390_KEXEC_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index c7c924e15011d..5b6e2ae54906c 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -193,6 +193,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +void *arch_kexec_kernel_image_load(struct kimage *image); +#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 79398c0b64f94..81c2115b2100d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -182,21 +182,53 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +void *kexec_image_load_default(struct kimage *image); + +#ifndef arch_kexec_kernel_image_probe +static inline int +arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) +{ + return kexec_image_probe_default(image, buf, buf_len); +} +#endif + +#ifndef arch_kimage_file_post_load_cleanup +static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + return kexec_image_post_load_cleanup_default(image); +} +#endif + +#ifndef arch_kexec_kernel_image_load +static inline void *arch_kexec_kernel_image_load(struct kimage *image) +{ + return kexec_image_load_default(image); +} +#endif -/* Architectures may override the below functions */ -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); -void *arch_kexec_kernel_image_load(struct kimage *image); -int arch_kimage_file_post_load_cleanup(struct kimage *image); #ifdef CONFIG_KEXEC_SIG int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len); #endif -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); +#ifndef arch_kexec_locate_mem_hole +/** + * arch_kexec_locate_mem_hole - Find free memory to place the segments. + * @kbuf: Parameters for the memory search. + * + * On success, kbuf->mem will have the start address of the memory region found. + * + * Return: 0 on success, negative errno on error. + */ +static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + return kexec_locate_mem_hole(kbuf); +} +#endif + /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index a1ecaede921e0..b372725aba868 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -53,14 +53,7 @@ int kexec_image_probe_default(struct kimage *image, void *buf, return ret; } -/* Architectures can provide this probe function */ -int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) -{ - return kexec_image_probe_default(image, buf, buf_len); -} - -static void *kexec_image_load_default(struct kimage *image) +void *kexec_image_load_default(struct kimage *image) { if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); @@ -71,11 +64,6 @@ static void *kexec_image_load_default(struct kimage *image) image->cmdline_buf_len); } -void * __weak arch_kexec_kernel_image_load(struct kimage *image) -{ - return kexec_image_load_default(image); -} - int kexec_image_post_load_cleanup_default(struct kimage *image) { if (!image->fops || !image->fops->cleanup) @@ -84,11 +72,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } -int __weak arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - return kexec_image_post_load_cleanup_default(image); -} - #ifdef CONFIG_KEXEC_SIG static int kexec_image_verify_sig_default(struct kimage *image, void *buf, unsigned long buf_len) @@ -101,8 +84,7 @@ static int kexec_image_verify_sig_default(struct kimage *image, void *buf, return image->fops->verify_sig(buf, buf_len); } -int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len) +int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len) { return kexec_image_verify_sig_default(image, buf, buf_len); } @@ -607,19 +589,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) return ret == 1 ? 0 : -EADDRNOTAVAIL; } -/** - * arch_kexec_locate_mem_hole - Find free memory to place the segments. - * @kbuf: Parameters for the memory search. - * - * On success, kbuf->mem will have the start address of the memory region found. - * - * Return: 0 on success, negative errno on error. - */ -int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) -{ - return kexec_locate_mem_hole(kbuf); -} - /** * kexec_add_buffer - place a buffer in a kexec segment * @kbuf: Buffer contents and memory parameters. From a932523eed19b700f4fe0fcaac54dfe5d5c3d3fd Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Thu, 14 Jul 2022 21:40:24 +0800 Subject: [PATCH 26/27] kexec: clean up arch_kexec_kernel_verify_sig Before commit 105e10e2cf1c ("kexec_file: drop weak attribute from functions"), there was already no arch-specific implementation of arch_kexec_kernel_verify_sig. With weak attribute dropped by that commit, arch_kexec_kernel_verify_sig is completely useless. So clean it up. Note later patches are dependent on this patch so it should be backported to the stable tree as well. Cc: stable@vger.kernel.org Suggested-by: Eric W. Biederman Reviewed-by: Michal Suchanek Acked-by: Baoquan He Signed-off-by: Coiby Xu [zohar@linux.ibm.com: reworded patch description "Note"] Link: https://lore.kernel.org/linux-integrity/20220714134027.394370-1-coxu@redhat.com/ Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 5 ----- kernel/kexec_file.c | 33 +++++++++++++-------------------- 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 81c2115b2100d..599a59d2cb34c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -206,11 +206,6 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image) } #endif -#ifdef CONFIG_KEXEC_SIG -int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len); -#endif - extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b372725aba868..f392654e2b784 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -72,24 +72,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } -#ifdef CONFIG_KEXEC_SIG -static int kexec_image_verify_sig_default(struct kimage *image, void *buf, - unsigned long buf_len) -{ - if (!image->fops || !image->fops->verify_sig) { - pr_debug("kernel loader does not support signature verification.\n"); - return -EKEYREJECTED; - } - - return image->fops->verify_sig(buf, buf_len); -} - -int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len) -{ - return kexec_image_verify_sig_default(image, buf, buf_len); -} -#endif - /* * Free up memory used by kernel, initrd, and command line. This is temporary * memory allocation which is not needed any more after these buffers have @@ -132,13 +114,24 @@ void kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_KEXEC_SIG +static int kexec_image_verify_sig(struct kimage *image, void *buf, + unsigned long buf_len) +{ + if (!image->fops || !image->fops->verify_sig) { + pr_debug("kernel loader does not support signature verification.\n"); + return -EKEYREJECTED; + } + + return image->fops->verify_sig(buf, buf_len); +} + static int kimage_validate_signature(struct kimage *image) { int ret; - ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, - image->kernel_buf_len); + ret = kexec_image_verify_sig(image, image->kernel_buf, + image->kernel_buf_len); if (ret) { if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) { From d8026ecc1fc68001bd15b455cb136a98e7a4f94c Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Fri, 8 Apr 2022 18:09:10 +0800 Subject: [PATCH 27/27] RISC-V: use memcpy for kexec_file mode The pointer to buffer loading kernel binaries is in kernel space for kexec_fil mode, When copy_from_user copies data from pointer to a block of memory, it checkes that the pointer is in the user space range, on RISCV-V that is: static inline bool __access_ok(unsigned long addr, unsigned long size) { return size <= TASK_SIZE && addr <= TASK_SIZE - size; } and TASK_SIZE is 0x4000000000 for 64-bits, which now causes copy_from_user to reject the access of the field 'buf' of struct kexec_segment that is in range [CONFIG_PAGE_OFFSET - VMALLOC_SIZE, CONFIG_PAGE_OFFSET), is invalid user space pointer. This patch fixes this issue by skipping access_ok(), use mempcy() instead. Signed-off-by: Liao Chang Link: https://lore.kernel.org/r/20220408100914.150110-3-lizhengyu3@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/machine_kexec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index cbef0fc73afa8..df8e24559035c 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -65,7 +65,9 @@ machine_kexec_prepare(struct kimage *image) if (image->segment[i].memsz <= sizeof(fdt)) continue; - if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) + if (image->file_mode) + memcpy(&fdt, image->segment[i].buf, sizeof(fdt)); + else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) continue; if (fdt_check_header(&fdt))