$NetBSD: patch-XSA443,v 1.1 2023/11/15 15:59:36 bouyer Exp $ From c4d597f63832a53bbb1b826af7a4677e40e9fded Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Thu, 14 Sep 2023 13:22:50 +0100 Subject: [PATCH 01/11] libfsimage/xfs: Remove dead code xfs_info.agnolog (and related code) and XFS_INO_AGBNO_BITS are dead code that serve no purpose. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Reviewed-by: Jan Beulich --- tools/libfsimage/xfs/fsys_xfs.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c index d735a88e55f3..2800699f5985 100644 --- tools/libfsimage/xfs/fsys_xfs.c.orig +++ tools/libfsimage/xfs/fsys_xfs.c @@ -37,7 +37,6 @@ struct xfs_info { int blklog; int inopblog; int agblklog; - int agnolog; unsigned int nextents; xfs_daddr_t next; xfs_daddr_t daddr; @@ -65,9 +64,7 @@ static struct xfs_info xfs; #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1)) #define XFS_INO_OFFSET_BITS xfs.inopblog -#define XFS_INO_AGBNO_BITS xfs.agblklog #define XFS_INO_AGINO_BITS (xfs.agblklog + xfs.inopblog) -#define XFS_INO_AGNO_BITS xfs.agnolog static inline xfs_agblock_t agino2agbno (xfs_agino_t agino) @@ -149,20 +146,6 @@ xt_len (xfs_bmbt_rec_32_t *r) return le32(r->l3) & mask32lo(21); } -static inline int -xfs_highbit32(xfs_uint32_t v) -{ - int i; - - if (--v) { - for (i = 0; i < 31; i++, v >>= 1) { - if (v == 0) - return i; - } - } - return 0; -} - static int isinxt (xfs_fileoff_t key, xfs_fileoff_t offset, xfs_filblks_t len) { @@ -472,7 +455,6 @@ xfs_mount (fsi_file_t *ffi, const char *options) xfs.inopblog = super.sb_inopblog; xfs.agblklog = super.sb_agblklog; - xfs.agnolog = xfs_highbit32 (le32(super.sb_agcount)); xfs.btnode_ptr0_off = ((xfs.bsize - sizeof(xfs_btree_block_t)) / -- 2.42.0 From f75b0a70da392672fb7d9feed2a9e9515d74df2c Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Thu, 14 Sep 2023 13:22:51 +0100 Subject: [PATCH 02/11] libfsimage/xfs: Amend mask32lo() to allow the value 32 agblklog could plausibly be 32, but that would overflow this shift. Perform the shift as ULL and cast to u32 at the end instead. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Acked-by: Jan Beulich --- tools/libfsimage/xfs/fsys_xfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c index 2800699f5985..4720bb4505c8 100644 --- tools/libfsimage/xfs/fsys_xfs.c.orig +++ tools/libfsimage/xfs/fsys_xfs.c @@ -60,7 +60,7 @@ static struct xfs_info xfs; #define inode ((xfs_dinode_t *)((char *)FSYS_BUF + 8192)) #define icore (inode->di_core) -#define mask32lo(n) (((xfs_uint32_t)1 << (n)) - 1) +#define mask32lo(n) ((xfs_uint32_t)((1ull << (n)) - 1)) #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1)) #define XFS_INO_OFFSET_BITS xfs.inopblog -- 2.42.0 From 25fae23b32ee4d990ae11368ee21e28e66dbfa25 Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Thu, 14 Sep 2023 13:22:52 +0100 Subject: [PATCH 03/11] libfsimage/xfs: Sanity-check the superblock during mounts Sanity-check the XFS superblock for wellformedness at the mount handler. This forces pygrub to abort parsing a potentially malformed filesystem and ensures the invariants assumed throughout the rest of the code hold. Also, derive parameters from previously sanitized parameters where possible (rather than reading them off the superblock) The code doesn't try to avoid overflowing the end of the disk, because that's an unlikely and benign error. Parameters used in calculations of xfs_daddr_t (like the root inode index) aren't in critical need of being sanitized. The sanitization of agblklog is basically checking that no obvious overflows happen on agblklog, and then ensuring agblocks is contained in the range (2^(sb_agblklog-1), 2^sb_agblklog]. This is part of XSA-443 / CVE-2023-34325 Reported-by: Ferdinand Nölscher Signed-off-by: Alejandro Vallejo Reviewed-by: Jan Beulich --- tools/libfsimage/xfs/fsys_xfs.c | 48 ++++++++++++++++++++++++++------- tools/libfsimage/xfs/xfs.h | 12 +++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c index 4720bb4505c8..e4eb7e1ee26f 100644 --- tools/libfsimage/xfs/fsys_xfs.c.orig +++ tools/libfsimage/xfs/fsys_xfs.c @@ -17,6 +17,7 @@ * along with this program; If not, see . */ +#include #include #include "xfs.h" @@ -433,29 +434,56 @@ first_dentry (fsi_file_t *ffi, xfs_ino_t *ino) return next_dentry (ffi, ino); } +static bool +xfs_sb_is_invalid (const xfs_sb_t *super) +{ + return (le32(super->sb_magicnum) != XFS_SB_MAGIC) + || ((le16(super->sb_versionnum) & XFS_SB_VERSION_NUMBITS) != + XFS_SB_VERSION_4) + || (super->sb_inodelog < XFS_SB_INODELOG_MIN) + || (super->sb_inodelog > XFS_SB_INODELOG_MAX) + || (super->sb_blocklog < XFS_SB_BLOCKLOG_MIN) + || (super->sb_blocklog > XFS_SB_BLOCKLOG_MAX) + || (super->sb_blocklog < super->sb_inodelog) + || (super->sb_agblklog > XFS_SB_AGBLKLOG_MAX) + || ((1ull << super->sb_agblklog) < le32(super->sb_agblocks)) + || (((1ull << super->sb_agblklog) >> 1) >= + le32(super->sb_agblocks)) + || ((super->sb_blocklog + super->sb_dirblklog) >= + XFS_SB_DIRBLK_NUMBITS); +} + static int xfs_mount (fsi_file_t *ffi, const char *options) { xfs_sb_t super; if (!devread (ffi, 0, 0, sizeof(super), (char *)&super) - || (le32(super.sb_magicnum) != XFS_SB_MAGIC) - || ((le16(super.sb_versionnum) - & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4) ) { + || xfs_sb_is_invalid(&super)) { return 0; } - xfs.bsize = le32 (super.sb_blocksize); - xfs.blklog = super.sb_blocklog; - xfs.bdlog = xfs.blklog - SECTOR_BITS; + /* + * Not sanitized. It's exclusively used to generate disk addresses, + * so it's not important from a security standpoint. + */ xfs.rootino = le64 (super.sb_rootino); - xfs.isize = le16 (super.sb_inodesize); - xfs.agblocks = le32 (super.sb_agblocks); - xfs.dirbsize = xfs.bsize << super.sb_dirblklog; - xfs.inopblog = super.sb_inopblog; + /* + * Sanitized to be consistent with each other, only used to + * generate disk addresses, so it's safe + */ + xfs.agblocks = le32 (super.sb_agblocks); xfs.agblklog = super.sb_agblklog; + /* Derived from sanitized parameters */ + xfs.bsize = 1 << super.sb_blocklog; + xfs.blklog = super.sb_blocklog; + xfs.bdlog = super.sb_blocklog - SECTOR_BITS; + xfs.isize = 1 << super.sb_inodelog; + xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog); + xfs.inopblog = super.sb_blocklog - super.sb_inodelog; + xfs.btnode_ptr0_off = ((xfs.bsize - sizeof(xfs_btree_block_t)) / (sizeof (xfs_bmbt_key_t) + sizeof (xfs_bmbt_ptr_t))) diff --git a/tools/libfsimage/xfs/xfs.h b/tools/libfsimage/xfs/xfs.h index 40699281e44d..b87e37d3d7e9 100644 --- tools/libfsimage/xfs/xfs.h.orig +++ tools/libfsimage/xfs/xfs.h @@ -134,6 +134,18 @@ typedef struct xfs_sb xfs_uint8_t sb_dummy[7]; /* padding */ } xfs_sb_t; +/* Bound taken from xfs.c in GRUB2. It doesn't exist in the spec */ +#define XFS_SB_DIRBLK_NUMBITS 27 +/* Implied by the XFS specification. The minimum block size is 512 octets */ +#define XFS_SB_BLOCKLOG_MIN 9 +/* Implied by the XFS specification. The maximum block size is 65536 octets */ +#define XFS_SB_BLOCKLOG_MAX 16 +/* Implied by the XFS specification. The minimum inode size is 256 octets */ +#define XFS_SB_INODELOG_MIN 8 +/* Implied by the XFS specification. The maximum inode size is 2048 octets */ +#define XFS_SB_INODELOG_MAX 11 +/* High bound for sb_agblklog */ +#define XFS_SB_AGBLKLOG_MAX 32 /* those are from xfs_btree.h */ -- 2.42.0 From e72c68e702dd930bc6013182bb44d3e8fbbb6bf4 Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Thu, 14 Sep 2023 13:22:53 +0100 Subject: [PATCH 04/11] libfsimage/xfs: Add compile-time check to libfsimage Adds the common tools include folder to the -I compile flags of libfsimage. This allows us to use: xen-tools/common-macros.h:BUILD_BUG_ON() With it, statically assert a sanitized "blocklog - SECTOR_BITS" cannot underflow. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Reviewed-by: Jan Beulich --- tools/libfsimage/Rules.mk | 2 +- tools/libfsimage/xfs/fsys_xfs.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/libfsimage/Rules.mk b/tools/libfsimage/Rules.mk index bb6d42abb494..80598fb70aa7 100644 --- tools/libfsimage/Rules.mk.orig +++ tools/libfsimage/Rules.mk @@ -1,6 +1,6 @@ include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\" +CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ $(CFLAGS_xeninclude) -DFSIMAGE_FSDIR=\"$(FSDIR)\" CFLAGS += -Werror -D_GNU_SOURCE LDFLAGS += -L../common/ diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c index e4eb7e1ee26f..4a8dd6f2397b 100644 --- tools/libfsimage/xfs/fsys_xfs.c.orig +++ tools/libfsimage/xfs/fsys_xfs.c @@ -19,6 +19,7 @@ #include #include +#include #include "xfs.h" #define MAX_LINK_COUNT 8 @@ -477,9 +478,10 @@ xfs_mount (fsi_file_t *ffi, const char *options) xfs.agblklog = super.sb_agblklog; /* Derived from sanitized parameters */ + BUILD_BUG_ON(XFS_SB_BLOCKLOG_MIN < SECTOR_BITS); + xfs.bdlog = super.sb_blocklog - SECTOR_BITS; xfs.bsize = 1 << super.sb_blocklog; xfs.blklog = super.sb_blocklog; - xfs.bdlog = super.sb_blocklog - SECTOR_BITS; xfs.isize = 1 << super.sb_inodelog; xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog); xfs.inopblog = super.sb_blocklog - super.sb_inodelog; -- 2.42.0 From 75fdc03c5a6b7fac0c3a5ac06a5beaac73aad36f Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Mon, 25 Sep 2023 18:32:21 +0100 Subject: [PATCH 05/11] tools/pygrub: Remove unnecessary hypercall There's a hypercall being issued in order to determine whether PV64 is supported, but since Xen 4.3 that's strictly true so it's not required. Plus, this way we can avoid mapping the privcmd interface altogether in the depriv pygrub. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Reviewed-by: Andrew Cooper --- tools/pygrub/src/pygrub | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub index ce7ab0eb8cf3..ce4e07d3e823 100755 --- tools/pygrub/src/pygrub.orig +++ tools/pygrub/src/pygrub @@ -18,7 +18,6 @@ import os, sys, string, struct, tempfile, re, traceback, stat, errno import copy import logging import platform -import xen.lowlevel.xc import curses, _curses, curses.textpad, curses.ascii import getopt @@ -668,14 +667,6 @@ def run_grub(file, entry, fs, cfg_args): return grubcfg -def supports64bitPVguest(): - xc = xen.lowlevel.xc.xc() - caps = xc.xeninfo()['xen_caps'].split(" ") - for cap in caps: - if cap == "xen-3.0-x86_64": - return True - return False - # If nothing has been specified, look for a Solaris domU. If found, perform the # necessary tweaks. def sniff_solaris(fs, cfg): @@ -684,8 +675,7 @@ def sniff_solaris(fs, cfg): return cfg if not cfg["kernel"]: - if supports64bitPVguest() and \ - fs.file_exists("/platform/i86xpv/kernel/amd64/unix"): + if fs.file_exists("/platform/i86xpv/kernel/amd64/unix"): cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix" cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive" elif fs.file_exists("/platform/i86xpv/kernel/unix"): -- 2.42.0 From 1083a16f63461e844e9515ac4d35d48bf55785af Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Mon, 25 Sep 2023 18:32:22 +0100 Subject: [PATCH 06/11] tools/pygrub: Small refactors Small tidy up to ensure output_directory always has a trailing '/' to ease concatenating paths and that `output` can only be a filename or None. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Acked-by: Andrew Cooper --- tools/pygrub/src/pygrub | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub index ce4e07d3e823..1042c05b8676 100755 --- tools/pygrub/src/pygrub.orig +++ tools/pygrub/src/pygrub @@ -793,7 +793,7 @@ if __name__ == "__main__": debug = False not_really = False output_format = "sxp" - output_directory = "/var/run/xen/pygrub" + output_directory = "/var/run/xen/pygrub/" # what was passed in incfg = { "kernel": None, "ramdisk": None, "args": "" } @@ -815,7 +815,8 @@ if __name__ == "__main__": usage() sys.exit() elif o in ("--output",): - output = a + if a != "-": + output = a elif o in ("--kernel",): incfg["kernel"] = a elif o in ("--ramdisk",): @@ -847,12 +848,11 @@ if __name__ == "__main__": if not os.path.isdir(a): print("%s is not an existing directory" % a) sys.exit(1) - output_directory = a + output_directory = a + '/' if debug: logging.basicConfig(level=logging.DEBUG) - try: os.makedirs(output_directory, 0o700) except OSError as e: @@ -861,7 +861,7 @@ if __name__ == "__main__": else: raise - if output is None or output == "-": + if output is None: fd = sys.stdout.fileno() else: fd = os.open(output, os.O_WRONLY) -- 2.42.0 From 350db30e33f39af40c1e3752d73c0a30ef2d26e7 Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Mon, 25 Sep 2023 18:32:23 +0100 Subject: [PATCH 07/11] tools/pygrub: Open the output files earlier This patch allows pygrub to get ahold of every RW file descriptor it needs early on. A later patch will clamp the filesystem it can access so it can't obtain any others. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Acked-by: Andrew Cooper --- tools/pygrub/src/pygrub | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub index 1042c05b8676..91e2ec2ab105 100755 --- tools/pygrub/src/pygrub.orig +++ tools/pygrub/src/pygrub @@ -738,8 +738,7 @@ if __name__ == "__main__": def usage(): print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] " %(sys.argv[0],), file=sys.stderr) - def copy_from_image(fs, file_to_read, file_type, output_directory, - not_really): + def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really): if not_really: if fs.file_exists(file_to_read): return "<%s:%s>" % (file_type, file_to_read) @@ -750,21 +749,18 @@ if __name__ == "__main__": except Exception as e: print(e, file=sys.stderr) sys.exit("Error opening %s in guest" % file_to_read) - (tfd, ret) = tempfile.mkstemp(prefix="boot_"+file_type+".", - dir=output_directory) dataoff = 0 while True: data = datafile.read(FS_READ_MAX, dataoff) if len(data) == 0: - os.close(tfd) + os.close(fd_dst) del datafile - return ret + return try: - os.write(tfd, data) + os.write(fd_dst, data) except Exception as e: print(e, file=sys.stderr) - os.close(tfd) - os.unlink(ret) + os.unlink(path_dst) del datafile sys.exit("Error writing temporary copy of "+file_type) dataoff += len(data) @@ -861,6 +857,14 @@ if __name__ == "__main__": else: raise + if not_really: + fd_kernel = path_kernel = fd_ramdisk = path_ramdisk = None + else: + (fd_kernel, path_kernel) = tempfile.mkstemp(prefix="boot_kernel.", + dir=output_directory) + (fd_ramdisk, path_ramdisk) = tempfile.mkstemp(prefix="boot_ramdisk.", + dir=output_directory) + if output is None: fd = sys.stdout.fileno() else: @@ -920,20 +924,23 @@ if __name__ == "__main__": if fs is None: raise RuntimeError("Unable to find partition containing kernel") - bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel", - output_directory, not_really) + copy_from_image(fs, chosencfg["kernel"], "kernel", + fd_kernel, path_kernel, not_really) + bootcfg["kernel"] = path_kernel if chosencfg["ramdisk"]: try: - bootcfg["ramdisk"] = copy_from_image(fs, chosencfg["ramdisk"], - "ramdisk", output_directory, - not_really) + copy_from_image(fs, chosencfg["ramdisk"], "ramdisk", + fd_ramdisk, path_ramdisk, not_really) except: if not not_really: - os.unlink(bootcfg["kernel"]) + os.unlink(path_kernel) raise + bootcfg["ramdisk"] = path_ramdisk else: initrd = None + if not not_really: + os.unlink(path_ramdisk) args = None if chosencfg["args"]: -- 2.42.0 From 1548ad2291ec7a72ae6949c11d2e50cea135a48d Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Mon, 25 Sep 2023 18:32:24 +0100 Subject: [PATCH 08/11] tools/libfsimage: Export a new function to preload all plugins This is work required in order to let pygrub operate in highly deprivileged chroot mode. This patch adds a function that preloads every plugin, hence ensuring that a on function exit, every shared library is loaded in memory. The new "init" function is supposed to be used before depriv, but that's fine because it's not acting on untrusted data. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Acked-by: Andrew Cooper --- tools/libfsimage/common/fsimage_plugin.c | 4 ++-- tools/libfsimage/common/mapfile-GNU | 1 + tools/libfsimage/common/mapfile-SunOS | 1 + tools/libfsimage/common/xenfsimage.h | 8 ++++++++ tools/pygrub/src/fsimage/fsimage.c | 15 +++++++++++++++ 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/libfsimage/common/fsimage_plugin.c b/tools/libfsimage/common/fsimage_plugin.c index de1412b4233a..d0cb9e96a654 100644 --- tools/libfsimage/common/fsimage_plugin.c.orig +++ tools/libfsimage/common/fsimage_plugin.c @@ -119,7 +119,7 @@ fail: return (-1); } -static int load_plugins(void) +int fsi_init(void) { const char *fsdir = getenv("XEN_FSIMAGE_FSDIR"); struct dirent *dp = NULL; @@ -180,7 +180,7 @@ int find_plugin(fsi_t *fsi, const char *path, const char *options) fsi_plugin_t *fp; int ret = 0; - if (plugins == NULL && (ret = load_plugins()) != 0) + if (plugins == NULL && (ret = fsi_init()) != 0) goto out; for (fp = plugins; fp != NULL; fp = fp->fp_next) { diff --git a/tools/libfsimage/common/mapfile-GNU b/tools/libfsimage/common/mapfile-GNU index 26d4d7a69ec7..2d54d527d7f5 100644 --- tools/libfsimage/common/mapfile-GNU.orig +++ tools/libfsimage/common/mapfile-GNU @@ -1,6 +1,7 @@ VERSION { libfsimage.so.1.0 { global: + fsi_init; fsi_open_fsimage; fsi_close_fsimage; fsi_file_exists; diff --git a/tools/libfsimage/common/mapfile-SunOS b/tools/libfsimage/common/mapfile-SunOS index e99b90b65077..48deedb4252f 100644 --- tools/libfsimage/common/mapfile-SunOS.orig +++ tools/libfsimage/common/mapfile-SunOS @@ -1,5 +1,6 @@ libfsimage.so.1.0 { global: + fsi_init; fsi_open_fsimage; fsi_close_fsimage; fsi_file_exists; diff --git a/tools/libfsimage/common/xenfsimage.h b/tools/libfsimage/common/xenfsimage.h index 201abd54f23a..341883b2d71a 100644 --- tools/libfsimage/common/xenfsimage.h.orig +++ tools/libfsimage/common/xenfsimage.h @@ -35,6 +35,14 @@ extern C { typedef struct fsi fsi_t; typedef struct fsi_file fsi_file_t; +/* + * Optional initialization function. If invoked it loads the associated + * dynamic libraries for the backends ahead of time. This is required if + * the library is to run as part of a highly deprivileged executable, as + * the libraries may not be reachable after depriv. + */ +int fsi_init(void); + fsi_t *fsi_open_fsimage(const char *, uint64_t, const char *); void fsi_close_fsimage(fsi_t *); diff --git a/tools/pygrub/src/fsimage/fsimage.c b/tools/pygrub/src/fsimage/fsimage.c index 2ebbbe35df92..92fbf2851f01 100644 --- tools/pygrub/src/fsimage/fsimage.c.orig +++ tools/pygrub/src/fsimage/fsimage.c @@ -286,6 +286,15 @@ fsimage_getbootstring(PyObject *o, PyObject *args) return Py_BuildValue("s", bootstring); } +static PyObject * +fsimage_init(PyObject *o, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) + return (NULL); + + return Py_BuildValue("i", fsi_init()); +} + PyDoc_STRVAR(fsimage_open__doc__, "open(name, [offset=off]) - Open the given file as a filesystem image.\n" "\n" @@ -297,7 +306,13 @@ PyDoc_STRVAR(fsimage_getbootstring__doc__, "getbootstring(fs) - Return the boot string needed for this file system " "or NULL if none is needed.\n"); +PyDoc_STRVAR(fsimage_init__doc__, + "init() - Loads every dynamic library contained in xenfsimage " + "into memory so that it can be used in chrooted environments.\n"); + static struct PyMethodDef fsimage_module_methods[] = { + { "init", (PyCFunction)fsimage_init, + METH_VARARGS, fsimage_init__doc__ }, { "open", (PyCFunction)fsimage_open, METH_VARARGS|METH_KEYWORDS, fsimage_open__doc__ }, { "getbootstring", (PyCFunction)fsimage_getbootstring, -- 2.42.0 From 4d331b0b914dfc17bd2d883bc55aeb798930832a Mon Sep 17 00:00:00 2001 From: Alejandro Vallejo Date: Mon, 25 Sep 2023 18:32:25 +0100 Subject: [PATCH 09/11] tools/pygrub: Deprivilege pygrub Introduce a --runas= flag to deprivilege pygrub on Linux and *BSDs. It also implicitly creates a chroot env where it drops a deprivileged forked process. The chroot itself is cleaned up at the end. If the --runas arg is present, then pygrub forks, leaving the child to deprivilege itself, and waiting for it to complete. When the child exists, the parent performs cleanup and exits with the same error code. This is roughly what the child does: 1. Initialize libfsimage (this loads every .so in memory so the chroot can avoid bind-mounting /{,usr}/lib* 2. Create a temporary empty chroot directory 3. Mount tmpfs in it 4. Bind mount the disk inside, because libfsimage expects a path, not a file descriptor. 5. Remount the root tmpfs to be stricter (ro,nosuid,nodev) 6. Set RLIMIT_FSIZE to a sensibly high amount (128 MiB) 7. Depriv gid, groups and uid With this scheme in place, the "output" files are writable (up to RLIMIT_FSIZE octets) and the exposed filesystem is immutable and contains the single only file we can't easily get rid of (the disk). If running on Linux, the child process also unshares mount, IPC, and network namespaces before dropping its privileges. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Alejandro Vallejo Acked-by: Andrew Cooper --- tools/pygrub/setup.py | 2 +- tools/pygrub/src/pygrub | 162 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 154 insertions(+), 10 deletions(-) diff --git a/tools/pygrub/setup.py b/tools/pygrub/setup.py index b8f1dc4590cf..f16187b6d118 100644 --- tools/pygrub/setup.py.orig +++ tools/pygrub/setup.py @@ -17,7 +17,7 @@ xenfsimage = Extension("xenfsimage", pkgs = [ 'grub' ] setup(name='pygrub', - version='0.6', + version='0.7', description='Boot loader that looks a lot like grub for Xen', author='Jeremy Katz', author_email='katzj@redhat.com', diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub index 91e2ec2ab105..7cea496ade08 100755 --- tools/pygrub/src/pygrub.orig +++ tools/pygrub/src/pygrub @@ -16,8 +16,11 @@ from __future__ import print_function import os, sys, string, struct, tempfile, re, traceback, stat, errno import copy +import ctypes, ctypes.util import logging import platform +import resource +import subprocess import curses, _curses, curses.textpad, curses.ascii import getopt @@ -27,10 +30,135 @@ import grub.GrubConf import grub.LiloConf import grub.ExtLinuxConf -PYGRUB_VER = 0.6 +PYGRUB_VER = 0.7 FS_READ_MAX = 1024 * 1024 SECTOR_SIZE = 512 +# Unless provided through the env variable PYGRUB_MAX_FILE_SIZE_MB, then +# this is the maximum filesize allowed for files written by the depriv +# pygrub +LIMIT_FSIZE = 128 << 20 + +CLONE_NEWNS = 0x00020000 # mount namespace +CLONE_NEWNET = 0x40000000 # network namespace +CLONE_NEWIPC = 0x08000000 # IPC namespace + +def unshare(flags): + if not sys.platform.startswith("linux"): + print("skip_unshare reason=not_linux platform=%s", sys.platform, file=sys.stderr) + return + + libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) + unshare_prototype = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, use_errno=True) + unshare = unshare_prototype(('unshare', libc)) + + if unshare(flags) < 0: + raise OSError(ctypes.get_errno(), os.strerror(ctypes.get_errno())) + +def bind_mount(src, dst, options): + open(dst, "a").close() # touch + + rc = subprocess.call(["mount", "--bind", "-o", options, src, dst]) + if rc != 0: + raise RuntimeError("bad_mount: src=%s dst=%s opts=%s" % + (src, dst, options)) + +def downgrade_rlimits(): + # Wipe the authority to use unrequired resources + resource.setrlimit(resource.RLIMIT_NPROC, (0, 0)) + resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) + resource.setrlimit(resource.RLIMIT_MEMLOCK, (0, 0)) + + # py2's resource module doesn't know about resource.RLIMIT_MSGQUEUE + # + # TODO: Use resource.RLIMIT_MSGQUEUE after python2 is deprecated + if sys.platform.startswith('linux'): + RLIMIT_MSGQUEUE = 12 + resource.setrlimit(RLIMIT_MSGQUEUE, (0, 0)) + + # The final look of the filesystem for this process is fully RO, but + # note we have some file descriptor already open (notably, kernel and + # ramdisk). In order to avoid a compromised pygrub from filling up the + # filesystem we set RLIMIT_FSIZE to a high bound, so that the file + # write permissions are bound. + fsize = LIMIT_FSIZE + if "PYGRUB_MAX_FILE_SIZE_MB" in os.environ.keys(): + fsize = os.environ["PYGRUB_MAX_FILE_SIZE_MB"] << 20 + + resource.setrlimit(resource.RLIMIT_FSIZE, (fsize, fsize)) + +def depriv(output_directory, output, device, uid, path_kernel, path_ramdisk): + # The only point of this call is to force the loading of libfsimage. + # That way, we don't need to bind-mount it into the chroot + rc = xenfsimage.init() + if rc != 0: + os.unlink(path_ramdisk) + os.unlink(path_kernel) + raise RuntimeError("bad_xenfsimage: rc=%d" % rc) + + # Create a temporary directory for the chroot + chroot = tempfile.mkdtemp(prefix=str(uid)+'-', dir=output_directory) + '/' + device_path = '/device' + + pid = os.fork() + if pid: + # parent + _, rc = os.waitpid(pid, 0) + + for path in [path_kernel, path_ramdisk]: + # If the child didn't write anything, just get rid of it, + # otherwise we end up consuming a 0-size file when parsing + # systems without a ramdisk that the ultimate caller of pygrub + # may just be unaware of + if rc != 0 or os.path.getsize(path) == 0: + os.unlink(path) + + # Normally, unshare(CLONE_NEWNS) will ensure this is not required. + # However, this syscall doesn't exist in *BSD systems and doesn't + # auto-unmount everything on older Linux kernels (At least as of + # Linux 4.19, but it seems fixed in 5.15). Either way, + # recursively unmount everything if needed. Quietly. + with open('/dev/null', 'w') as devnull: + subprocess.call(["umount", "-f", chroot + device_path], + stdout=devnull, stderr=devnull) + subprocess.call(["umount", "-f", chroot], + stdout=devnull, stderr=devnull) + os.rmdir(chroot) + + sys.exit(rc) + + # By unsharing the namespace we're making sure it's all bulk-released + # at the end, when the namespaces disappear. This means the kernel does + # (almost) all the cleanup for us and the parent just has to remove the + # temporary directory. + unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWNET) + + # Set sensible limits using the setrlimit interface + downgrade_rlimits() + + # We'll mount tmpfs on the chroot to ensure the deprivileged child + # cannot affect the persistent state. It's RW now in order to + # bind-mount the device, but note it's remounted RO after that. + rc = subprocess.call(["mount", "-t", "tmpfs", "none", chroot]) + if rc != 0: + raise RuntimeError("mount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot)) + + # Bind the untrusted device RO + bind_mount(device, chroot + device_path, "ro,nosuid,noexec") + + rc = subprocess.call(["mount", "-t", "tmpfs", "-o", "remount,ro,nosuid,noexec,nodev", "none", chroot]) + if rc != 0: + raise RuntimeError("remount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot)) + + # Drop superpowers! + os.chroot(chroot) + os.chdir('/') + os.setgid(uid) + os.setgroups([uid]) + os.setuid(uid) + + return device_path + def read_size_roundup(fd, size): if platform.system() != 'FreeBSD': return size @@ -736,7 +864,7 @@ if __name__ == "__main__": sel = None def usage(): - print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] " %(sys.argv[0],), file=sys.stderr) + print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--runas=] [--offset=] " %(sys.argv[0],), file=sys.stderr) def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really): if not_really: @@ -760,7 +888,8 @@ if __name__ == "__main__": os.write(fd_dst, data) except Exception as e: print(e, file=sys.stderr) - os.unlink(path_dst) + if path_dst: + os.unlink(path_dst) del datafile sys.exit("Error writing temporary copy of "+file_type) dataoff += len(data) @@ -769,7 +898,7 @@ if __name__ == "__main__": opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::', ["quiet", "interactive", "list-entries", "not-really", "help", "output=", "output-format=", "output-directory=", "offset=", - "entry=", "kernel=", + "runas=", "entry=", "kernel=", "ramdisk=", "args=", "isconfig", "debug"]) except getopt.GetoptError: usage() @@ -790,6 +919,7 @@ if __name__ == "__main__": not_really = False output_format = "sxp" output_directory = "/var/run/xen/pygrub/" + uid = None # what was passed in incfg = { "kernel": None, "ramdisk": None, "args": "" } @@ -813,6 +943,13 @@ if __name__ == "__main__": elif o in ("--output",): if a != "-": output = a + elif o in ("--runas",): + try: + uid = int(a) + except ValueError: + print("runas value must be an integer user id") + usage() + sys.exit(1) elif o in ("--kernel",): incfg["kernel"] = a elif o in ("--ramdisk",): @@ -849,6 +986,10 @@ if __name__ == "__main__": if debug: logging.basicConfig(level=logging.DEBUG) + if interactive and uid: + print("In order to use --runas, you must also set --entry or -q", file=sys.stderr) + sys.exit(1) + try: os.makedirs(output_directory, 0o700) except OSError as e: @@ -870,6 +1011,9 @@ if __name__ == "__main__": else: fd = os.open(output, os.O_WRONLY) + if uid: + file = depriv(output_directory, output, file, uid, path_kernel, path_ramdisk) + # debug if isconfig: chosencfg = run_grub(file, entry, fs, incfg["args"]) @@ -925,21 +1069,21 @@ if __name__ == "__main__": raise RuntimeError("Unable to find partition containing kernel") copy_from_image(fs, chosencfg["kernel"], "kernel", - fd_kernel, path_kernel, not_really) + fd_kernel, None if uid else path_kernel, not_really) bootcfg["kernel"] = path_kernel if chosencfg["ramdisk"]: try: copy_from_image(fs, chosencfg["ramdisk"], "ramdisk", - fd_ramdisk, path_ramdisk, not_really) + fd_ramdisk, None if uid else path_ramdisk, not_really) except: - if not not_really: - os.unlink(path_kernel) + if not uid and not not_really: + os.unlink(path_kernel) raise bootcfg["ramdisk"] = path_ramdisk else: initrd = None - if not not_really: + if not uid and not not_really: os.unlink(path_ramdisk) args = None -- 2.42.0 From 576e7aa02ab838b6768b498f310c70ca49537202 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 25 Sep 2023 14:30:20 +0200 Subject: [PATCH 10/11] libxl: add support for running bootloader in restricted mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Much like the device model depriv mode, add the same kind of support for the bootloader. Such feature allows passing a UID as a parameter for the bootloader to run as, together with the bootloader itself taking the necessary actions to isolate. Note that the user to run the bootloader as must have the right permissions to access the guest disk image (in read mode only), and that the bootloader will be run in non-interactive mode when restricted. If enabled bootloader restrict mode will attempt to re-use the user(s) from the QEMU depriv implementation if no user is provided on the configuration file or the environment. See docs/features/qemu-deprivilege.pandoc for more information about how to setup those users. Bootloader restrict mode is not enabled by default as it requires certain setup to be done first (setup of the user(s) to use in restrict mode). This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Roger Pau Monné Reviewed-by: Anthony PERARD --- docs/man/xl.1.pod.in | 33 +++++++++++ tools/libs/light/libxl_bootloader.c | 89 ++++++++++++++++++++++++++++- tools/libs/light/libxl_dm.c | 8 +-- tools/libs/light/libxl_internal.h | 8 +++ 4 files changed, 131 insertions(+), 7 deletions(-) diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in index 45e1430aeb74..96e6fb1c32a3 100644 --- docs/man/xl.1.pod.in.orig +++ docs/man/xl.1.pod.in @@ -1976,6 +1976,39 @@ ignored: =back +=head1 ENVIRONMENT VARIABLES + +The following environment variables shall affect the execution of xl: + +=over 4 + +=item LIBXL_BOOTLOADER_RESTRICT + +Attempt to restrict the bootloader after startup, to limit the +consequences of security vulnerabilities due to parsing guest +owned image files. + +See docs/features/qemu-deprivilege.pandoc for more information +on how to setup the unprivileged users. + +Note that running the bootloader in restricted mode also implies using +non-interactive mode, and the disk image must be readable by the +restricted user. + +Having this variable set is equivalent to enabling the option, even if the +value is 0. + +=item LIBXL_BOOTLOADER_USER + +When using bootloader_restrict, run the bootloader as this user. If +not set the default QEMU restrict users will be used. + +NOTE: Each domain MUST have a SEPARATE username. + +See docs/features/qemu-deprivilege.pandoc for more information. + +=back + =head1 SEE ALSO The following man pages: diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c index 18e9ebd7148c..97d9bf4ddc0a 100644 --- tools/libs/light/libxl_bootloader.c.orig +++ tools/libs/light/libxl_bootloader.c @@ -14,6 +14,7 @@ #include "libxl_osdeps.h" /* must come before any other headers */ +#include #include #ifdef HAVE_UTMP_H #include @@ -46,8 +47,71 @@ static void bootloader_arg(libxl__bootloader_state *bl, const char *arg) bl->args[bl->nargs++] = arg; } -static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, - const char *bootloader_path) +static int bootloader_uid(libxl__gc *gc, domid_t guest_domid, + const char *user, uid_t *intended_uid) +{ + struct passwd *user_base, user_pwbuf; + int rc; + + if (user) { + rc = userlookup_helper_getpwnam(gc, user, &user_pwbuf, &user_base); + if (rc) return rc; + + if (!user_base) { + LOGD(ERROR, guest_domid, "Couldn't find user %s", user); + return ERROR_INVAL; + } + + *intended_uid = user_base->pw_uid; + return 0; + } + + /* Re-use QEMU user range for the bootloader. */ + rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_RANGE_BASE, + &user_pwbuf, &user_base); + if (rc) return rc; + + if (user_base) { + struct passwd *user_clash, user_clash_pwbuf; + uid_t temp_uid = user_base->pw_uid + guest_domid; + + rc = userlookup_helper_getpwuid(gc, temp_uid, &user_clash_pwbuf, + &user_clash); + if (rc) return rc; + + if (user_clash) { + LOGD(ERROR, guest_domid, + "wanted to use uid %ld (%s + %d) but that is user %s !", + (long)temp_uid, LIBXL_QEMU_USER_RANGE_BASE, + guest_domid, user_clash->pw_name); + return ERROR_INVAL; + } + + *intended_uid = temp_uid; + return 0; + } + + rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_SHARED, &user_pwbuf, + &user_base); + if (rc) return rc; + + if (user_base) { + LOGD(WARN, guest_domid, "Could not find user %s, falling back to %s", + LIBXL_QEMU_USER_RANGE_BASE, LIBXL_QEMU_USER_SHARED); + *intended_uid = user_base->pw_uid; + + return 0; + } + + LOGD(ERROR, guest_domid, + "Could not find user %s or range base pseudo-user %s, cannot restrict", + LIBXL_QEMU_USER_SHARED, LIBXL_QEMU_USER_RANGE_BASE); + + return ERROR_INVAL; +} + +static int make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, + const char *bootloader_path) { const libxl_domain_build_info *info = bl->info; @@ -65,6 +129,23 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, ARG(GCSPRINTF("--ramdisk=%s", info->ramdisk)); if (info->cmdline && *info->cmdline != '\0') ARG(GCSPRINTF("--args=%s", info->cmdline)); + if (getenv("LIBXL_BOOTLOADER_RESTRICT") || + getenv("LIBXL_BOOTLOADER_USER")) { + uid_t uid = -1; + int rc = bootloader_uid(gc, bl->domid, getenv("LIBXL_BOOTLOADER_USER"), + &uid); + + if (rc) return rc; + + assert(uid != -1); + if (!uid) { + LOGD(ERROR, bl->domid, "bootloader restrict UID is 0 (root)!"); + return ERROR_INVAL; + } + LOGD(DEBUG, bl->domid, "using uid %ld", (long)uid); + ARG(GCSPRINTF("--runas=%ld", (long)uid)); + ARG("--quiet"); + } ARG(GCSPRINTF("--output=%s", bl->outputpath)); ARG("--output-format=simple0"); @@ -83,6 +164,7 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl, /* Sentinel for execv */ ARG(NULL); + return 0; #undef ARG } @@ -447,7 +529,8 @@ static void bootloader_disk_attached_cb(libxl__egc *egc, bootloader = bltmp; } - make_bootloader_args(gc, bl, bootloader); + rc = make_bootloader_args(gc, bl, bootloader); + if (rc) goto out; bl->openpty.ao = ao; bl->openpty.callback = bootloader_gotptys; diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c index b86e8ccc858f..59de5c1ae22f 100644 --- tools/libs/light/libxl_dm.c.orig +++ tools/libs/light/libxl_dm.c @@ -80,10 +80,10 @@ static int libxl__create_qemu_logfile(libxl__gc *gc, char *name) * On error, return a libxl-style error code. */ #define DEFINE_USERLOOKUP_HELPER(NAME,SPEC_TYPE,STRUCTNAME,SYSCONF) \ - static int userlookup_helper_##NAME(libxl__gc *gc, \ - SPEC_TYPE spec, \ - struct STRUCTNAME *resultbuf, \ - struct STRUCTNAME **out) \ + int userlookup_helper_##NAME(libxl__gc *gc, \ + SPEC_TYPE spec, \ + struct STRUCTNAME *resultbuf, \ + struct STRUCTNAME **out) \ { \ struct STRUCTNAME *resultp = NULL; \ char *buf = NULL; \ diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h index cc27c72ecf30..8415d1feed16 100644 --- tools/libs/light/libxl_internal.h.orig +++ tools/libs/light/libxl_internal.h @@ -4864,6 +4864,14 @@ struct libxl__cpu_policy { struct xc_msr *msr; }; +struct passwd; +_hidden int userlookup_helper_getpwnam(libxl__gc*, const char *user, + struct passwd *res, + struct passwd **out); +_hidden int userlookup_helper_getpwuid(libxl__gc*, uid_t uid, + struct passwd *res, + struct passwd **out); + #endif /* -- 2.42.0 From 34221884752bb835bbdab66378b3cecbf133e3d3 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Thu, 28 Sep 2023 12:22:35 +0200 Subject: [PATCH 11/11] libxl: limit bootloader execution in restricted mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a timeout for bootloader execution when running in restricted mode. Allow overwriting the default time out with an environment provided value. This is part of XSA-443 / CVE-2023-34325 Signed-off-by: Roger Pau Monné Reviewed-by: Anthony PERARD --- docs/man/xl.1.pod.in | 8 ++++++ tools/libs/light/libxl_bootloader.c | 40 +++++++++++++++++++++++++++++ tools/libs/light/libxl_internal.h | 2 ++ 3 files changed, 50 insertions(+) diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in index 96e6fb1c32a3..8f056450a730 100644 --- docs/man/xl.1.pod.in.orig +++ docs/man/xl.1.pod.in @@ -2007,6 +2007,14 @@ NOTE: Each domain MUST have a SEPARATE username. See docs/features/qemu-deprivilege.pandoc for more information. +=item LIBXL_BOOTLOADER_TIMEOUT + +Timeout in seconds for bootloader execution when running in restricted mode. +Otherwise the build time default in LIBXL_BOOTLOADER_TIMEOUT will be used. + +If defined the value must be an unsigned integer between 0 and INT_MAX, +otherwise behavior is undefined. Setting to 0 disables the timeout. + =back =head1 SEE ALSO diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c index 97d9bf4ddc0a..3ca6463e5f63 100644 --- tools/libs/light/libxl_bootloader.c.orig +++ tools/libs/light/libxl_bootloader.c @@ -34,6 +34,8 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc, libxl__datacopier_state *dc, int rc, int onwrite, int errnoval); static void bootloader_display_copyfail(libxl__egc *egc, libxl__datacopier_state *dc, int rc, int onwrite, int errnoval); +static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev, + const struct timeval *requested_abs, int rc); static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc, int rc); static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child, @@ -301,6 +303,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl) bl->ptys[0].master = bl->ptys[0].slave = 0; bl->ptys[1].master = bl->ptys[1].slave = 0; libxl__ev_child_init(&bl->child); + libxl__ev_time_init(&bl->time); libxl__domaindeathcheck_init(&bl->deathcheck); bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes); bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display); @@ -318,6 +321,7 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl) libxl__domaindeathcheck_stop(gc,&bl->deathcheck); libxl__datacopier_kill(&bl->keystrokes); libxl__datacopier_kill(&bl->display); + libxl__ev_time_deregister(gc, &bl->time); for (i=0; i<2; i++) { libxl__carefd_close(bl->ptys[i].master); libxl__carefd_close(bl->ptys[i].slave); @@ -379,6 +383,7 @@ static void bootloader_stop(libxl__egc *egc, libxl__datacopier_kill(&bl->keystrokes); libxl__datacopier_kill(&bl->display); + libxl__ev_time_deregister(gc, &bl->time); if (libxl__ev_child_inuse(&bl->child)) { r = kill(bl->child.pid, SIGTERM); if (r) LOGED(WARN, bl->domid, "%sfailed to kill bootloader [%lu]", @@ -641,6 +646,25 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op) struct termios termattr; + if (getenv("LIBXL_BOOTLOADER_RESTRICT") || + getenv("LIBXL_BOOTLOADER_USER")) { + const char *timeout_env = getenv("LIBXL_BOOTLOADER_TIMEOUT"); + int timeout = timeout_env ? atoi(timeout_env) + : LIBXL_BOOTLOADER_TIMEOUT; + + if (timeout) { + /* Set execution timeout */ + rc = libxl__ev_time_register_rel(ao, &bl->time, + bootloader_timeout, + timeout * 1000); + if (rc) { + LOGED(ERROR, bl->domid, + "unable to register timeout for bootloader execution"); + goto out; + } + } + } + pid_t pid = libxl__ev_child_fork(gc, &bl->child, bootloader_finished); if (pid == -1) { rc = ERROR_FAIL; @@ -706,6 +730,21 @@ static void bootloader_display_copyfail(libxl__egc *egc, libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display); bootloader_copyfail(egc, "bootloader output", bl, 1, rc,onwrite,errnoval); } +static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev, + const struct timeval *requested_abs, int rc) +{ + libxl__bootloader_state *bl = CONTAINER_OF(ev, *bl, time); + STATE_AO_GC(bl->ao); + + libxl__ev_time_deregister(gc, &bl->time); + + assert(libxl__ev_child_inuse(&bl->child)); + LOGD(ERROR, bl->domid, "killing bootloader because of timeout"); + + libxl__ev_child_kill_deregister(ao, &bl->child, SIGKILL); + + bootloader_callback(egc, bl, rc); +} static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck *dc, @@ -722,6 +761,7 @@ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child, STATE_AO_GC(bl->ao); int rc; + libxl__ev_time_deregister(gc, &bl->time); libxl__datacopier_kill(&bl->keystrokes); libxl__datacopier_kill(&bl->display); diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h index 8415d1feed16..a9581289f462 100644 --- tools/libs/light/libxl_internal.h.orig +++ tools/libs/light/libxl_internal.h @@ -103,6 +103,7 @@ #define LIBXL_QMP_CMD_TIMEOUT 10 #define LIBXL_STUBDOM_START_TIMEOUT 30 #define LIBXL_QEMU_BODGE_TIMEOUT 2 +#define LIBXL_BOOTLOADER_TIMEOUT 120 #define LIBXL_XENCONSOLE_LIMIT 1048576 #define LIBXL_XENCONSOLE_PROTOCOL "vt100" #define LIBXL_MAXMEM_CONSTANT 1024 @@ -3738,6 +3739,7 @@ struct libxl__bootloader_state { libxl__openpty_state openpty; libxl__openpty_result ptys[2]; /* [0] is for bootloader */ libxl__ev_child child; + libxl__ev_time time; libxl__domaindeathcheck deathcheck; int nargs, argsspace; const char **args; -- 2.42.0