namespaces/simple_init.cThis is namespaces/simple_init.c, an example to accompany the book, The Linux Programming Interface. This file is not printed in the book; it demonstrates Linux features that are not described in the book (typically features that have appeared since the book was published). The source code file is copyright 2024, Michael Kerrisk, and is licensed under the GNU General Public License, version 3. In the listing below, the names of Linux system calls and C library functions are hyperlinked to manual pages from the Linux man-pages project, and the names of functions implemented in the book are hyperlinked to the implementations of those functions.
|
/* simple_init.c A simple init(1)-style program to be used as the init program in a PID namespace. The program reaps the status of its children and provides a simple shell facility for executing commands. See https://lwn.net/Articles/532748/ */ #define _GNU_SOURCE #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <signal.h> #include <wordexp.h> #include <errno.h> #include <sys/wait.h> #include <sys/mount.h> #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \ } while (0) static int verbose = 0; /* Display wait status (from waitpid() or similar) given in 'wstatus' */
/* SIGCHLD handler: reap child processes as they change state */ static void child_handler(int sig) { pid_t pid; int wstatus; /* WUNTRACED and WCONTINUED allow waitpid() to catch stopped and continued children (in addition to terminated children) */ while ((pid = waitpid(-1, &wstatus, WNOHANG | WUNTRACED | WCONTINUED)) != 0) { if (pid == -1) { if (errno == ECHILD) /* No more children */ break; else perror("waitpid"); /* Unexpected error */ } if (verbose) printf("\tinit: SIGCHLD handler: PID %ld terminated\n", (long) pid); } }
/* Perform word expansion on string in 'cmd', allocating and returning a vector of words on success or NULL on failure */ static char ** expand_words(char *cmd) { wordexp_t pwordexp; int s = wordexp(cmd, &pwordexp, 0); if (s != 0) { fprintf(stderr, "Word expansion failed.\n" "\tNote that only simple " "commands plus arguments are supported\n" "\t(no pipelines, I/O redirection, and so on)\n"); return NULL; } char **arg_vec = calloc(pwordexp.we_wordc + 1, sizeof(char *)); if (arg_vec == NULL) errExit("calloc"); for (int j = 0; j < pwordexp.we_wordc; j++) arg_vec[j] = pwordexp.we_wordv[j]; arg_vec[pwordexp.we_wordc] = NULL; return arg_vec; }
static void usage(char *pname) { fprintf(stderr, "Usage: %s [-v] [-p proc-mount]\n", pname); fprintf(stderr, "\t-v Provide verbose logging\n"); fprintf(stderr, "\t-p proc-mount Mount a procfs at specified path\n"); exit(EXIT_FAILURE); }
int main(int argc, char *argv[]) { char *proc_path = NULL; int opt; while ((opt = getopt(argc, argv, "p:v")) != -1) { switch (opt) { case 'p': proc_path = optarg; break; case 'v': verbose = 1; break; default: usage(argv[0]); } } struct sigaction sa; sa.sa_flags = SA_RESTART | SA_NOCLDSTOP; sigemptyset(&sa.sa_mask); sa.sa_handler = child_handler; if (sigaction(SIGCHLD, &sa, NULL) == -1) errExit("sigaction"); if (verbose) printf("\tinit: my PID is %ld\n", (long) getpid()); /* Performing terminal operations while not being the foreground process group for the terminal generates a SIGTTOU that stops the process. However our init "shell" needs to be able to perform such operations (just like a normal shell), so we ignore that signal, which allows the operations to proceed successfully. */ signal(SIGTTOU, SIG_IGN); /* Become leader of a new process group and make that process group the foreground process group for the terminal */ if (setpgid(0, 0) == -1) errExit("setpgid");; if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1) errExit("tcsetpgrp-child"); /* If the user asked to mount a procfs, mount it at the specified path */ if (proc_path != NULL) { /* Some distributions enable mount propagation (mount --make-shared) by default. This would cause the mount that we create here to propagate to other namespaces. If we were mounting the procfs for this new PID namespace at "/proc" (which is typical), then this would hide the original "/proc" mount point in the initial namespace, which we probably don't want, since it will confuse a lot of system tools. To prevent propagation from occurring, we need to mark the mount point either as a slave mount or as a private mount. For further information on this topic, see the kernel source file Documentation/filesystems/sharedsubtree.txt and the mount(8) man page */ if (verbose) printf("Making %s a private mount\n", proc_path); /* EINVAL is the case that occurs if 'proc_path' exists but is not (yet) a mount point */ if (mount("none", proc_path, NULL, MS_SLAVE, NULL) == -1 && errno != EINVAL) perror("mount-make-slave-/"); if (verbose) printf("Mounting procfs at %s\n", proc_path); if (mount("proc", proc_path, "proc", 0, NULL) == -1) errExit("mount-procfs"); } /* Loop executing "shell" commands. Note that our shell facility is very simple: it handles simple commands with arguments, and performs wordexp() expansions (globbing, variable and command substitution, tilde expansion, and quote removal). Complex commands (pipelines, ||, &&) and I/O redirections, and standard shell features are not supported. */ while (1) { /* Read a shell command; exit on end of file */ #define CMD_SIZE 10000 char cmd[CMD_SIZE]; printf("init$ "); if (fgets(cmd, CMD_SIZE, stdin) == NULL) { if (verbose) printf("\tinit: exiting"); printf("\n"); break; } if (cmd[strlen(cmd) - 1] == '\n') cmd[strlen(cmd) - 1] = '\0'; /* Strip trailing '\n' */ if (strlen(cmd) == 0) continue; /* Ignore empty commands */ pid_t pid = fork(); /* Create child process */ if (pid == -1) { perror("fork"); break; } if (pid == 0) { /* Child */ char **arg_vec; arg_vec = expand_words(cmd); if (arg_vec == NULL) /* Word expansion failed */ exit(EXIT_FAILURE); /* Make child the leader of a new process group and make that process group the foreground process group for the terminal */ if (setpgid(0, 0) == -1) errExit("setpgid");; if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1) errExit("tcsetpgrp-child"); /* Child executes shell command and terminates */ execvp(arg_vec[0], arg_vec); errExit("execvp"); /* Only reached if execvp() fails */ } /* Parent falls through to here */ if (verbose) printf("\tinit: created child %ld\n", (long) pid); pause(); /* Will be interrupted by signal handler */ /* After child changes state, ensure that the 'init' program is the foreground process group for the terminal */ if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1) errExit("tcsetpgrp-parent"); } /* If we mounted a procfs earlier, unmount it before terminating */ if (proc_path != NULL) { if (verbose) printf("Unmounting procfs at %s\n", proc_path); if (umount(proc_path) == -1) errExit("umount-procfs"); } exit(EXIT_SUCCESS); }
Note that, in most cases, the programs rendered in these web pages are not free standing: you'll typically also need a few other source files (mostly in the lib/ subdirectory) as well. Generally, it's easier to just download the entire source tarball and build the programs with make(1). By hovering your mouse over the various hyperlinked include files and function calls above, you can see which other source files this file depends on.