// blob: 43c9af6f339be05c5a841da22474b3449144ab17 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
// Some headers on Android are missing cdefs: crbug.com/172337.
// (We can't use OS_ANDROID here since build_config.h is not included).
#if defined(ANDROID)
#include <sys/cdefs.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <linux/filter.h>
#include <signal.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/memory/scoped_ptr.h"
#include "base/posix/eintr_wrapper.h"
#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
#include "sandbox/linux/bpf_dsl/dump_bpf.h"
#include "sandbox/linux/bpf_dsl/policy.h"
#include "sandbox/linux/bpf_dsl/policy_compiler.h"
#include "sandbox/linux/seccomp-bpf/codegen.h"
#include "sandbox/linux/seccomp-bpf/die.h"
#include "sandbox/linux/seccomp-bpf/errorcode.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
#include "sandbox/linux/seccomp-bpf/syscall.h"
#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
#include "sandbox/linux/seccomp-bpf/trap.h"
#include "sandbox/linux/seccomp-bpf/verifier.h"
#include "sandbox/linux/services/linux_syscalls.h"
#include "sandbox/linux/services/syscall_wrappers.h"
using sandbox::bpf_dsl::Allow;
using sandbox::bpf_dsl::Error;
using sandbox::bpf_dsl::ResultExpr;
namespace sandbox {
namespace {
// Exit status that the probe functions below pass to sys_exit_group() on
// success. RunFunctionInPolicy() compares the child's exit status against
// this value to decide whether the probe succeeded.
const int kExpectedExitCode = 100;
#if !defined(NDEBUG)
// Best-effort diagnostic for crbug.com/152530: writes a fixed preamble, the
// strerror() text for the current errno and a trailing newline to |out_fd|.
// Stops at the first write that does not make progress; failures are not
// reported to the caller.
void WriteFailedStderrSetupMessage(int out_fd) {
  // Look up the error text before issuing any write() that could change
  // errno.
  const char* const errno_text = strerror(errno);
  static const char kPreamble[] =
      "You have reproduced a puzzling issue.\n"
      "Please, report to crbug.com/152530!\n"
      "Failed to set up stderr: ";

  if (HANDLE_EINTR(write(out_fd, kPreamble, sizeof(kPreamble) - 1)) <= 0) {
    return;
  }
  if (!errno_text) {
    return;
  }
  if (HANDLE_EINTR(write(out_fd, errno_text, strlen(errno_text))) <= 0) {
    return;
  }
  // The outcome of the final newline write is deliberately ignored.
  const ssize_t newline_result = HANDLE_EINTR(write(out_fd, "\n", 1));
  (void)newline_result;
}
#endif // !defined(NDEBUG)
// A deliberately tiny sandbox policy. Its only purpose is to let the probe
// process tell that a filter has actually been activated.
class ProbePolicy : public bpf_dsl::Policy {
 public:
  ProbePolicy() {}
  virtual ~ProbePolicy() {}

  // getpid() fails with EPERM, exit_group() is allowed, and every other
  // system call fails with EINVAL.
  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
    if (sysnum == __NR_getpid) {
      // The probe checks for exactly this errno to confirm the filter ran.
      return Error(EPERM);
    }
    if (sysnum == __NR_exit_group) {
      // The probe needs to be able to exit with a non-default return code.
      return Allow();
    }
    // Anything else fails in an easily recognizable way.
    return Error(EINVAL);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ProbePolicy);
};
// Probe body run under ProbePolicy: exits with kExpectedExitCode only when
// getpid() fails with EPERM, i.e. when the filter is demonstrably active.
// Otherwise it simply returns to the caller.
void ProbeProcess(void) {
  const bool filter_is_active = sys_getpid() < 0 && errno == EPERM;
  if (!filter_is_active) {
    return;
  }
  sys_exit_group(kExpectedExitCode);
}
// Policy that permits every system call. KernelSupportSeccompBPF() pairs it
// with TryVsyscallProcess().
class AllowAllPolicy : public bpf_dsl::Policy {
 public:
  AllowAllPolicy() {}
  virtual ~AllowAllPolicy() {}

  // Allows |sysnum| unconditionally; debug builds additionally assert that
  // the number is one SandboxBPF considers valid.
  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
    DCHECK(SandboxBPF::IsValidSyscallNumber(sysnum));
    return Allow();
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(AllowAllPolicy);
};
// Probe body that calls time(). time() is implemented as a vsyscall, and with
// an older glibc, vsyscall=emulate and some versions of the seccomp BPF patch
// the process may get SIGKILL-ed. Exits with kExpectedExitCode when time()
// succeeds, so the parent can detect that case; returns otherwise.
void TryVsyscallProcess(void) {
  time_t now;
  const bool time_failed = time(&now) == static_cast<time_t>(-1);
  if (time_failed) {
    return;
  }
  sys_exit_group(kExpectedExitCode);
}
// Reports whether the calling process appears to be single-threaded, judged
// by the link count of "self/task" relative to |proc_fd| (expected to be an
// open descriptor for /proc).
//
// Returns true when |proc_fd| is negative (the check cannot be performed, so
// we hope for the best), or when the directory's link count is exactly 3
// (presumably ".", ".." and a single thread entry). Returns false when the
// directory cannot be opened, stat'ed or closed cleanly, or when the link
// count differs.
bool IsSingleThreaded(int proc_fd) {
  if (proc_fd < 0) {
    // Cannot determine whether program is single-threaded. Hope for
    // the best...
    return true;
  }

  const int task = openat(proc_fd, "self/task", O_RDONLY | O_DIRECTORY);
  if (task < 0) {
    return false;
  }

  struct stat sb;
  const bool stat_ok = fstat(task, &sb) == 0;

  // Close exactly once, whatever the outcome. Retrying close() after a
  // failure would operate on a descriptor that has already been released.
  const bool close_ok = IGNORE_EINTR(close(task)) == 0;

  // |sb| is only inspected when fstat() succeeded.
  return stat_ok && close_ok && sb.st_nlink == 3;
}
} // namespace
// Creates a sandbox object that is not quiet, has no /proc descriptor, has
// not been started and has no policy attached.
SandboxBPF::SandboxBPF()
    : quiet_(false),
      proc_fd_(-1),
      sandbox_has_started_(false),
      policy_() {}
// Nothing to do explicitly; members clean up after themselves.
SandboxBPF::~SandboxBPF() {}
// Returns whether |sysnum| is a valid system call number, as decided by
// SyscallSet::IsValid().
bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
  const bool is_valid = SyscallSet::IsValid(sysnum);
  return is_valid;
}
// Forks a child process, installs |policy| in it via StartSandbox() and then
// runs |code_in_sandbox| there. The child's stderr is redirected into a pipe
// that the parent inspects afterwards.
//
// Returns true if the child exited normally with kExpectedExitCode, and false
// if it did not and left no message in the pipe. If the child failed and
// wrote a message, the parent dies with that message. The parent also dies on
// any failure of the surrounding plumbing (signal mask, pipe, fork, waitpid,
// close).
bool SandboxBPF::RunFunctionInPolicy(void (*code_in_sandbox)(),
                                     scoped_ptr<bpf_dsl::Policy> policy) {
  // Block all signals before forking a child process. This prevents an
  // attacker from manipulating our test by sending us an unexpected signal.
  sigset_t old_mask, new_mask;
  if (sigfillset(&new_mask) || sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
    SANDBOX_DIE("sigprocmask() failed");
  }
  int fds[2];
  if (pipe2(fds, O_NONBLOCK | O_CLOEXEC)) {
    SANDBOX_DIE("pipe() failed");
  }

  // The child dup2()s the write end onto fd 2, so neither end of the pipe may
  // already occupy one of the standard descriptors.
  if (fds[0] <= 2 || fds[1] <= 2) {
    SANDBOX_DIE("Process started without standard file descriptors");
  }

  // This code is using fork() and should only ever run single-threaded.
  // Most of the code below is "async-signal-safe" and only minor changes
  // would be needed to support threads.
  DCHECK(IsSingleThreaded(proc_fd_));
  pid_t pid = fork();
  if (pid < 0) {
    // Die if we cannot fork(). We would probably fail a little later
    // anyway, as the machine is likely very close to running out of
    // memory.
    // But what we don't want to do is return "false", as a crafty
    // attacker might cause fork() to fail at will and could trick us
    // into running without a sandbox.
    sigprocmask(SIG_SETMASK, &old_mask, NULL);  // OK, if it fails
    SANDBOX_DIE("fork() failed unexpectedly");
  }

  // In the child process
  if (!pid) {
    // Test a very simple sandbox policy to verify that we can
    // successfully turn on sandboxing.
    Die::EnableSimpleExit();

    errno = 0;
    if (IGNORE_EINTR(close(fds[0]))) {
      // This call to close() has been failing in strange ways. See
      // crbug.com/152530. So we only fail in debug mode now.
#if !defined(NDEBUG)
      WriteFailedStderrSetupMessage(fds[1]);
      SANDBOX_DIE(NULL);
#endif
    }
    if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
      // Stderr could very well be a file descriptor to .xsession-errors, or
      // another file, which could be backed by a file system that could cause
      // dup2 to fail while trying to close stderr. It's important that we do
      // not fail on trying to close stderr.
      // If dup2 fails here, we will continue normally, this means that our
      // parent won't cause a fatal failure if something writes to stderr in
      // this child.
#if !defined(NDEBUG)
      // In DEBUG builds, we still want to get a report.
      WriteFailedStderrSetupMessage(fds[1]);
      SANDBOX_DIE(NULL);
#endif
    }
    if (IGNORE_EINTR(close(fds[1]))) {
      // This call to close() has been failing in strange ways. See
      // crbug.com/152530. So we only fail in debug mode now.
#if !defined(NDEBUG)
      // fds[1] must not be used any more after the close() attempt above.
      // In debug builds a failed dup2() is fatal, so when we get here fd 2
      // refers to the pipe; report through it so the parent sees the message.
      WriteFailedStderrSetupMessage(2);
      SANDBOX_DIE(NULL);
#endif
    }

    SetSandboxPolicy(policy.release());
    if (!StartSandbox(PROCESS_SINGLE_THREADED)) {
      SANDBOX_DIE(NULL);
    }

    // Run our code in the sandbox.
    code_in_sandbox();

    // code_in_sandbox() is not supposed to return here.
    SANDBOX_DIE(NULL);
  }

  // In the parent process.
  if (IGNORE_EINTR(close(fds[1]))) {
    SANDBOX_DIE("close() failed");
  }
  if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
    SANDBOX_DIE("sigprocmask() failed");
  }
  int status;
  if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
    SANDBOX_DIE("waitpid() failed unexpectedly");
  }
  bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;

  // If we fail to support sandboxing, there might be an additional
  // error message. If so, this was an entirely unexpected and fatal
  // failure. We should report the failure and somebody must fix
  // things. This is probably a security-critical bug in the sandboxing
  // code.
  if (!rc) {
    char buf[4096];
    ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1));
    if (len > 0) {
      // Strip trailing newlines, but always keep at least one character.
      while (len > 1 && buf[len - 1] == '\n') {
        --len;
      }
      buf[len] = '\000';
      SANDBOX_DIE(buf);
    }
  }
  if (IGNORE_EINTR(close(fds[0]))) {
    SANDBOX_DIE("close() failed");
  }

  return rc;
}
bool SandboxBPF::KernelSupportSeccompBPF() {
return RunFunctionInPolicy(ProbeProcess,
scoped_ptr<bpf_dsl::Policy>(new ProbePolicy())) &&
RunFunctionInPolicy(TryVsyscallProcess,
scoped_ptr<bpf_dsl::Policy>(new AllowAllPolicy()));
}
// static
// Reports whether the seccomp-bpf sandbox can be used, updating the cached
// |status_| along the way. |proc_fd| is handed to IsSingleThreaded() and to
// the temporary probe sandbox.
SandboxBPF::SandboxStatus SandboxBPF::SupportsSeccompSandbox(int proc_fd) {
  // If the sandbox is currently active, we clearly must have support for
  // sandboxing.
  if (status_ == STATUS_ENABLED) {
    return status_;
  }

  // A previously available sandbox may have become unusable because the
  // run-time environment changed, so re-check the thread count.
  if (status_ == STATUS_AVAILABLE) {
    if (!IsSingleThreaded(proc_fd)) {
      status_ = STATUS_UNAVAILABLE;
    }
    return status_;
  }

  // All transitions into STATUS_UNAVAILABLE start from STATUS_AVAILABLE and
  // happen if and only if the process was found to be multi-threaded. Hence,
  // a process that is single-threaded now can safely be promoted back to
  // STATUS_AVAILABLE. Otherwise the cached value stands.
  if (status_ == STATUS_UNAVAILABLE) {
    if (IsSingleThreaded(proc_fd)) {
      status_ = STATUS_AVAILABLE;
    }
    return status_;
  }

  // Any remaining state other than "unknown" is returned as cached.
  if (status_ != STATUS_UNKNOWN) {
    return status_;
  }

  // No usable cached value: perform the thorough check. A private sandbox
  // object is used so that no previously configured policy can interfere
  // with the tests done by KernelSupportSeccompBPF().
  SandboxBPF probe_sandbox;

  // "quiet_ = true" suppresses messages for expected and benign failures
  // (e.g. if the current kernel lacks support for BPF filters).
  probe_sandbox.quiet_ = true;
  probe_sandbox.set_proc_fd(proc_fd);

  if (!probe_sandbox.KernelSupportSeccompBPF()) {
    status_ = STATUS_UNSUPPORTED;
    return status_;
  }
  status_ = STATUS_AVAILABLE;

  // The tests ran in a child process, which is always single-threaded from
  // the sandbox's point of view. The caller itself may not be; if so, mark
  // the sandbox as temporarily unavailable.
  if (!IsSingleThreaded(proc_fd)) {
    status_ = STATUS_UNAVAILABLE;
  }
  return status_;
}
// static
// Probes for the seccomp() system call with SECCOMP_FILTER_FLAG_TSYNC by
// invoking it with a NULL filter. An EFAULT failure is taken to mean the
// kernel got as far as reading the filter, i.e. the feature exists; ENOSYS
// and EINVAL mean it does not. Any other outcome trips a CHECK.
SandboxBPF::SandboxStatus
SandboxBPF::SupportsSeccompThreadFilterSynchronization() {
  // Applying NO_NEW_PRIVS, a BPF filter, and synchronizing the filter across
  // the thread group are all handled atomically by this syscall.
  const int rv = syscall(
      __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL);

  if (rv == -1 && errno == EFAULT) {
    return STATUS_AVAILABLE;
  }

  // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
  CHECK_EQ(-1, rv);
  CHECK(ENOSYS == errno || EINVAL == errno);
  return STATUS_UNSUPPORTED;
}
// Records the descriptor that StartSandbox() uses for its thread-count check
// and closes before installing the filter.
void SandboxBPF::set_proc_fd(int proc_fd) {
  proc_fd_ = proc_fd;
}
// Validates the current state, checks that the process's threading matches
// |thread_state|, closes the /proc descriptor and installs the BPF filter.
// On success sets |status_| to STATUS_ENABLED and returns true. Every failure
// goes through SANDBOX_DIE(); the "return false" statements only matter if
// that call returns.
bool SandboxBPF::StartSandbox(SandboxThreadState thread_state) {
  CHECK(thread_state == PROCESS_SINGLE_THREADED ||
        thread_state == PROCESS_MULTI_THREADED);

  if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
    SANDBOX_DIE(
        "Trying to start sandbox, even though it is known to be "
        "unavailable");
    return false;
  }
  if (sandbox_has_started_) {
    SANDBOX_DIE(
        "Cannot repeatedly start sandbox. Create a separate Sandbox "
        "object instead.");
    return false;
  }

  // Open /proc ourselves if the caller did not supply a descriptor. If this
  // fails we continue in degraded mode for now: IsSingleThreaded() then
  // simply assumes the best. In the future, we might want to tighten this
  // requirement.
  if (proc_fd_ < 0) {
    proc_fd_ = open("/proc", O_RDONLY | O_DIRECTORY);
  }

  const bool tsync_supported =
      SupportsSeccompThreadFilterSynchronization() == STATUS_AVAILABLE;
  const bool multi_threaded_requested =
      thread_state == PROCESS_MULTI_THREADED;

  if (thread_state == PROCESS_SINGLE_THREADED) {
    if (!IsSingleThreaded(proc_fd_)) {
      SANDBOX_DIE("Cannot start sandbox; process is already multi-threaded");
      return false;
    }
  } else if (multi_threaded_requested) {
    if (IsSingleThreaded(proc_fd_)) {
      SANDBOX_DIE("Cannot start sandbox; "
                  "process may be single-threaded when reported as not");
      return false;
    }
    if (!tsync_supported) {
      SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
                  "filters for a threadgroup");
      return false;
    }
  }

  // /proc is no longer needed. Close it before the filters go in, just in
  // case the policy denies close().
  if (proc_fd_ >= 0) {
    if (IGNORE_EINTR(close(proc_fd_))) {
      SANDBOX_DIE("Failed to close file descriptor for /proc");
      return false;
    }
    proc_fd_ = -1;
  }

  // Install the filters.
  InstallFilter(tsync_supported || multi_threaded_requested);

  // We are now inside the sandbox.
  status_ = STATUS_ENABLED;
  return true;
}
// Hands |policy| to the sandbox, which stores it in |policy_|. A raw pointer
// is taken instead of a scoped_ptr because polymorphism makes the latter
// awkward to use here. Debug builds assert that no policy was set before;
// calling this after the sandbox has started is fatal.
void SandboxBPF::SetSandboxPolicy(bpf_dsl::Policy* policy) {
  DCHECK(!policy_);

  const bool already_started = sandbox_has_started_;
  if (already_started) {
    SANDBOX_DIE("Cannot change policy after sandbox has started");
  }

  policy_.reset(policy);
}
// Compiles the current policy into a BPF program, sets no-new-privs and
// installs the program in the kernel. With |must_sync_threads| the seccomp()
// system call is used with SECCOMP_FILTER_FLAG_TSYNC; otherwise
// prctl(PR_SET_SECCOMP) is used. Any failure is fatal. On success
// |sandbox_has_started_| is set and |policy_| has been released.
void SandboxBPF::InstallFilter(bool must_sync_threads) {
  // We want to be very careful in not imposing any requirements on the
  // policies that are set with SetSandboxPolicy(). This means, as soon as
  // the sandbox is active, we shouldn't be relying on libraries that could
  // be making system calls. This, for example, means we should avoid
  // using the heap and we should avoid using STL functions.
  // Temporarily copy the contents of the "program" vector into a
  // stack-allocated array; and then explicitly destroy that object.
  // This makes sure we don't ex- or implicitly call new/delete after we
  // installed the BPF filter program in the kernel. Depending on the
  // system memory allocator that is in effect, these operators can result
  // in system calls to things like munmap() or brk().
  CodeGen::Program* program = AssembleFilter(false).release();

  // The length field of sock_fprog is narrower than size_t. Refuse to
  // continue rather than silently hand the kernel a truncated length, which
  // would install only a prefix of the intended filter.
  const unsigned short program_len =
      static_cast<unsigned short>(program->size());
  if (static_cast<size_t>(program_len) != program->size()) {
    SANDBOX_DIE("BPF program is too large to be installed");
  }

  struct sock_filter bpf[program->size()];
  const struct sock_fprog prog = {program_len, bpf};
  memcpy(bpf, &(*program)[0], sizeof(bpf));
  delete program;

  // Make an attempt to release memory that is no longer needed here, rather
  // than in the destructor. Try to avoid as much as possible to presume of
  // what will be possible to do in the new (sandboxed) execution environment.
  policy_.reset();

  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
    SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
  }

  // Install BPF filter program. If the thread state indicates multi-threading
  // support, then the kernel has the seccomp system call. Otherwise, fall
  // back on prctl, which requires the process to be single-threaded.
  if (must_sync_threads) {
    int rv = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
                     SECCOMP_FILTER_FLAG_TSYNC,
                     reinterpret_cast<const char*>(&prog));
    if (rv) {
      SANDBOX_DIE(quiet_ ? NULL :
          "Kernel refuses to turn on and synchronize threads for BPF filters");
    }
  } else {
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
      SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
    }
  }

  sandbox_has_started_ = true;
}
// Compiles |policy_| into a BPF program and returns it. When
// |force_verification| is set, and always in builds without NDEBUG, the
// compiled program is checked against the policy; a mismatch dumps the
// program and is fatal.
scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter(
    bool force_verification) {
#if !defined(NDEBUG)
  // Debug builds verify unconditionally.
  force_verification = true;
#endif

  bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
  scoped_ptr<CodeGen::Program> program = compiler.Compile();

  // Verification is expensive, so it is skipped unless requested.
  if (!force_verification) {
    return program.Pass();
  }

  // Make sure compilation resulted in a BPF program that executes
  // correctly. Otherwise, there is an internal error in our BPF compiler,
  // and there is really nothing the caller can do until the bug is fixed.
  const char* err = NULL;
  const bool verified =
      Verifier::VerifyBPF(&compiler, *program, *policy_, &err);
  if (!verified) {
    bpf_dsl::DumpBPF::PrintProgram(*program);
    SANDBOX_DIE(err);
  }

  return program.Pass();
}
// Forwards to bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap() for |sysno|.
bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
  const bool required = bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
  return required;
}
// Issues the system call described by |args| through Syscall::Call(),
// passing the call number and all six arguments (each cast to intptr_t),
// and returns whatever Syscall::Call() returns.
intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
  const intptr_t result = Syscall::Call(args.nr,
                                        static_cast<intptr_t>(args.args[0]),
                                        static_cast<intptr_t>(args.args[1]),
                                        static_cast<intptr_t>(args.args[2]),
                                        static_cast<intptr_t>(args.args[3]),
                                        static_cast<intptr_t>(args.args[4]),
                                        static_cast<intptr_t>(args.args[5]));
  return result;
}
// Cached sandbox availability, shared by all SandboxBPF objects. It starts
// out as STATUS_UNKNOWN, which makes SupportsSeccompSandbox() run its
// thorough check on first use.
SandboxBPF::SandboxStatus SandboxBPF::status_ = STATUS_UNKNOWN;
} // namespace sandbox