// sandbox/linux/seccomp-bpf/sandbox_bpf.cc - mojo - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

 // Some headers on Android are missing cdefs: crbug.com/172337.
 // (We can't use OS_ANDROID here since build_config.h is not included).
 #if defined(ANDROID)
 #include <sys/cdefs.h>
 #endif

 #include <errno.h>
 #include <fcntl.h>
 #include <linux/filter.h>
 #include <signal.h>
 #include <string.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <time.h>
 #include <unistd.h>

 #include "base/compiler_specific.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/posix/eintr_wrapper.h"
 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
 #include "sandbox/linux/bpf_dsl/dump_bpf.h"
 #include "sandbox/linux/bpf_dsl/policy.h"
 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
 #include "sandbox/linux/seccomp-bpf/codegen.h"
 #include "sandbox/linux/seccomp-bpf/die.h"
 #include "sandbox/linux/seccomp-bpf/errorcode.h"
 #include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
 #include "sandbox/linux/seccomp-bpf/syscall.h"
 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
 #include "sandbox/linux/seccomp-bpf/trap.h"
 #include "sandbox/linux/seccomp-bpf/verifier.h"
 #include "sandbox/linux/services/linux_syscalls.h"
 #include "sandbox/linux/services/syscall_wrappers.h"

 using sandbox::bpf_dsl::Allow;
 using sandbox::bpf_dsl::Error;
 using sandbox::bpf_dsl::ResultExpr;

 namespace sandbox {

 namespace {

 const int kExpectedExitCode = 100;

 #if !defined(NDEBUG)
 // Debug-only helper: writes a fixed "failed to set up stderr" banner to
 // |out_fd|, followed by the strerror() text for the current errno and a
 // newline. Each part is only attempted if the previous write succeeded;
 // write errors are otherwise ignored.
 void WriteFailedStderrSetupMessage(int out_fd) {
   // Look up the description first, before any write() can change errno.
   const char* const errno_text = strerror(errno);
   static const char kBanner[] =
       "You have reproduced a puzzling issue.\n"
       "Please, report to crbug.com/152530!\n"
       "Failed to set up stderr: ";

   if (HANDLE_EINTR(write(out_fd, kBanner, sizeof(kBanner) - 1)) <= 0) {
     return;
   }
   if (!errno_text) {
     return;
   }
   if (HANDLE_EINTR(write(out_fd, errno_text, strlen(errno_text))) <= 0) {
     return;
   }
   // The outcome of the final write is deliberately ignored; the empty "if"
   // merely consumes the return value.
   if (HANDLE_EINTR(write(out_fd, "\n", 1))) {
   }
 }
 #endif  // !defined(NDEBUG)

 // A deliberately tiny sandbox policy, just good enough to tell that the
 // sandbox has actually been activated:
 //   - getpid() fails with EPERM, so the probe can check the filter ran;
 //   - exit_group() is allowed, so the probe can exit with a chosen code;
 //   - everything else fails with EINVAL, an easily recognizable error.
 class ProbePolicy : public bpf_dsl::Policy {
  public:
   ProbePolicy() {}
   virtual ~ProbePolicy() {}

   virtual ResultExpr EvaluateSyscall(int sysnum) const override {
     if (sysnum == __NR_getpid) {
       return Error(EPERM);
     }
     if (sysnum == __NR_exit_group) {
       return Allow();
     }
     return Error(EINVAL);
   }

  private:
   DISALLOW_COPY_AND_ASSIGN(ProbePolicy);
 };

 // Probe body: exits with kExpectedExitCode if getpid() fails with EPERM.
 // In every other case it simply returns to the caller.
 void ProbeProcess(void) {
   if (sys_getpid() >= 0 || errno != EPERM) {
     return;
   }
   sys_exit_group(kExpectedExitCode);
 }

 // Policy that allows every system call it is asked about. In debug builds it
 // additionally asserts that the queried number is a valid system call number.
 class AllowAllPolicy : public bpf_dsl::Policy {
  public:
   AllowAllPolicy() {
   }

   virtual ~AllowAllPolicy() {
   }

   // Always returns Allow(), regardless of |sysnum|.
   virtual ResultExpr EvaluateSyscall(int sysnum) const override {
     DCHECK(SandboxBPF::IsValidSyscallNumber(sysnum));
     return Allow();
   }

  private:
   DISALLOW_COPY_AND_ASSIGN(AllowAllPolicy);
 };

 // Probe body: calls time() and exits with kExpectedExitCode if it succeeds.
 //
 // time() is implemented as a vsyscall. With an older glibc, with
 // vsyscall=emulate and some versions of the seccomp BPF patch we may get
 // SIGKILL-ed, which this probe is meant to detect.
 void TryVsyscallProcess(void) {
   time_t now;
   if (time(&now) == static_cast<time_t>(-1)) {
     return;
   }
   sys_exit_group(kExpectedExitCode);
 }

 // Returns true if the process looks single-threaded, judged by the link
 // count of "self/task" relative to |proc_fd| (a descriptor for /proc).
 //
 // A negative |proc_fd| means the check cannot be performed; in that case the
 // function optimistically returns true. Any failure to open, stat or close
 // the task directory yields false.
 bool IsSingleThreaded(int proc_fd) {
   if (proc_fd < 0) {
     // Cannot determine whether program is single-threaded. Hope for
     // the best...
     return true;
   }

   const int task = openat(proc_fd, "self/task", O_RDONLY | O_DIRECTORY);
   if (task < 0) {
     return false;
   }

   // A link count of 3 is taken to mean exactly one thread (presumably ".",
   // ".." and a single per-thread directory).
   struct stat sb;
   const bool one_thread = fstat(task, &sb) == 0 && sb.st_nlink == 3;

   // Close the descriptor exactly once, on every path. A failed close() must
   // not be retried: the descriptor number may already have been released and
   // reused, so a second close() could hit an unrelated file.
   const bool closed = IGNORE_EINTR(close(task)) == 0;

   return one_thread && closed;
 }

 }  // namespace

 // Constructs a sandbox object that is not quiet, has no /proc descriptor
 // (-1), has not been started and holds no policy.
 SandboxBPF::SandboxBPF()
     : quiet_(false),
       proc_fd_(-1),
       sandbox_has_started_(false),
       policy_() {}

 // No explicit teardown is performed here.
 SandboxBPF::~SandboxBPF() {}

 // Thin wrapper: forwards |sysnum| to SyscallSet::IsValid() and returns its
 // answer.
 bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
   const bool valid = SyscallSet::IsValid(sysnum);
   return valid;
 }

 // Forks a child process, installs |policy| in it and runs |code_in_sandbox|
 // there. Returns true iff the child exited normally with kExpectedExitCode.
 //
 // All signals are blocked around the fork(). The child's stderr is redirected
 // into a non-blocking pipe; if the child did not exit as expected and left
 // anything in that pipe, the parent treats the text as a fatal error message
 // and dies. Failures of sigprocmask(), pipe2(), fork(), waitpid() and of the
 // parent's close() calls are fatal as well.
 bool SandboxBPF::RunFunctionInPolicy(void (*code_in_sandbox)(),
                                      scoped_ptr<bpf_dsl::Policy> policy) {
   // Block all signals before forking a child process. This prevents an
   // attacker from manipulating our test by sending us an unexpected signal.
   sigset_t old_mask, new_mask;
   if (sigfillset(&new_mask) || sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
     SANDBOX_DIE("sigprocmask() failed");
   }
   int fds[2];
   if (pipe2(fds, O_NONBLOCK | O_CLOEXEC)) {
     SANDBOX_DIE("pipe() failed");
   }

   // The pipe must not have landed on stdin/stdout/stderr; the child is about
   // to dup2() the write end onto descriptor 2.
   if (fds[0] <= 2 || fds[1] <= 2) {
     SANDBOX_DIE("Process started without standard file descriptors");
   }

   // This code is using fork() and should only ever run single-threaded.
   // Most of the code below is "async-signal-safe" and only minor changes
   // would be needed to support threads.
   DCHECK(IsSingleThreaded(proc_fd_));
   pid_t pid = fork();
   if (pid < 0) {
     // Die if we cannot fork(). We would probably fail a little later
     // anyway, as the machine is likely very close to running out of
     // memory.
     // But what we don't want to do is return "false", as a crafty
     // attacker might cause fork() to fail at will and could trick us
     // into running without a sandbox.
     sigprocmask(SIG_SETMASK, &old_mask, NULL);  // OK, if it fails
     SANDBOX_DIE("fork() failed unexpectedly");
   }

   // In the child process
   if (!pid) {
     // Test a very simple sandbox policy to verify that we can
     // successfully turn on sandboxing.
     Die::EnableSimpleExit();

     errno = 0;
     if (IGNORE_EINTR(close(fds[0]))) {
       // This call to close() has been failing in strange ways. See
       // crbug.com/152530. So we only fail in debug mode now.
 #if !defined(NDEBUG)
       WriteFailedStderrSetupMessage(fds[1]);
       SANDBOX_DIE(NULL);
 #endif
     }
     if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
       // Stderr could very well be a file descriptor to .xsession-errors, or
       // another file, which could be backed by a file system that could cause
       // dup2 to fail while trying to close stderr. It's important that we do
       // not fail on trying to close stderr.
       // If dup2 fails here, we will continue normally, this means that our
       // parent won't cause a fatal failure if something writes to stderr in
       // this child.
 #if !defined(NDEBUG)
       // In DEBUG builds, we still want to get a report.
       WriteFailedStderrSetupMessage(fds[1]);
       SANDBOX_DIE(NULL);
 #endif
     }
     if (IGNORE_EINTR(close(fds[1]))) {
       // This call to close() has been failing in strange ways. See
       // crbug.com/152530. So we only fail in debug mode now.
 #if !defined(NDEBUG)
       // fds[1] has just been handed to close() and must not be used any more.
       // Report through descriptor 2 instead: in debug builds a failed dup2()
       // above has already terminated the child, so 2 refers to the pipe here.
       WriteFailedStderrSetupMessage(2);
       SANDBOX_DIE(NULL);
 #endif
     }

     SetSandboxPolicy(policy.release());
     if (!StartSandbox(PROCESS_SINGLE_THREADED)) {
       SANDBOX_DIE(NULL);
     }

     // Run our code in the sandbox.
     code_in_sandbox();

     // code_in_sandbox() is not supposed to return here.
     SANDBOX_DIE(NULL);
   }

   // In the parent process.
   if (IGNORE_EINTR(close(fds[1]))) {
     SANDBOX_DIE("close() failed");
   }
   if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
     SANDBOX_DIE("sigprocmask() failed");
   }
   int status;
   if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
     SANDBOX_DIE("waitpid() failed unexpectedly");
   }
   bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;

   // If we fail to support sandboxing, there might be an additional
   // error message. If so, this was an entirely unexpected and fatal
   // failure. We should report the failure and somebody must fix
   // things. This is probably a security-critical bug in the sandboxing
   // code.
   if (!rc) {
     char buf[4096];
     // One byte is held back for the terminating NUL written below.
     ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1));
     if (len > 0) {
       // Strip trailing newlines, but always keep at least one character.
       while (len > 1 && buf[len - 1] == '\n') {
         --len;
       }
       buf[len] = '\000';
       SANDBOX_DIE(buf);
     }
   }
   if (IGNORE_EINTR(close(fds[0]))) {
     SANDBOX_DIE("close() failed");
   }

   return rc;
 }

 bool SandboxBPF::KernelSupportSeccompBPF() {
   return RunFunctionInPolicy(ProbeProcess,
                              scoped_ptr<bpf_dsl::Policy>(new ProbePolicy())) &&
          RunFunctionInPolicy(TryVsyscallProcess,
                              scoped_ptr<bpf_dsl::Policy>(new AllowAllPolicy()));
 }

 // static
 // Returns the (cached) sandbox status, refreshing it where needed. |proc_fd|
 // is used for the single-threaded check and for the probe sandbox. The
 // result is stored in status_.
 SandboxBPF::SandboxStatus SandboxBPF::SupportsSeccompSandbox(int proc_fd) {
   switch (status_) {
     case STATUS_ENABLED:
       // If the sandbox is currently active, we clearly must have support for
       // sandboxing.
       return status_;

     case STATUS_AVAILABLE:
       // Even if the sandbox was previously available, something might have
       // changed in our run-time environment. Check one more time.
       if (!IsSingleThreaded(proc_fd)) {
         status_ = STATUS_UNAVAILABLE;
       }
       return status_;

     case STATUS_UNAVAILABLE:
       // All state transitions resulting in STATUS_UNAVAILABLE are immediately
       // preceded by STATUS_AVAILABLE, and they happen if and only if the
       // process was multi-threaded. So a single-threaded process found in
       // this state can safely be assumed to have sandboxing available.
       if (IsSingleThreaded(proc_fd)) {
         status_ = STATUS_AVAILABLE;
       }
       return status_;

     case STATUS_UNKNOWN: {
       // No cached value yet: perform a thorough check now.
       //
       // We create our own private copy of a "Sandbox" object. This ensures
       // that the object does not have any policies configured, that might
       // interfere with the tests done by "KernelSupportSeccompBPF()".
       SandboxBPF sandbox;

       // By setting "quiet_ = true" we suppress messages for expected and
       // benign failures (e.g. if the current kernel lacks support for BPF
       // filters).
       sandbox.quiet_ = true;
       sandbox.set_proc_fd(proc_fd);

       if (!sandbox.KernelSupportSeccompBPF()) {
         status_ = STATUS_UNSUPPORTED;
       } else if (IsSingleThreaded(proc_fd)) {
         status_ = STATUS_AVAILABLE;
       } else {
         // The probes ran in a child process, which is always
         // single-threaded. The caller itself is not, so the sandbox is
         // marked as temporarily unavailable.
         status_ = STATUS_UNAVAILABLE;
       }
       return status_;
     }

     default:
       // Any other state (e.g. STATUS_UNSUPPORTED) is reported unchanged.
       return status_;
   }
 }

 // static
 // Probes whether the kernel accepts seccomp(SECCOMP_SET_MODE_FILTER) with
 // SECCOMP_FILTER_FLAG_TSYNC. The call is made with a NULL filter, so it is
 // never expected to succeed: failing with EFAULT is reported as
 // STATUS_AVAILABLE, while ENOSYS or EINVAL is reported as STATUS_UNSUPPORTED.
 // Any other outcome trips a CHECK.
 SandboxBPF::SandboxStatus
 SandboxBPF::SupportsSeccompThreadFilterSynchronization() {
   // Applying NO_NEW_PRIVS, a BPF filter, and synchronizing the filter across
   // the thread group are all handled atomically by this syscall.
   const int rv = syscall(
       __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL);

   const bool failed_with_efault = (rv == -1 && errno == EFAULT);
   if (failed_with_efault) {
     return STATUS_AVAILABLE;
   }

   // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
   CHECK_EQ(-1, rv);
   CHECK(ENOSYS == errno || EINVAL == errno);
   return STATUS_UNSUPPORTED;
 }

 // Stores |proc_fd| as the descriptor this object uses for /proc lookups.
 void SandboxBPF::set_proc_fd(int proc_fd) {
   proc_fd_ = proc_fd;
 }

 // Validates the requested |thread_state| against the actual process state,
 // closes the /proc descriptor, installs the BPF filter and marks the sandbox
 // as enabled. Every precondition failure ends in SANDBOX_DIE(); the
 // "return false" statements that follow are kept as a defensive fallback.
 bool SandboxBPF::StartSandbox(SandboxThreadState thread_state) {
   CHECK(thread_state == PROCESS_SINGLE_THREADED ||
         thread_state == PROCESS_MULTI_THREADED);

   if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
     SANDBOX_DIE(
         "Trying to start sandbox, even though it is known to be "
         "unavailable");
     return false;
   }
   if (sandbox_has_started_) {
     SANDBOX_DIE(
         "Cannot repeatedly start sandbox. Create a separate Sandbox "
         "object instead.");
     return false;
   }

   // Open /proc ourselves if the caller did not supply a descriptor. For now,
   // continue in degraded mode if we can't access /proc. In the future, we
   // might want to tighten this requirement.
   if (proc_fd_ < 0) {
     proc_fd_ = open("/proc", O_RDONLY | O_DIRECTORY);
   }

   const bool supports_tsync =
       SupportsSeccompThreadFilterSynchronization() == STATUS_AVAILABLE;

   if (thread_state == PROCESS_SINGLE_THREADED &&
       !IsSingleThreaded(proc_fd_)) {
     SANDBOX_DIE("Cannot start sandbox; process is already multi-threaded");
     return false;
   }
   if (thread_state == PROCESS_MULTI_THREADED) {
     if (IsSingleThreaded(proc_fd_)) {
       SANDBOX_DIE("Cannot start sandbox; "
                   "process may be single-threaded when reported as not");
       return false;
     }
     if (!supports_tsync) {
       SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
                   "filters for a threadgroup");
       return false;
     }
   }

   // We no longer need access to any files in /proc. We want to do this
   // before installing the filters, just in case that our policy denies
   // close().
   if (proc_fd_ >= 0) {
     const int close_result = IGNORE_EINTR(close(proc_fd_));
     if (close_result) {
       SANDBOX_DIE("Failed to close file descriptor for /proc");
       return false;
     }
     proc_fd_ = -1;
   }

   // Install the filters, synchronizing across threads whenever the kernel
   // supports it or the caller declared the process multi-threaded.
   const bool sync_threads =
       supports_tsync || thread_state == PROCESS_MULTI_THREADED;
   InstallFilter(sync_threads);

   // We are now inside the sandbox.
   status_ = STATUS_ENABLED;

   return true;
 }

 // Installs |policy| as this sandbox's policy; policy_ takes over the pointer.
 // Must be called before the sandbox has started, and (checked in debug
 // builds) only while no policy is set.
 // A raw pointer is taken rather than a scoped_ptr because polymorphism makes
 // the latter awkward to use here.
 void SandboxBPF::SetSandboxPolicy(bpf_dsl::Policy* policy) {
   DCHECK(!policy_);

   if (sandbox_has_started_) {
     SANDBOX_DIE("Cannot change policy after sandbox has started");
   }

   policy_.reset(policy);
 }

 // Compiles the current policy into a BPF program and installs it in the
 // kernel. With |must_sync_threads| set, the filter is installed through the
 // seccomp() system call with SECCOMP_FILTER_FLAG_TSYNC; otherwise through
 // prctl(PR_SET_SECCOMP). Any kernel refusal ends in SANDBOX_DIE(). On
 // success, policy_ has been released and sandbox_has_started_ is true.
 void SandboxBPF::InstallFilter(bool must_sync_threads) {
   // We want to be very careful in not imposing any requirements on the
   // policies that are set with SetSandboxPolicy(). This means, as soon as
   // the sandbox is active, we shouldn't be relying on libraries that could
   // be making system calls. This, for example, means we should avoid
   // using the heap and we should avoid using STL functions.
   // Temporarily copy the contents of the "program" vector into a
   // stack-allocated array; and then explicitly destroy that object.
   // This makes sure we don't ex- or implicitly call new/delete after we
   // installed the BPF filter program in the kernel. Depending on the
   // system memory allocator that is in effect, these operators can result
   // in system calls to things like munmap() or brk().
   CodeGen::Program* program = AssembleFilter(false).release();

   // Variable-length stack array sized to the program; sock_fprog stores the
   // instruction count as an unsigned short.
   struct sock_filter bpf[program->size()];
   const struct sock_fprog prog = {static_cast<unsigned short>(program->size()),
                                   bpf};
   memcpy(bpf, &(*program)[0], sizeof(bpf));
   delete program;

   // Make an attempt to release memory that is no longer needed here, rather
   // than in the destructor. Try to avoid as much as possible to presume of
   // what will be possible to do in the new (sandboxed) execution environment.
   policy_.reset();

   if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
     SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
   }

   // Install BPF filter program. If the thread state indicates multi-threading
   // support, then the kernel has the seccomp system call. Otherwise, fall
   // back on prctl, which requires the process to be single-threaded.
   if (must_sync_threads) {
     int rv = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
         SECCOMP_FILTER_FLAG_TSYNC, reinterpret_cast<const char*>(&prog));
     if (rv) {
       SANDBOX_DIE(quiet_ ? NULL :
           "Kernel refuses to turn on and synchronize threads for BPF filters");
     }
   } else {
     if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
       SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
     }
   }

   sandbox_has_started_ = true;
 }

 // Compiles policy_ into a BPF program and returns it. The program is
 // additionally run through the verifier when |force_verification| is true,
 // and always in debug builds; a verification failure dumps the program and
 // dies.
 scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter(
     bool force_verification) {
 #if !defined(NDEBUG)
   force_verification = true;
 #endif

   bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
   scoped_ptr<CodeGen::Program> program = compiler.Compile();

   // Verification is expensive. We only perform this step, if we are
   // compiled in debug mode, or if the caller explicitly requested
   // verification.
   if (!force_verification) {
     return program.Pass();
   }

   // Make sure compilation resulted in BPF program that executes
   // correctly. Otherwise, there is an internal error in our BPF compiler.
   // There is really nothing the caller can do until the bug is fixed.
   const char* err = NULL;
   const bool verified =
       Verifier::VerifyBPF(&compiler, *program, *policy_, &err);
   if (!verified) {
     bpf_dsl::DumpBPF::PrintProgram(*program);
     SANDBOX_DIE(err);
   }

   return program.Pass();
 }

 // Thin wrapper: forwards |sysno| to
 // bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap() and returns its answer.
 bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
   const bool required = bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
   return required;
 }

 // Re-issues the system call described by |args|: the number comes from
 // args.nr and the six arguments from args.args[0..5], each converted to
 // intptr_t. Returns whatever Syscall::Call() returns.
 intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
   const intptr_t arg0 = static_cast<intptr_t>(args.args[0]);
   const intptr_t arg1 = static_cast<intptr_t>(args.args[1]);
   const intptr_t arg2 = static_cast<intptr_t>(args.args[2]);
   const intptr_t arg3 = static_cast<intptr_t>(args.args[3]);
   const intptr_t arg4 = static_cast<intptr_t>(args.args[4]);
   const intptr_t arg5 = static_cast<intptr_t>(args.args[5]);
   return Syscall::Call(args.nr, arg0, arg1, arg2, arg3, arg4, arg5);
 }

 // Process-wide cached sandbox status, shared by all SandboxBPF instances.
 // Starts out as STATUS_UNKNOWN.
 SandboxBPF::SandboxStatus SandboxBPF::status_ = STATUS_UNKNOWN;

 }  // namespace sandbox
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

	// Some headers on Android are missing cdefs: crbug.com/172337.
	// (We can't use OS_ANDROID here since build_config.h is not included).
	#if defined(ANDROID)
	#include <sys/cdefs.h>
	#endif

	#include <errno.h>
	#include <fcntl.h>
	#include <linux/filter.h>
	#include <signal.h>
	#include <string.h>
	#include <sys/prctl.h>
	#include <sys/stat.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <time.h>
	#include <unistd.h>

	#include "base/compiler_specific.h"
	#include "base/logging.h"
	#include "base/macros.h"
	#include "base/memory/scoped_ptr.h"
	#include "base/posix/eintr_wrapper.h"
	#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
	#include "sandbox/linux/bpf_dsl/dump_bpf.h"
	#include "sandbox/linux/bpf_dsl/policy.h"
	#include "sandbox/linux/bpf_dsl/policy_compiler.h"
	#include "sandbox/linux/seccomp-bpf/codegen.h"
	#include "sandbox/linux/seccomp-bpf/die.h"
	#include "sandbox/linux/seccomp-bpf/errorcode.h"
	#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
	#include "sandbox/linux/seccomp-bpf/syscall.h"
	#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
	#include "sandbox/linux/seccomp-bpf/trap.h"
	#include "sandbox/linux/seccomp-bpf/verifier.h"
	#include "sandbox/linux/services/linux_syscalls.h"
	#include "sandbox/linux/services/syscall_wrappers.h"

	using sandbox::bpf_dsl::Allow;
	using sandbox::bpf_dsl::Error;
	using sandbox::bpf_dsl::ResultExpr;

	namespace sandbox {

	namespace {

	const int kExpectedExitCode = 100;

	#if !defined(NDEBUG)
	// Debug-only helper: writes a fixed "failed to set up stderr" banner to
	// |out_fd|, followed by the strerror() text for the current errno and a
	// newline. Each part is only attempted if the previous write succeeded;
	// write errors are otherwise ignored.
	void WriteFailedStderrSetupMessage(int out_fd) {
	  // Look up the description first, before any write() can change errno.
	  const char* const errno_text = strerror(errno);
	  static const char kBanner[] =
	      "You have reproduced a puzzling issue.\n"
	      "Please, report to crbug.com/152530!\n"
	      "Failed to set up stderr: ";

	  if (HANDLE_EINTR(write(out_fd, kBanner, sizeof(kBanner) - 1)) <= 0) {
	    return;
	  }
	  if (!errno_text) {
	    return;
	  }
	  if (HANDLE_EINTR(write(out_fd, errno_text, strlen(errno_text))) <= 0) {
	    return;
	  }
	  // The outcome of the final write is deliberately ignored; the empty "if"
	  // merely consumes the return value.
	  if (HANDLE_EINTR(write(out_fd, "\n", 1))) {
	  }
	}
	#endif // !defined(NDEBUG)

	// A deliberately tiny sandbox policy, just good enough to tell that the
	// sandbox has actually been activated:
	//   - getpid() fails with EPERM, so the probe can check the filter ran;
	//   - exit_group() is allowed, so the probe can exit with a chosen code;
	//   - everything else fails with EINVAL, an easily recognizable error.
	class ProbePolicy : public bpf_dsl::Policy {
	 public:
	  ProbePolicy() {}
	  virtual ~ProbePolicy() {}

	  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
	    if (sysnum == __NR_getpid) {
	      return Error(EPERM);
	    }
	    if (sysnum == __NR_exit_group) {
	      return Allow();
	    }
	    return Error(EINVAL);
	  }

	 private:
	  DISALLOW_COPY_AND_ASSIGN(ProbePolicy);
	};

	// Probe body: exits with kExpectedExitCode if getpid() fails with EPERM.
	// In every other case it simply returns to the caller.
	void ProbeProcess(void) {
	  if (sys_getpid() >= 0 || errno != EPERM) {
	    return;
	  }
	  sys_exit_group(kExpectedExitCode);
	}

	// Policy that allows every system call it is asked about. In debug builds it
	// additionally asserts that the queried number is a valid system call number.
	class AllowAllPolicy : public bpf_dsl::Policy {
	 public:
	  AllowAllPolicy() {
	  }

	  virtual ~AllowAllPolicy() {
	  }

	  // Always returns Allow(), regardless of |sysnum|.
	  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
	    DCHECK(SandboxBPF::IsValidSyscallNumber(sysnum));
	    return Allow();
	  }

	 private:
	  DISALLOW_COPY_AND_ASSIGN(AllowAllPolicy);
	};

	// Probe body: calls time() and exits with kExpectedExitCode if it succeeds.
	//
	// time() is implemented as a vsyscall. With an older glibc, with
	// vsyscall=emulate and some versions of the seccomp BPF patch we may get
	// SIGKILL-ed, which this probe is meant to detect.
	void TryVsyscallProcess(void) {
	  time_t now;
	  if (time(&now) == static_cast<time_t>(-1)) {
	    return;
	  }
	  sys_exit_group(kExpectedExitCode);
	}

	// Returns true if the process looks single-threaded, judged by the link
	// count of "self/task" relative to |proc_fd| (a descriptor for /proc).
	//
	// A negative |proc_fd| means the check cannot be performed; in that case the
	// function optimistically returns true. Any failure to open, stat or close
	// the task directory yields false.
	bool IsSingleThreaded(int proc_fd) {
	  if (proc_fd < 0) {
	    // Cannot determine whether program is single-threaded. Hope for
	    // the best...
	    return true;
	  }

	  const int task = openat(proc_fd, "self/task", O_RDONLY | O_DIRECTORY);
	  if (task < 0) {
	    return false;
	  }

	  // A link count of 3 is taken to mean exactly one thread (presumably ".",
	  // ".." and a single per-thread directory).
	  struct stat sb;
	  const bool one_thread = fstat(task, &sb) == 0 && sb.st_nlink == 3;

	  // Close the descriptor exactly once, on every path. A failed close() must
	  // not be retried: the descriptor number may already have been released and
	  // reused, so a second close() could hit an unrelated file.
	  const bool closed = IGNORE_EINTR(close(task)) == 0;

	  return one_thread && closed;
	}

	} // namespace

	// Constructs a sandbox object that is not quiet, has no /proc descriptor
	// (-1), has not been started and holds no policy.
	SandboxBPF::SandboxBPF()
	    : quiet_(false),
	      proc_fd_(-1),
	      sandbox_has_started_(false),
	      policy_() {}

	// No explicit teardown is performed here.
	SandboxBPF::~SandboxBPF() {}

	// Thin wrapper: forwards |sysnum| to SyscallSet::IsValid() and returns its
	// answer.
	bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
	  const bool valid = SyscallSet::IsValid(sysnum);
	  return valid;
	}

	// Forks a child process, installs |policy| in it and runs |code_in_sandbox|
	// there. Returns true iff the child exited normally with kExpectedExitCode.
	//
	// All signals are blocked around the fork(). The child's stderr is redirected
	// into a non-blocking pipe; if the child did not exit as expected and left
	// anything in that pipe, the parent treats the text as a fatal error message
	// and dies. Failures of sigprocmask(), pipe2(), fork(), waitpid() and of the
	// parent's close() calls are fatal as well.
	bool SandboxBPF::RunFunctionInPolicy(void (*code_in_sandbox)(),
	                                     scoped_ptr<bpf_dsl::Policy> policy) {
	  // Block all signals before forking a child process. This prevents an
	  // attacker from manipulating our test by sending us an unexpected signal.
	  sigset_t old_mask, new_mask;
	  if (sigfillset(&new_mask) || sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
	    SANDBOX_DIE("sigprocmask() failed");
	  }
	  int fds[2];
	  if (pipe2(fds, O_NONBLOCK | O_CLOEXEC)) {
	    SANDBOX_DIE("pipe() failed");
	  }

	  // The pipe must not have landed on stdin/stdout/stderr; the child is about
	  // to dup2() the write end onto descriptor 2.
	  if (fds[0] <= 2 || fds[1] <= 2) {
	    SANDBOX_DIE("Process started without standard file descriptors");
	  }

	  // This code is using fork() and should only ever run single-threaded.
	  // Most of the code below is "async-signal-safe" and only minor changes
	  // would be needed to support threads.
	  DCHECK(IsSingleThreaded(proc_fd_));
	  pid_t pid = fork();
	  if (pid < 0) {
	    // Die if we cannot fork(). We would probably fail a little later
	    // anyway, as the machine is likely very close to running out of
	    // memory.
	    // But what we don't want to do is return "false", as a crafty
	    // attacker might cause fork() to fail at will and could trick us
	    // into running without a sandbox.
	    sigprocmask(SIG_SETMASK, &old_mask, NULL);  // OK, if it fails
	    SANDBOX_DIE("fork() failed unexpectedly");
	  }

	  // In the child process
	  if (!pid) {
	    // Test a very simple sandbox policy to verify that we can
	    // successfully turn on sandboxing.
	    Die::EnableSimpleExit();

	    errno = 0;
	    if (IGNORE_EINTR(close(fds[0]))) {
	      // This call to close() has been failing in strange ways. See
	      // crbug.com/152530. So we only fail in debug mode now.
	#if !defined(NDEBUG)
	      WriteFailedStderrSetupMessage(fds[1]);
	      SANDBOX_DIE(NULL);
	#endif
	    }
	    if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
	      // Stderr could very well be a file descriptor to .xsession-errors, or
	      // another file, which could be backed by a file system that could cause
	      // dup2 to fail while trying to close stderr. It's important that we do
	      // not fail on trying to close stderr.
	      // If dup2 fails here, we will continue normally, this means that our
	      // parent won't cause a fatal failure if something writes to stderr in
	      // this child.
	#if !defined(NDEBUG)
	      // In DEBUG builds, we still want to get a report.
	      WriteFailedStderrSetupMessage(fds[1]);
	      SANDBOX_DIE(NULL);
	#endif
	    }
	    if (IGNORE_EINTR(close(fds[1]))) {
	      // This call to close() has been failing in strange ways. See
	      // crbug.com/152530. So we only fail in debug mode now.
	#if !defined(NDEBUG)
	      // fds[1] has just been handed to close() and must not be used any more.
	      // Report through descriptor 2 instead: in debug builds a failed dup2()
	      // above has already terminated the child, so 2 refers to the pipe here.
	      WriteFailedStderrSetupMessage(2);
	      SANDBOX_DIE(NULL);
	#endif
	    }

	    SetSandboxPolicy(policy.release());
	    if (!StartSandbox(PROCESS_SINGLE_THREADED)) {
	      SANDBOX_DIE(NULL);
	    }

	    // Run our code in the sandbox.
	    code_in_sandbox();

	    // code_in_sandbox() is not supposed to return here.
	    SANDBOX_DIE(NULL);
	  }

	  // In the parent process.
	  if (IGNORE_EINTR(close(fds[1]))) {
	    SANDBOX_DIE("close() failed");
	  }
	  if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
	    SANDBOX_DIE("sigprocmask() failed");
	  }
	  int status;
	  if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
	    SANDBOX_DIE("waitpid() failed unexpectedly");
	  }
	  bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;

	  // If we fail to support sandboxing, there might be an additional
	  // error message. If so, this was an entirely unexpected and fatal
	  // failure. We should report the failure and somebody must fix
	  // things. This is probably a security-critical bug in the sandboxing
	  // code.
	  if (!rc) {
	    char buf[4096];
	    // One byte is held back for the terminating NUL written below.
	    ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1));
	    if (len > 0) {
	      // Strip trailing newlines, but always keep at least one character.
	      while (len > 1 && buf[len - 1] == '\n') {
	        --len;
	      }
	      buf[len] = '\000';
	      SANDBOX_DIE(buf);
	    }
	  }
	  if (IGNORE_EINTR(close(fds[0]))) {
	    SANDBOX_DIE("close() failed");
	  }

	  return rc;
	}

	bool SandboxBPF::KernelSupportSeccompBPF() {
	return RunFunctionInPolicy(ProbeProcess,
	scoped_ptr<bpf_dsl::Policy>(new ProbePolicy())) &&
	RunFunctionInPolicy(TryVsyscallProcess,
	scoped_ptr<bpf_dsl::Policy>(new AllowAllPolicy()));
	}

	// static
	// Returns the (cached) sandbox status, refreshing it where needed. |proc_fd|
	// is used for the single-threaded check and for the probe sandbox. The
	// result is stored in status_.
	SandboxBPF::SandboxStatus SandboxBPF::SupportsSeccompSandbox(int proc_fd) {
	  switch (status_) {
	    case STATUS_ENABLED:
	      // If the sandbox is currently active, we clearly must have support for
	      // sandboxing.
	      return status_;

	    case STATUS_AVAILABLE:
	      // Even if the sandbox was previously available, something might have
	      // changed in our run-time environment. Check one more time.
	      if (!IsSingleThreaded(proc_fd)) {
	        status_ = STATUS_UNAVAILABLE;
	      }
	      return status_;

	    case STATUS_UNAVAILABLE:
	      // All state transitions resulting in STATUS_UNAVAILABLE are immediately
	      // preceded by STATUS_AVAILABLE, and they happen if and only if the
	      // process was multi-threaded. So a single-threaded process found in
	      // this state can safely be assumed to have sandboxing available.
	      if (IsSingleThreaded(proc_fd)) {
	        status_ = STATUS_AVAILABLE;
	      }
	      return status_;

	    case STATUS_UNKNOWN: {
	      // No cached value yet: perform a thorough check now.
	      //
	      // We create our own private copy of a "Sandbox" object. This ensures
	      // that the object does not have any policies configured, that might
	      // interfere with the tests done by "KernelSupportSeccompBPF()".
	      SandboxBPF sandbox;

	      // By setting "quiet_ = true" we suppress messages for expected and
	      // benign failures (e.g. if the current kernel lacks support for BPF
	      // filters).
	      sandbox.quiet_ = true;
	      sandbox.set_proc_fd(proc_fd);

	      if (!sandbox.KernelSupportSeccompBPF()) {
	        status_ = STATUS_UNSUPPORTED;
	      } else if (IsSingleThreaded(proc_fd)) {
	        status_ = STATUS_AVAILABLE;
	      } else {
	        // The probes ran in a child process, which is always
	        // single-threaded. The caller itself is not, so the sandbox is
	        // marked as temporarily unavailable.
	        status_ = STATUS_UNAVAILABLE;
	      }
	      return status_;
	    }

	    default:
	      // Any other state (e.g. STATUS_UNSUPPORTED) is reported unchanged.
	      return status_;
	  }
	}

	// static
	// Probes whether the kernel accepts seccomp(SECCOMP_SET_MODE_FILTER) with
	// SECCOMP_FILTER_FLAG_TSYNC. The call is made with a NULL filter, so it is
	// never expected to succeed: failing with EFAULT is reported as
	// STATUS_AVAILABLE, while ENOSYS or EINVAL is reported as STATUS_UNSUPPORTED.
	// Any other outcome trips a CHECK.
	SandboxBPF::SandboxStatus
	SandboxBPF::SupportsSeccompThreadFilterSynchronization() {
	  // Applying NO_NEW_PRIVS, a BPF filter, and synchronizing the filter across
	  // the thread group are all handled atomically by this syscall.
	  const int rv = syscall(
	      __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL);

	  const bool failed_with_efault = (rv == -1 && errno == EFAULT);
	  if (failed_with_efault) {
	    return STATUS_AVAILABLE;
	  }

	  // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
	  CHECK_EQ(-1, rv);
	  CHECK(ENOSYS == errno || EINVAL == errno);
	  return STATUS_UNSUPPORTED;
	}

	// Stores |proc_fd| as the descriptor this object uses for /proc lookups.
	void SandboxBPF::set_proc_fd(int proc_fd) {
	  proc_fd_ = proc_fd;
	}

	// Validates the requested |thread_state| against the actual process state,
	// closes the /proc descriptor, installs the BPF filter and marks the sandbox
	// as enabled. Every precondition failure ends in SANDBOX_DIE(); the
	// "return false" statements that follow are kept as a defensive fallback.
	bool SandboxBPF::StartSandbox(SandboxThreadState thread_state) {
	  CHECK(thread_state == PROCESS_SINGLE_THREADED ||
	        thread_state == PROCESS_MULTI_THREADED);

	  if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
	    SANDBOX_DIE(
	        "Trying to start sandbox, even though it is known to be "
	        "unavailable");
	    return false;
	  }
	  if (sandbox_has_started_) {
	    SANDBOX_DIE(
	        "Cannot repeatedly start sandbox. Create a separate Sandbox "
	        "object instead.");
	    return false;
	  }

	  // Open /proc ourselves if the caller did not supply a descriptor. For now,
	  // continue in degraded mode if we can't access /proc. In the future, we
	  // might want to tighten this requirement.
	  if (proc_fd_ < 0) {
	    proc_fd_ = open("/proc", O_RDONLY | O_DIRECTORY);
	  }

	  const bool supports_tsync =
	      SupportsSeccompThreadFilterSynchronization() == STATUS_AVAILABLE;

	  if (thread_state == PROCESS_SINGLE_THREADED &&
	      !IsSingleThreaded(proc_fd_)) {
	    SANDBOX_DIE("Cannot start sandbox; process is already multi-threaded");
	    return false;
	  }
	  if (thread_state == PROCESS_MULTI_THREADED) {
	    if (IsSingleThreaded(proc_fd_)) {
	      SANDBOX_DIE("Cannot start sandbox; "
	                  "process may be single-threaded when reported as not");
	      return false;
	    }
	    if (!supports_tsync) {
	      SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
	                  "filters for a threadgroup");
	      return false;
	    }
	  }

	  // We no longer need access to any files in /proc. We want to do this
	  // before installing the filters, just in case that our policy denies
	  // close().
	  if (proc_fd_ >= 0) {
	    const int close_result = IGNORE_EINTR(close(proc_fd_));
	    if (close_result) {
	      SANDBOX_DIE("Failed to close file descriptor for /proc");
	      return false;
	    }
	    proc_fd_ = -1;
	  }

	  // Install the filters, synchronizing across threads whenever the kernel
	  // supports it or the caller declared the process multi-threaded.
	  const bool sync_threads =
	      supports_tsync || thread_state == PROCESS_MULTI_THREADED;
	  InstallFilter(sync_threads);

	  // We are now inside the sandbox.
	  status_ = STATUS_ENABLED;

	  return true;
	}

	// Installs |policy| as this sandbox's policy; policy_ takes over the pointer.
	// Must be called before the sandbox has started, and (checked in debug
	// builds) only while no policy is set.
	// A raw pointer is taken rather than a scoped_ptr because polymorphism makes
	// the latter awkward to use here.
	void SandboxBPF::SetSandboxPolicy(bpf_dsl::Policy* policy) {
	  DCHECK(!policy_);

	  if (sandbox_has_started_) {
	    SANDBOX_DIE("Cannot change policy after sandbox has started");
	  }

	  policy_.reset(policy);
	}

	// Compiles the current policy into a BPF program and installs it in the
	// kernel. With |must_sync_threads| set, the filter is installed through the
	// seccomp() system call with SECCOMP_FILTER_FLAG_TSYNC; otherwise through
	// prctl(PR_SET_SECCOMP). Any kernel refusal ends in SANDBOX_DIE(). On
	// success, policy_ has been released and sandbox_has_started_ is true.
	void SandboxBPF::InstallFilter(bool must_sync_threads) {
	// We want to be very careful in not imposing any requirements on the
	// policies that are set with SetSandboxPolicy(). This means, as soon as
	// the sandbox is active, we shouldn't be relying on libraries that could
	// be making system calls. This, for example, means we should avoid
	// using the heap and we should avoid using STL functions.
	// Temporarily copy the contents of the "program" vector into a
	// stack-allocated array; and then explicitly destroy that object.
	// This makes sure we don't ex- or implicitly call new/delete after we
	// installed the BPF filter program in the kernel. Depending on the
	// system memory allocator that is in effect, these operators can result
	// in system calls to things like munmap() or brk().
	CodeGen::Program* program = AssembleFilter(false).release();

	// Variable-length stack array sized to the program; sock_fprog stores the
	// instruction count as an unsigned short.
	struct sock_filter bpf[program->size()];
	const struct sock_fprog prog = {static_cast<unsigned short>(program->size()),
	bpf};
	memcpy(bpf, &(*program)[0], sizeof(bpf));
	delete program;

	// Make an attempt to release memory that is no longer needed here, rather
	// than in the destructor. Try to avoid as much as possible to presume of
	// what will be possible to do in the new (sandboxed) execution environment.
	policy_.reset();

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
	SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
	}

	// Install BPF filter program. If the thread state indicates multi-threading
	// support, then the kernel has the seccomp system call. Otherwise, fall
	// back on prctl, which requires the process to be single-threaded.
	if (must_sync_threads) {
	int rv = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
	SECCOMP_FILTER_FLAG_TSYNC, reinterpret_cast<const char*>(&prog));
	if (rv) {
	SANDBOX_DIE(quiet_ ? NULL :
	"Kernel refuses to turn on and synchronize threads for BPF filters");
	}
	} else {
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
	SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
	}
	}

	sandbox_has_started_ = true;
	}

	// Compiles policy_ into a BPF program and returns it. The program is
	// additionally run through the verifier when |force_verification| is true,
	// and always in debug builds; a verification failure dumps the program and
	// dies.
	scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter(
	    bool force_verification) {
	#if !defined(NDEBUG)
	  force_verification = true;
	#endif

	  bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
	  scoped_ptr<CodeGen::Program> program = compiler.Compile();

	  // Verification is expensive. We only perform this step, if we are
	  // compiled in debug mode, or if the caller explicitly requested
	  // verification.
	  if (!force_verification) {
	    return program.Pass();
	  }

	  // Make sure compilation resulted in BPF program that executes
	  // correctly. Otherwise, there is an internal error in our BPF compiler.
	  // There is really nothing the caller can do until the bug is fixed.
	  const char* err = NULL;
	  // The verifier is handed the program and the policy themselves, so both
	  // smart pointers are dereferenced here.
	  const bool verified =
	      Verifier::VerifyBPF(&compiler, *program, *policy_, &err);
	  if (!verified) {
	    bpf_dsl::DumpBPF::PrintProgram(*program);
	    SANDBOX_DIE(err);
	  }

	  return program.Pass();
	}

	// Thin wrapper: forwards |sysno| to
	// bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap() and returns its answer.
	bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
	  const bool required = bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
	  return required;
	}

	// Re-issues the system call described by |args|: the number comes from
	// args.nr and the six arguments from args.args[0..5], each converted to
	// intptr_t. Returns whatever Syscall::Call() returns.
	intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
	  const intptr_t arg0 = static_cast<intptr_t>(args.args[0]);
	  const intptr_t arg1 = static_cast<intptr_t>(args.args[1]);
	  const intptr_t arg2 = static_cast<intptr_t>(args.args[2]);
	  const intptr_t arg3 = static_cast<intptr_t>(args.args[3]);
	  const intptr_t arg4 = static_cast<intptr_t>(args.args[4]);
	  const intptr_t arg5 = static_cast<intptr_t>(args.args[5]);
	  return Syscall::Call(args.nr, arg0, arg1, arg2, arg3, arg4, arg5);
	}

	// Process-wide cached sandbox status, shared by all SandboxBPF instances.
	// Starts out as STATUS_UNKNOWN.
	SandboxBPF::SandboxStatus SandboxBPF::status_ = STATUS_UNKNOWN;

	} // namespace sandbox