Add a "fork" of musl as //fusl.

This is musl verbatim at d5f8394f6ea9549607567bd92de12a2446c15614.

See http://www.musl-libc.org/.

R=kulakowski@chromium.org

Review URL: https://codereview.chromium.org/1573973002 .
diff --git a/fusl/src/thread/__futex.c b/fusl/src/thread/__futex.c
new file mode 100644
index 0000000..96307c0
--- /dev/null
+++ b/fusl/src/thread/__futex.c
@@ -0,0 +1,7 @@
+#include "futex.h"
+#include "syscall.h"
+
+int __futex(volatile int *addr, int op, int val, void *ts)
+{
+	return syscall(SYS_futex, addr, op, val, ts);
+}
diff --git a/fusl/src/thread/__lock.c b/fusl/src/thread/__lock.c
new file mode 100644
index 0000000..0874c04
--- /dev/null
+++ b/fusl/src/thread/__lock.c
@@ -0,0 +1,15 @@
+#include "pthread_impl.h"
+
+void __lock(volatile int *l)
+{
+	if (libc.threads_minus_1)
+		while (a_swap(l, 1)) __wait(l, l+1, 1, 1);
+}
+
+void __unlock(volatile int *l)
+{
+	if (l[0]) {
+		a_store(l, 0);
+		if (l[1]) __wake(l, 1, 1);
+	}
+}
diff --git a/fusl/src/thread/__set_thread_area.c b/fusl/src/thread/__set_thread_area.c
new file mode 100644
index 0000000..152a6a2
--- /dev/null
+++ b/fusl/src/thread/__set_thread_area.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+
+int __set_thread_area(void *p)
+{
+#ifdef SYS_set_thread_area
+	return __syscall(SYS_set_thread_area, p);
+#else
+	return -ENOSYS;
+#endif
+}
diff --git a/fusl/src/thread/__syscall_cp.c b/fusl/src/thread/__syscall_cp.c
new file mode 100644
index 0000000..09a2be8
--- /dev/null
+++ b/fusl/src/thread/__syscall_cp.c
@@ -0,0 +1,21 @@
+#include "pthread_impl.h"
+#include "syscall.h"
+
+__attribute__((__visibility__("hidden")))
+long __syscall_cp_c();
+
+static long sccp(syscall_arg_t nr,
+                 syscall_arg_t u, syscall_arg_t v, syscall_arg_t w,
+                 syscall_arg_t x, syscall_arg_t y, syscall_arg_t z)
+{
+	return (__syscall)(nr, u, v, w, x, y, z);
+}
+
+weak_alias(sccp, __syscall_cp_c);
+
+long (__syscall_cp)(syscall_arg_t nr,
+                    syscall_arg_t u, syscall_arg_t v, syscall_arg_t w,
+                    syscall_arg_t x, syscall_arg_t y, syscall_arg_t z)
+{
+	return __syscall_cp_c(nr, u, v, w, x, y, z);
+}
diff --git a/fusl/src/thread/__timedwait.c b/fusl/src/thread/__timedwait.c
new file mode 100644
index 0000000..13d8465
--- /dev/null
+++ b/fusl/src/thread/__timedwait.c
@@ -0,0 +1,46 @@
+#include <pthread.h>
+#include <time.h>
+#include <errno.h>
+#include "futex.h"
+#include "syscall.h"
+#include "pthread_impl.h"
+
+int __pthread_setcancelstate(int, int *);
+int __clock_gettime(clockid_t, struct timespec *);
+
+int __timedwait_cp(volatile int *addr, int val,
+	clockid_t clk, const struct timespec *at, int priv)
+{
+	int r;
+	struct timespec to, *top=0;
+
+	if (priv) priv = 128;
+
+	if (at) {
+		if (at->tv_nsec >= 1000000000UL) return EINVAL;
+		if (__clock_gettime(clk, &to)) return EINVAL;
+		to.tv_sec = at->tv_sec - to.tv_sec;
+		if ((to.tv_nsec = at->tv_nsec - to.tv_nsec) < 0) {
+			to.tv_sec--;
+			to.tv_nsec += 1000000000;
+		}
+		if (to.tv_sec < 0) return ETIMEDOUT;
+		top = &to;
+	}
+
+	r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|priv, val, top);
+	if (r == ENOSYS) r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT, val, top);
+	if (r != EINTR && r != ETIMEDOUT && r != ECANCELED) r = 0;
+
+	return r;
+}
+
+int __timedwait(volatile int *addr, int val,
+	clockid_t clk, const struct timespec *at, int priv)
+{
+	int cs, r;
+	__pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+	r = __timedwait_cp(addr, val, clk, at, priv);
+	__pthread_setcancelstate(cs, 0);
+	return r;
+}
diff --git a/fusl/src/thread/__tls_get_addr.c b/fusl/src/thread/__tls_get_addr.c
new file mode 100644
index 0000000..6945faa
--- /dev/null
+++ b/fusl/src/thread/__tls_get_addr.c
@@ -0,0 +1,16 @@
+#include <stddef.h>
+#include "pthread_impl.h"
+#include "libc.h"
+
+__attribute__((__visibility__("hidden")))
+void *__tls_get_new(size_t *);
+
+void *__tls_get_addr(size_t *v)
+{
+	pthread_t self = __pthread_self();
+	if (v[0]<=(size_t)self->dtv[0])
+		return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
+	return __tls_get_new(v);
+}
+
+weak_alias(__tls_get_addr, __tls_get_new);
diff --git a/fusl/src/thread/__unmapself.c b/fusl/src/thread/__unmapself.c
new file mode 100644
index 0000000..1d3bee1
--- /dev/null
+++ b/fusl/src/thread/__unmapself.c
@@ -0,0 +1,29 @@
+#include "pthread_impl.h"
+#include "atomic.h"
+#include "syscall.h"
+/* cheat and reuse CRTJMP macro from dynlink code */
+#include "dynlink.h"
+
+static volatile int lock;
+static void *unmap_base;
+static size_t unmap_size;
+static char shared_stack[256];
+
+static void do_unmap()
+{
+	__syscall(SYS_munmap, unmap_base, unmap_size);
+	__syscall(SYS_exit);
+}
+
+void __unmapself(void *base, size_t size)
+{
+	int tid=__pthread_self()->tid;
+	char *stack = shared_stack + sizeof shared_stack;
+	stack -= (uintptr_t)stack % 16;
+	while (lock || a_cas(&lock, 0, tid))
+		a_spin();
+	__syscall(SYS_set_tid_address, &lock);
+	unmap_base = base;
+	unmap_size = size;
+	CRTJMP(do_unmap, stack);
+}
diff --git a/fusl/src/thread/__wait.c b/fusl/src/thread/__wait.c
new file mode 100644
index 0000000..dc33c1a
--- /dev/null
+++ b/fusl/src/thread/__wait.c
@@ -0,0 +1,17 @@
+#include "pthread_impl.h"
+
+void __wait(volatile int *addr, volatile int *waiters, int val, int priv)
+{
+	int spins=100;
+	if (priv) priv = FUTEX_PRIVATE;
+	while (spins-- && (!waiters || !*waiters)) {
+		if (*addr==val) a_spin();
+		else return;
+	}
+	if (waiters) a_inc(waiters);
+	while (*addr==val) {
+		__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0) != -ENOSYS
+		|| __syscall(SYS_futex, addr, FUTEX_WAIT, val, 0);
+	}
+	if (waiters) a_dec(waiters);
+}
diff --git a/fusl/src/thread/aarch64/__set_thread_area.s b/fusl/src/thread/aarch64/__set_thread_area.s
new file mode 100644
index 0000000..97a80ac
--- /dev/null
+++ b/fusl/src/thread/aarch64/__set_thread_area.s
@@ -0,0 +1,6 @@
+.global __set_thread_area
+.type   __set_thread_area,@function
+__set_thread_area:
+	msr tpidr_el0,x0
+	mov w0,#0
+	ret
diff --git a/fusl/src/thread/aarch64/__unmapself.s b/fusl/src/thread/aarch64/__unmapself.s
new file mode 100644
index 0000000..2c5d254
--- /dev/null
+++ b/fusl/src/thread/aarch64/__unmapself.s
@@ -0,0 +1,7 @@
+.global __unmapself
+.type   __unmapself,%function
+__unmapself:
+	mov x8,#215 // SYS_munmap
+	svc 0
+	mov x8,#93 // SYS_exit
+	svc 0
diff --git a/fusl/src/thread/aarch64/clone.s b/fusl/src/thread/aarch64/clone.s
new file mode 100644
index 0000000..50af913
--- /dev/null
+++ b/fusl/src/thread/aarch64/clone.s
@@ -0,0 +1,29 @@
+// __clone(func, stack, flags, arg, ptid, tls, ctid)
+//         x0,   x1,    w2,    x3,  x4,   x5,  x6
+
+// syscall(SYS_clone, flags, stack, ptid, tls, ctid)
+//         x8,        x0,    x1,    x2,   x3,  x4
+
+.global __clone
+.type   __clone,%function
+__clone:
+	// align stack and save func,arg
+	and x1,x1,#-16
+	stp x0,x3,[x1,#-16]!
+
+	// syscall
+	uxtw x0,w2
+	mov x2,x4
+	mov x3,x5
+	mov x4,x6
+	mov x8,#220 // SYS_clone
+	svc #0
+
+	cbz x0,1f
+	// parent
+	ret
+	// child
+1:	ldp x1,x0,[sp],#16
+	blr x1
+	mov x8,#93 // SYS_exit
+	svc #0
diff --git a/fusl/src/thread/aarch64/syscall_cp.s b/fusl/src/thread/aarch64/syscall_cp.s
new file mode 100644
index 0000000..41db68a
--- /dev/null
+++ b/fusl/src/thread/aarch64/syscall_cp.s
@@ -0,0 +1,32 @@
+// __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z)
+//                  x0             x1  x2 x3 x4 x5 x6 x7
+
+// syscall(nr, u, v, w, x, y, z)
+//         x8  x0 x1 x2 x3 x4 x5
+
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type __syscall_cp_asm,%function
+__syscall_cp_asm:
+__cp_begin:
+	ldr w0,[x0]
+	cbnz w0,__cp_cancel
+	mov x8,x1
+	mov x0,x2
+	mov x1,x3
+	mov x2,x4
+	mov x3,x5
+	mov x4,x6
+	mov x5,x7
+	svc 0
+__cp_end:
+	ret
+__cp_cancel:
+	b __cancel
diff --git a/fusl/src/thread/arm/__set_thread_area.s b/fusl/src/thread/arm/__set_thread_area.s
new file mode 100644
index 0000000..4a4cd0d
--- /dev/null
+++ b/fusl/src/thread/arm/__set_thread_area.s
@@ -0,0 +1 @@
+/* Replaced by C code in arch/arm/src */
diff --git a/fusl/src/thread/arm/__unmapself.s b/fusl/src/thread/arm/__unmapself.s
new file mode 100644
index 0000000..29c2d07
--- /dev/null
+++ b/fusl/src/thread/arm/__unmapself.s
@@ -0,0 +1,9 @@
+.syntax unified
+.text
+.global __unmapself
+.type   __unmapself,%function
+__unmapself:
+	mov r7,#91
+	svc 0
+	mov r7,#1
+	svc 0
diff --git a/fusl/src/thread/arm/clone.s b/fusl/src/thread/arm/clone.s
new file mode 100644
index 0000000..fe2e0e6
--- /dev/null
+++ b/fusl/src/thread/arm/clone.s
@@ -0,0 +1,31 @@
+.syntax unified
+.text
+.global __clone
+.type   __clone,%function
+__clone:
+	stmfd sp!,{r4,r5,r6,r7}
+	mov r7,#120
+	mov r6,r3
+	mov r5,r0
+	mov r0,r2
+	and r1,r1,#-16
+	ldr r2,[sp,#16]
+	ldr r3,[sp,#20]
+	ldr r4,[sp,#24]
+	svc 0
+	tst r0,r0
+	beq 1f
+	ldmfd sp!,{r4,r5,r6,r7}
+	bx lr
+
+1:	mov r0,r6
+	tst r5,#1
+	bne 1f
+	mov lr,pc
+	mov pc,r5
+2:	mov r7,#1
+	svc 0
+
+1:	mov lr,pc
+	bx r5
+	b 2b
diff --git a/fusl/src/thread/arm/syscall_cp.s b/fusl/src/thread/arm/syscall_cp.s
new file mode 100644
index 0000000..a5730c0
--- /dev/null
+++ b/fusl/src/thread/arm/syscall_cp.s
@@ -0,0 +1,29 @@
+.syntax unified
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type __syscall_cp_asm,%function
+__syscall_cp_asm:
+	mov ip,sp
+	stmfd sp!,{r4,r5,r6,r7,lr}
+__cp_begin:
+	ldr r0,[r0]
+	cmp r0,#0
+	blne __cp_cancel
+	mov r7,r1
+	mov r0,r2
+	mov r1,r3
+	ldmfd ip,{r2,r3,r4,r5,r6}
+	svc 0
+__cp_end:
+	ldmfd sp!,{r4,r5,r6,r7,lr}
+	bx lr
+__cp_cancel:
+	ldmfd sp!,{r4,r5,r6,r7,lr}
+	b __cancel
diff --git a/fusl/src/thread/call_once.c b/fusl/src/thread/call_once.c
new file mode 100644
index 0000000..a7bc935
--- /dev/null
+++ b/fusl/src/thread/call_once.c
@@ -0,0 +1,8 @@
+#include <threads.h>
+
+int __pthread_once(once_flag *, void (*)(void));
+
+void call_once(once_flag *flag, void (*func)(void))
+{
+	__pthread_once(flag, func);
+}
diff --git a/fusl/src/thread/clone.c b/fusl/src/thread/clone.c
new file mode 100644
index 0000000..be80c8e
--- /dev/null
+++ b/fusl/src/thread/clone.c
@@ -0,0 +1,7 @@
+#include <errno.h>
+#include "pthread_impl.h"
+
+int __clone(int (*func)(void *), void *stack, int flags, void *arg, ...)
+{
+	return -ENOSYS;
+}
diff --git a/fusl/src/thread/cnd_broadcast.c b/fusl/src/thread/cnd_broadcast.c
new file mode 100644
index 0000000..85d4d3e
--- /dev/null
+++ b/fusl/src/thread/cnd_broadcast.c
@@ -0,0 +1,10 @@
+#include <threads.h>
+
+int __private_cond_signal(cnd_t *, int);
+
+int cnd_broadcast(cnd_t *c)
+{
+	/* This internal function never fails, and always returns zero,
+	 * which matches the value thrd_success is defined with. */
+	return __private_cond_signal(c, -1);
+}
diff --git a/fusl/src/thread/cnd_destroy.c b/fusl/src/thread/cnd_destroy.c
new file mode 100644
index 0000000..453c90b
--- /dev/null
+++ b/fusl/src/thread/cnd_destroy.c
@@ -0,0 +1,6 @@
+#include <threads.h>
+
+void cnd_destroy(cnd_t *c)
+{
+	/* For private cv this is a no-op */
+}
diff --git a/fusl/src/thread/cnd_init.c b/fusl/src/thread/cnd_init.c
new file mode 100644
index 0000000..18c5085
--- /dev/null
+++ b/fusl/src/thread/cnd_init.c
@@ -0,0 +1,7 @@
+#include <threads.h>
+
+int cnd_init(cnd_t *c)
+{
+	*c = (cnd_t){ 0 };
+	return thrd_success;
+}
diff --git a/fusl/src/thread/cnd_signal.c b/fusl/src/thread/cnd_signal.c
new file mode 100644
index 0000000..1211260
--- /dev/null
+++ b/fusl/src/thread/cnd_signal.c
@@ -0,0 +1,10 @@
+#include <threads.h>
+
+int __private_cond_signal(cnd_t *, int);
+
+int cnd_signal(cnd_t *c)
+{
+	/* This internal function never fails, and always returns zero,
+	 * which matches the value thrd_success is defined with. */
+	return __private_cond_signal(c, 1);
+}
diff --git a/fusl/src/thread/cnd_timedwait.c b/fusl/src/thread/cnd_timedwait.c
new file mode 100644
index 0000000..5997679
--- /dev/null
+++ b/fusl/src/thread/cnd_timedwait.c
@@ -0,0 +1,15 @@
+#include <threads.h>
+#include <errno.h>
+
+int __pthread_cond_timedwait(cnd_t *restrict, mtx_t *restrict, const struct timespec *restrict);
+
+int cnd_timedwait(cnd_t *restrict c, mtx_t *restrict m, const struct timespec *restrict ts)
+{
+	int ret = __pthread_cond_timedwait(c, m, ts);
+	switch (ret) {
+	/* May also return EINVAL or EPERM. */
+	default:        return thrd_error;
+	case 0:         return thrd_success;
+	case ETIMEDOUT: return thrd_timedout;
+	}
+}
diff --git a/fusl/src/thread/cnd_wait.c b/fusl/src/thread/cnd_wait.c
new file mode 100644
index 0000000..602796f
--- /dev/null
+++ b/fusl/src/thread/cnd_wait.c
@@ -0,0 +1,9 @@
+#include <threads.h>
+
+int cnd_wait(cnd_t *c, mtx_t *m)
+{
+	/* Calling cnd_timedwait with a null pointer is an extension.
+	 * It is convenient here to avoid duplication of the logic
+	 * for return values. */
+	return cnd_timedwait(c, m, 0);
+}
diff --git a/fusl/src/thread/i386/__set_thread_area.s b/fusl/src/thread/i386/__set_thread_area.s
new file mode 100644
index 0000000..3a558fb
--- /dev/null
+++ b/fusl/src/thread/i386/__set_thread_area.s
@@ -0,0 +1,45 @@
+.text
+.global __set_thread_area
+.type   __set_thread_area,@function
+__set_thread_area:
+	push %ebx
+	push $0x51
+	push $0xfffff
+	push 16(%esp)
+	call 1f
+1:	addl $4f-1b,(%esp)
+	pop %ecx
+	mov (%ecx),%edx
+	push %edx
+	mov %esp,%ebx
+	xor %eax,%eax
+	mov $243,%al
+	int $128
+	testl %eax,%eax
+	jnz 2f
+	movl (%esp),%edx
+	movl %edx,(%ecx)
+	leal 3(,%edx,8),%edx
+3:	movw %dx,%gs
+1:
+	addl $16,%esp
+	popl %ebx
+	ret
+2:
+	mov %ebx,%ecx
+	xor %ebx,%ebx
+	xor %edx,%edx
+	mov %ebx,(%esp)
+	mov $1,%bl
+	mov $16,%dl
+	mov $123,%al
+	int $128
+	testl %eax,%eax
+	jnz 1b
+	mov $7,%dl
+	inc %al
+	jmp 3b
+
+.data
+	.align 4
+4:	.long -1
diff --git a/fusl/src/thread/i386/__unmapself.s b/fusl/src/thread/i386/__unmapself.s
new file mode 100644
index 0000000..d656959
--- /dev/null
+++ b/fusl/src/thread/i386/__unmapself.s
@@ -0,0 +1,11 @@
+.text
+.global __unmapself
+.type   __unmapself,@function
+__unmapself:
+	movl $91,%eax
+	movl 4(%esp),%ebx
+	movl 8(%esp),%ecx
+	int $128
+	xorl %ebx,%ebx
+	movl $1,%eax
+	int $128
diff --git a/fusl/src/thread/i386/clone.s b/fusl/src/thread/i386/clone.s
new file mode 100644
index 0000000..52fe7ef
--- /dev/null
+++ b/fusl/src/thread/i386/clone.s
@@ -0,0 +1,48 @@
+.text
+.global __clone
+.type   __clone,@function
+__clone:
+	push %ebp
+	mov %esp,%ebp
+	push %ebx
+	push %esi
+	push %edi
+
+	xor %eax,%eax
+	push $0x51
+	mov %gs,%ax
+	push $0xfffff
+	shr $3,%eax
+	push 28(%ebp)
+	push %eax
+	mov $120,%al
+
+	mov 12(%ebp),%ecx
+	mov 16(%ebp),%ebx
+	and $-16,%ecx
+	sub $16,%ecx
+	mov 20(%ebp),%edi
+	mov %edi,(%ecx)
+	mov 24(%ebp),%edx
+	mov %esp,%esi
+	mov 32(%ebp),%edi
+	mov 8(%ebp),%ebp
+	int $128
+	test %eax,%eax
+	jnz 1f
+
+	mov %ebp,%eax
+	xor %ebp,%ebp
+	call *%eax
+	mov %eax,%ebx
+	xor %eax,%eax
+	inc %eax
+	int $128
+	hlt
+
+1:	add $16,%esp
+	pop %edi
+	pop %esi
+	pop %ebx
+	pop %ebp
+	ret
diff --git a/fusl/src/thread/i386/syscall_cp.s b/fusl/src/thread/i386/syscall_cp.s
new file mode 100644
index 0000000..7dce1eb
--- /dev/null
+++ b/fusl/src/thread/i386/syscall_cp.s
@@ -0,0 +1,41 @@
+.text
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+__syscall_cp_asm:
+	mov 4(%esp),%ecx
+	pushl %ebx
+	pushl %esi
+	pushl %edi
+	pushl %ebp
+__cp_begin:
+	movl (%ecx),%eax
+	testl %eax,%eax
+	jnz __cp_cancel
+	movl 24(%esp),%eax
+	movl 28(%esp),%ebx
+	movl 32(%esp),%ecx
+	movl 36(%esp),%edx
+	movl 40(%esp),%esi
+	movl 44(%esp),%edi
+	movl 48(%esp),%ebp
+	int $128
+__cp_end:
+	popl %ebp
+	popl %edi
+	popl %esi
+	popl %ebx
+	ret
+__cp_cancel:
+	popl %ebp
+	popl %edi
+	popl %esi
+	popl %ebx
+	jmp __cancel
diff --git a/fusl/src/thread/i386/tls.s b/fusl/src/thread/i386/tls.s
new file mode 100644
index 0000000..76d5d46
--- /dev/null
+++ b/fusl/src/thread/i386/tls.s
@@ -0,0 +1,17 @@
+.text
+.global ___tls_get_addr
+.type ___tls_get_addr,@function
+___tls_get_addr:
+	mov %gs:4,%edx
+	mov (%eax),%ecx
+	cmp %ecx,(%edx)
+	jc 1f
+	mov 4(%eax),%eax
+	add (%edx,%ecx,4),%eax
+	ret
+1:	push %eax
+.weak __tls_get_new
+.hidden __tls_get_new
+	call __tls_get_new
+	pop %edx
+	ret
diff --git a/fusl/src/thread/lock_ptc.c b/fusl/src/thread/lock_ptc.c
new file mode 100644
index 0000000..7adedab
--- /dev/null
+++ b/fusl/src/thread/lock_ptc.c
@@ -0,0 +1,18 @@
+#include <pthread.h>
+
+static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
+
+void __inhibit_ptc()
+{
+	pthread_rwlock_wrlock(&lock);
+}
+
+void __acquire_ptc()
+{
+	pthread_rwlock_rdlock(&lock);
+}
+
+void __release_ptc()
+{
+	pthread_rwlock_unlock(&lock);
+}
diff --git a/fusl/src/thread/microblaze/__set_thread_area.s b/fusl/src/thread/microblaze/__set_thread_area.s
new file mode 100644
index 0000000..21dc1e1
--- /dev/null
+++ b/fusl/src/thread/microblaze/__set_thread_area.s
@@ -0,0 +1,6 @@
+.global __set_thread_area
+.type   __set_thread_area,@function
+__set_thread_area:
+	ori      r21, r5, 0
+	rtsd     r15, 8
+	ori      r3, r0, 0
diff --git a/fusl/src/thread/microblaze/__unmapself.s b/fusl/src/thread/microblaze/__unmapself.s
new file mode 100644
index 0000000..b180de6
--- /dev/null
+++ b/fusl/src/thread/microblaze/__unmapself.s
@@ -0,0 +1,8 @@
+.global __unmapself
+.type   __unmapself,@function
+__unmapself:
+	ori     r12, r0, 91
+	brki    r14, 0x8
+	ori     r12, r0, 1
+	brki    r14, 0x8
+	nop
diff --git a/fusl/src/thread/microblaze/clone.s b/fusl/src/thread/microblaze/clone.s
new file mode 100644
index 0000000..13448a3
--- /dev/null
+++ b/fusl/src/thread/microblaze/clone.s
@@ -0,0 +1,29 @@
+.global __clone
+.type   __clone,@function
+
+# r5, r6, r7, r8, r9, r10, stack
+# fn, st, fl, ar, pt, tl, ct
+# fl, st, __, pt, ct, tl
+
+__clone:
+	andi    r6, r6, -16
+	addi    r6, r6, -16
+	swi     r5, r6, 0
+	swi     r8, r6, 4
+
+	ori     r5, r7, 0
+	ori     r8, r9, 0
+	lwi     r9, r1, 28
+	ori     r12, r0, 120
+
+	brki    r14, 8
+	beqi	r3, 1f
+	rtsd    r15, 8
+	nop
+
+1:	lwi     r3, r1, 0
+	lwi     r5, r1, 4
+	brald   r15, r3
+	nop
+	ori     r12, r0, 1
+	brki    r14, 8
diff --git a/fusl/src/thread/microblaze/syscall_cp.s b/fusl/src/thread/microblaze/syscall_cp.s
new file mode 100644
index 0000000..b0df61c
--- /dev/null
+++ b/fusl/src/thread/microblaze/syscall_cp.s
@@ -0,0 +1,27 @@
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+__syscall_cp_asm:
+__cp_begin:
+	lwi     r5, r5, 0
+	bnei    r5, __cp_cancel
+	addi    r12, r6, 0
+	add     r5, r7, r0
+	add     r6, r8, r0
+	add     r7, r9, r0
+	add     r8, r10, r0
+	lwi     r9, r1, 28
+	lwi     r10, r1, 32
+	brki    r14, 0x8
+__cp_end:
+	rtsd    r15, 8
+	nop
+__cp_cancel:
+	bri     __cancel
diff --git a/fusl/src/thread/mips/__unmapself.s b/fusl/src/thread/mips/__unmapself.s
new file mode 100644
index 0000000..ba139dc
--- /dev/null
+++ b/fusl/src/thread/mips/__unmapself.s
@@ -0,0 +1,10 @@
+.set noreorder
+.global __unmapself
+.type   __unmapself,@function
+__unmapself:
+	move $sp, $25
+	li $2, 4091
+	syscall
+	li $4, 0
+	li $2, 4001
+	syscall
diff --git a/fusl/src/thread/mips/clone.s b/fusl/src/thread/mips/clone.s
new file mode 100644
index 0000000..37dddf5
--- /dev/null
+++ b/fusl/src/thread/mips/clone.s
@@ -0,0 +1,32 @@
+.set noreorder
+.global __clone
+.type   __clone,@function
+__clone:
+	# Save function pointer and argument pointer on new thread stack
+	and $5, $5, -8
+	subu $5, $5, 16
+	sw $4, 0($5)
+	sw $7, 4($5)
+	# Shuffle (fn,sp,fl,arg,ptid,tls,ctid) to (fl,sp,ptid,tls,ctid)
+	move $4, $6
+	lw $6, 16($sp)
+	lw $7, 20($sp)
+	lw $9, 24($sp)
+	subu $sp, $sp, 16
+	sw $9, 16($sp)
+	li $2, 4120
+	syscall
+	beq $7, $0, 1f
+	nop
+	addu $sp, $sp, 16
+	jr $ra
+	subu $2, $0, $2
+1:	beq $2, $0, 1f
+	nop
+	addu $sp, $sp, 16
+	jr $ra
+	nop
+1:	lw $25, 0($sp)
+	lw $4, 4($sp)
+	jr $25
+	nop
diff --git a/fusl/src/thread/mips/syscall_cp.s b/fusl/src/thread/mips/syscall_cp.s
new file mode 100644
index 0000000..8f76d40
--- /dev/null
+++ b/fusl/src/thread/mips/syscall_cp.s
@@ -0,0 +1,46 @@
+.set    noreorder
+
+.global __cp_begin
+.hidden __cp_begin
+.type   __cp_begin,@function
+.global __cp_end
+.hidden __cp_end
+.type   __cp_end,@function
+.global __cp_cancel
+.hidden __cp_cancel
+.type   __cp_cancel,@function
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+__syscall_cp_asm:
+	subu    $sp, $sp, 32
+__cp_begin:
+	lw      $4, 0($4)
+	bne     $4, $0, __cp_cancel
+	move    $2, $5
+	move    $4, $6
+	move    $5, $7
+	lw      $6, 48($sp)
+	lw      $7, 52($sp)
+	lw      $8, 56($sp)
+	lw      $9, 60($sp)
+	lw      $10,64($sp)
+	sw      $8, 16($sp)
+	sw      $9, 20($sp)
+	sw      $10,24($sp)
+	sw      $2, 28($sp)
+	lw      $2, 28($sp)
+	syscall
+__cp_end:
+	beq     $7, $0, 1f
+	addu    $sp, $sp, 32
+	subu    $2, $0, $2
+1:	jr      $ra
+	nop
+
+__cp_cancel:
+	addu    $sp, $sp, 32
+	lw      $25, %call16(__cancel)($gp)
+	jr      $25
+	nop
diff --git a/fusl/src/thread/mtx_destroy.c b/fusl/src/thread/mtx_destroy.c
new file mode 100644
index 0000000..40a0899
--- /dev/null
+++ b/fusl/src/thread/mtx_destroy.c
@@ -0,0 +1,5 @@
+#include <threads.h>
+
+void mtx_destroy(mtx_t *mtx)
+{
+}
diff --git a/fusl/src/thread/mtx_init.c b/fusl/src/thread/mtx_init.c
new file mode 100644
index 0000000..4826f76
--- /dev/null
+++ b/fusl/src/thread/mtx_init.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int mtx_init(mtx_t *m, int type)
+{
+	*m = (mtx_t){
+		._m_type = ((type&mtx_recursive) ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL),
+	};
+	return thrd_success;
+}
diff --git a/fusl/src/thread/mtx_lock.c b/fusl/src/thread/mtx_lock.c
new file mode 100644
index 0000000..5c2415c
--- /dev/null
+++ b/fusl/src/thread/mtx_lock.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int mtx_lock(mtx_t *m)
+{
+	if (m->_m_type == PTHREAD_MUTEX_NORMAL && !a_cas(&m->_m_lock, 0, EBUSY))
+		return thrd_success;
+	/* Calling mtx_timedlock with a null pointer is an extension.
+	 * It is convenient here to avoid duplication of the logic
+	 * for return values. */
+	return mtx_timedlock(m, 0);
+}
diff --git a/fusl/src/thread/mtx_timedlock.c b/fusl/src/thread/mtx_timedlock.c
new file mode 100644
index 0000000..bcc152c
--- /dev/null
+++ b/fusl/src/thread/mtx_timedlock.c
@@ -0,0 +1,14 @@
+#include <threads.h>
+#include <errno.h>
+
+int __pthread_mutex_timedlock(mtx_t *restrict, const struct timespec *restrict);
+
+int mtx_timedlock(mtx_t *restrict m, const struct timespec *restrict ts)
+{
+	int ret = __pthread_mutex_timedlock(m, ts);
+	switch (ret) {
+	default:        return thrd_error;
+	case 0:         return thrd_success;
+	case ETIMEDOUT: return thrd_timedout;
+	}
+}
diff --git a/fusl/src/thread/mtx_trylock.c b/fusl/src/thread/mtx_trylock.c
new file mode 100644
index 0000000..61e7694
--- /dev/null
+++ b/fusl/src/thread/mtx_trylock.c
@@ -0,0 +1,17 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_mutex_trylock(mtx_t *);
+
+int mtx_trylock(mtx_t *m)
+{
+	if (m->_m_type == PTHREAD_MUTEX_NORMAL)
+		return (a_cas(&m->_m_lock, 0, EBUSY) & EBUSY) ? thrd_busy : thrd_success;
+
+	int ret = __pthread_mutex_trylock(m);
+	switch (ret) {
+	default:    return thrd_error;
+	case 0:     return thrd_success;
+	case EBUSY: return thrd_busy;
+	}
+}
diff --git a/fusl/src/thread/mtx_unlock.c b/fusl/src/thread/mtx_unlock.c
new file mode 100644
index 0000000..5033ace
--- /dev/null
+++ b/fusl/src/thread/mtx_unlock.c
@@ -0,0 +1,11 @@
+#include <threads.h>
+
+int __pthread_mutex_unlock(mtx_t *);
+
+int mtx_unlock(mtx_t *mtx)
+{
+	/* The only cases where pthread_mutex_unlock can return an
+	 * error are undefined behavior for C11 mtx_unlock, so we can
+	 * assume it does not return an error and simply tail call. */
+	return __pthread_mutex_unlock(mtx);
+}
diff --git a/fusl/src/thread/or1k/__set_thread_area.s b/fusl/src/thread/or1k/__set_thread_area.s
new file mode 100644
index 0000000..44c5d45
--- /dev/null
+++ b/fusl/src/thread/or1k/__set_thread_area.s
@@ -0,0 +1,6 @@
+.global __set_thread_area
+.type   __set_thread_area,@function
+__set_thread_area:
+	l.ori	r10, r3, 0
+	l.jr	r9
+	 l.ori	r11, r0, 0
diff --git a/fusl/src/thread/or1k/__unmapself.s b/fusl/src/thread/or1k/__unmapself.s
new file mode 100644
index 0000000..6c0fa2a
--- /dev/null
+++ b/fusl/src/thread/or1k/__unmapself.s
@@ -0,0 +1,8 @@
+.global __unmapself
+.type   __unmapself,@function
+__unmapself:
+	l.ori	r11, r0, 215 /* __NR_munmap */
+	l.sys	1
+	l.ori	r3, r0, 0
+	l.ori	r11, r0, 93 /* __NR_exit */
+	l.sys	1
diff --git a/fusl/src/thread/or1k/clone.s b/fusl/src/thread/or1k/clone.s
new file mode 100644
index 0000000..02f380b
--- /dev/null
+++ b/fusl/src/thread/or1k/clone.s
@@ -0,0 +1,30 @@
+/* int clone(fn, stack, flags, arg, ptid, tls, ctid)
+ *           r3  r4     r5     r6   sp+0  sp+4 sp+8
+ * sys_clone(flags, stack, ptid, ctid, tls)
+ */
+.global __clone
+.type   __clone,@function
+__clone:
+	l.addi	r4, r4, -8
+	l.sw	0(r4), r3
+	l.sw	4(r4), r6
+	/* (fn, st, fl, ar, pt, tl, ct) => (fl, st, pt, ct, tl) */
+	l.ori	r3, r5, 0
+	l.lwz	r5, 0(r1)
+	l.lwz	r6, 8(r1)
+	l.lwz	r7, 4(r1)
+	l.ori	r11, r0, 220 /* __NR_clone */
+	l.sys	1
+
+	l.sfeqi	r11, 0
+	l.bf	1f
+	 l.nop
+	l.jr	r9
+	 l.nop
+
+1:	l.lwz	r11, 0(r1)
+	l.jalr	r11
+	 l.lwz	r3, 4(r1)
+
+	l.ori	r11, r0, 93 /* __NR_exit */
+	l.sys	1
diff --git a/fusl/src/thread/or1k/syscall_cp.s b/fusl/src/thread/or1k/syscall_cp.s
new file mode 100644
index 0000000..7951166
--- /dev/null
+++ b/fusl/src/thread/or1k/syscall_cp.s
@@ -0,0 +1,29 @@
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+__syscall_cp_asm:
+__cp_begin:
+	l.lwz	r3, 0(r3)
+	l.sfeqi	r3, 0
+	l.bnf	__cp_cancel
+	 l.ori	r11, r4, 0
+	l.ori	r3, r5, 0
+	l.ori	r4, r6, 0
+	l.ori	r5, r7, 0
+	l.ori	r6, r8, 0
+	l.lwz	r7, 0(r1)
+	l.lwz	r8, 4(r1)
+	l.sys	1
+__cp_end:
+	l.jr	r9
+	 l.nop
+__cp_cancel:
+	l.j	__cancel
+	 l.nop
diff --git a/fusl/src/thread/powerpc/__set_thread_area.s b/fusl/src/thread/powerpc/__set_thread_area.s
new file mode 100644
index 0000000..c1a34c1
--- /dev/null
+++ b/fusl/src/thread/powerpc/__set_thread_area.s
@@ -0,0 +1,11 @@
+.text
+.global __set_thread_area
+.type   __set_thread_area, %function
+__set_thread_area:
+	# mov pointer in reg3 into r2
+	mr 2, 3
+	# put 0 into return reg
+	li 3, 0
+	# return
+	blr
+
diff --git a/fusl/src/thread/powerpc/__unmapself.s b/fusl/src/thread/powerpc/__unmapself.s
new file mode 100644
index 0000000..c9360b4
--- /dev/null
+++ b/fusl/src/thread/powerpc/__unmapself.s
@@ -0,0 +1,9 @@
+	.text
+	.global __unmapself
+	.type   __unmapself,%function
+__unmapself:
+	li      0, 91 # __NR_munmap
+	sc
+	li      0, 1 #__NR_exit
+	sc
+	blr
diff --git a/fusl/src/thread/powerpc/clone.s b/fusl/src/thread/powerpc/clone.s
new file mode 100644
index 0000000..dc5c74c
--- /dev/null
+++ b/fusl/src/thread/powerpc/clone.s
@@ -0,0 +1,67 @@
+.text
+.global __clone
+.type __clone, %function
+__clone:
+# int clone(fn, stack, flags, arg, ptid, tls, ctid)
+#            a  b       c     d     e    f    g
+#            3  4       5     6     7    8    9
+# pseudo C code:
+# tid = syscall(SYS_clone,c,b,e,f,g);
+# if (!tid) syscall(SYS_exit, a(d));
+# return tid;
+
+# SYS_clone = 120
+# SYS_exit = 1
+
+# store non-volatile regs r30, r31 on stack in order to put our
+# start func and its arg there
+stwu 30, -16(1)
+stw 31, 4(1)
+
+# save r3 (func) into r30, and r6(arg) into r31
+mr 30, 3
+mr 31, 6
+
+#move c into first arg
+mr 3, 5
+#mr 4, 4
+mr 5, 7
+mr 6, 8
+mr 7, 9
+
+# move syscall number into r0    
+li 0, 120
+
+sc
+
+# check for syscall error
+bns+ 1f # jump to label 1 if no summary overflow.
+#else
+neg 3, 3 #negate the result (errno)
+1:
+# compare sc result with 0
+cmpwi cr7, 3, 0
+
+# if not 0, jump to end
+bne cr7, 2f
+
+#else: we're the child
+#call funcptr: move arg (d) into r3
+mr 3, 31
+#move r30 (funcptr) into CTR reg
+mtctr 30
+# call CTR reg
+bctrl
+# mov SYS_exit into r0 (the exit param is already in r3)
+li 0, 1
+sc
+
+2:
+
+# restore stack
+lwz 30, 0(1)
+lwz 31, 4(1)
+addi 1, 1, 16
+
+blr
+
diff --git a/fusl/src/thread/powerpc/syscall_cp.s b/fusl/src/thread/powerpc/syscall_cp.s
new file mode 100644
index 0000000..77f8938
--- /dev/null
+++ b/fusl/src/thread/powerpc/syscall_cp.s
@@ -0,0 +1,59 @@
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+
+#r0: volatile. may be modified during linkage.
+#r1: stack frame: 16 byte alignment.
+#r2: tls/thread pointer on pp32
+#r3,r4: return values, first args
+#r5-r10: args
+#r11-r12: volatile. may be modified during linkage
+#r13: "small data area" pointer
+#r14 - r30: local vars
+#r31: local or environment pointer
+
+#r1, r14-31: belong to the caller, must be saved and restored
+#r0, r3-r12, ctr, xer: volatile, not preserved
+#r0,r11,r12: may be altered by a cross-module call;
+#"a function cannot depend on these regs having the values placed by the caller"
+
+#the fields CR2,CR3,CR4 of the cond reg must be preserved
+#LR (link reg) shall contain the funcs return address
+	.text
+	.type   __syscall_cp_asm,%function
+__syscall_cp_asm:
+	# at enter: r3 = pointer to self->cancel, r4: syscall no, r5: first arg, r6: 2nd, r7: 3rd, r8: 4th, r9: 5th, r10: 6th
+__cp_begin:
+	# r3 holds the first argument, a pointer to self->cancel.
+	# we must compare the dereferenced value with 0 and jump to __cancel if it's not
+	
+	lwz 0, 0(3) #deref pointer into r0
+	
+	cmpwi cr7, 0, 0 #compare r0 with 0, store result in cr7. 
+	beq+ cr7, 1f #jump to label 1 if r0 was 0
+	
+	b __cp_cancel #else call cancel
+1:
+	#ok, the cancel flag was not set
+	# syscall: number goes to r0, the rest 3-8
+	mr      0, 4                  # put the system call number into r0
+	mr      3, 5                  # Shift the arguments: arg1
+	mr      4, 6                  # arg2
+	mr      5, 7                  # arg3
+	mr      6, 8                  # arg4
+	mr      7, 9                  # arg5
+	mr      8, 10                  # arg6
+	sc
+__cp_end:
+	bnslr+ # return if no summary overflow. 
+	#else negate result.
+	neg 3, 3
+	blr
+__cp_cancel:
+	b __cancel
diff --git a/fusl/src/thread/pthread_atfork.c b/fusl/src/thread/pthread_atfork.c
new file mode 100644
index 0000000..a40d7f6
--- /dev/null
+++ b/fusl/src/thread/pthread_atfork.c
@@ -0,0 +1,48 @@
+#include <pthread.h>
+#include "libc.h"
+
+static struct atfork_funcs {
+	void (*prepare)(void);
+	void (*parent)(void);
+	void (*child)(void);
+	struct atfork_funcs *prev, *next;
+} *funcs;
+
+static volatile int lock[2];
+
+void __fork_handler(int who)
+{
+	struct atfork_funcs *p;
+	if (!funcs) return;
+	if (who < 0) {
+		LOCK(lock);
+		for (p=funcs; p; p = p->next) {
+			if (p->prepare) p->prepare();
+			funcs = p;
+		}
+	} else {
+		for (p=funcs; p; p = p->prev) {
+			if (!who && p->parent) p->parent();
+			else if (who && p->child) p->child();
+			funcs = p;
+		}
+		UNLOCK(lock);
+	}
+}
+
+int pthread_atfork(void (*prepare)(void), void (*parent)(void), void (*child)(void))
+{
+	struct atfork_funcs *new = malloc(sizeof *new);
+	if (!new) return -1;
+
+	LOCK(lock);
+	new->next = funcs;
+	new->prev = 0;
+	new->prepare = prepare;
+	new->parent = parent;
+	new->child = child;
+	if (funcs) funcs->prev = new;
+	funcs = new;
+	UNLOCK(lock);
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_destroy.c b/fusl/src/thread/pthread_attr_destroy.c
new file mode 100644
index 0000000..b5845dd
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_attr_destroy(pthread_attr_t *a)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_get.c b/fusl/src/thread/pthread_attr_get.c
new file mode 100644
index 0000000..3d296bf
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_get.c
@@ -0,0 +1,98 @@
+#include "pthread_impl.h"
+
+int pthread_attr_getdetachstate(const pthread_attr_t *a, int *state)
+{
+	*state = a->_a_detach;
+	return 0;
+}
+int pthread_attr_getguardsize(const pthread_attr_t *restrict a, size_t *restrict size)
+{
+	*size = a->_a_guardsize + DEFAULT_GUARD_SIZE;
+	return 0;
+}
+
+int pthread_attr_getinheritsched(const pthread_attr_t *restrict a, int *restrict inherit)
+{
+	*inherit = a->_a_sched;
+	return 0;
+}
+
+int pthread_attr_getschedparam(const pthread_attr_t *restrict a, struct sched_param *restrict param)
+{
+	param->sched_priority = a->_a_prio;
+	return 0;
+}
+
+int pthread_attr_getschedpolicy(const pthread_attr_t *restrict a, int *restrict policy)
+{
+	*policy = a->_a_policy;
+	return 0;
+}
+
+int pthread_attr_getscope(const pthread_attr_t *restrict a, int *restrict scope)
+{
+	*scope = PTHREAD_SCOPE_SYSTEM;
+	return 0;
+}
+
+int pthread_attr_getstack(const pthread_attr_t *restrict a, void **restrict addr, size_t *restrict size)
+{
+	if (!a->_a_stackaddr)
+		return EINVAL;
+	*size = a->_a_stacksize + DEFAULT_STACK_SIZE;
+	*addr = (void *)(a->_a_stackaddr - *size);
+	return 0;
+}
+
+int pthread_attr_getstacksize(const pthread_attr_t *restrict a, size_t *restrict size)
+{
+	*size = a->_a_stacksize + DEFAULT_STACK_SIZE;
+	return 0;
+}
+
+int pthread_barrierattr_getpshared(const pthread_barrierattr_t *restrict a, int *restrict pshared)
+{
+	*pshared = !!a->__attr;
+	return 0;
+}
+
+int pthread_condattr_getclock(const pthread_condattr_t *restrict a, clockid_t *restrict clk)
+{
+	*clk = a->__attr & 0x7fffffff;
+	return 0;
+}
+
+int pthread_condattr_getpshared(const pthread_condattr_t *restrict a, int *restrict pshared)
+{
+	*pshared = a->__attr>>31;
+	return 0;
+}
+
+int pthread_mutexattr_getprotocol(const pthread_mutexattr_t *restrict a, int *restrict protocol)
+{
+	*protocol = PTHREAD_PRIO_NONE;
+	return 0;
+}
+int pthread_mutexattr_getpshared(const pthread_mutexattr_t *restrict a, int *restrict pshared)
+{
+	*pshared = a->__attr / 128U % 2;
+	return 0;
+}
+
+int pthread_mutexattr_getrobust(const pthread_mutexattr_t *restrict a, int *restrict robust)
+{
+	*robust = a->__attr / 4U % 2;
+	return 0;
+}
+
+int pthread_mutexattr_gettype(const pthread_mutexattr_t *restrict a, int *restrict type)
+{
+	*type = a->__attr & 3;
+	return 0;
+}
+
+int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t *restrict a, int *restrict pshared)
+{
+	*pshared = a->__attr[0];
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_init.c b/fusl/src/thread/pthread_attr_init.c
new file mode 100644
index 0000000..969e0a3
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_init.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_attr_init(pthread_attr_t *a)
+{
+	*a = (pthread_attr_t){0};
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setdetachstate.c b/fusl/src/thread/pthread_attr_setdetachstate.c
new file mode 100644
index 0000000..1b71278
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setdetachstate.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setdetachstate(pthread_attr_t *a, int state)
+{
+	if (state > 1U) return EINVAL;
+	a->_a_detach = state;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setguardsize.c b/fusl/src/thread/pthread_attr_setguardsize.c
new file mode 100644
index 0000000..9f21d24
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setguardsize.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setguardsize(pthread_attr_t *a, size_t size)
+{
+	if (size > SIZE_MAX/8) return EINVAL;
+	a->_a_guardsize = size - DEFAULT_GUARD_SIZE;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setinheritsched.c b/fusl/src/thread/pthread_attr_setinheritsched.c
new file mode 100644
index 0000000..c91d8f8
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setinheritsched.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setinheritsched(pthread_attr_t *a, int inherit)
+{
+	if (inherit > 1U) return EINVAL;
+	a->_a_sched = inherit;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setschedparam.c b/fusl/src/thread/pthread_attr_setschedparam.c
new file mode 100644
index 0000000..d4c1204
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setschedparam.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setschedparam(pthread_attr_t *restrict a, const struct sched_param *restrict param)
+{
+	a->_a_prio = param->sched_priority;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setschedpolicy.c b/fusl/src/thread/pthread_attr_setschedpolicy.c
new file mode 100644
index 0000000..bb71f39
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setschedpolicy.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setschedpolicy(pthread_attr_t *a, int policy)
+{
+	a->_a_policy = policy;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setscope.c b/fusl/src/thread/pthread_attr_setscope.c
new file mode 100644
index 0000000..46b520c
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setscope.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setscope(pthread_attr_t *a, int scope)
+{
+	switch (scope) {
+	case PTHREAD_SCOPE_SYSTEM:
+		return 0;
+	case PTHREAD_SCOPE_PROCESS:
+		return ENOTSUP;
+	default:
+		return EINVAL;
+	}
+}
diff --git a/fusl/src/thread/pthread_attr_setstack.c b/fusl/src/thread/pthread_attr_setstack.c
new file mode 100644
index 0000000..61707a3
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setstack.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setstack(pthread_attr_t *a, void *addr, size_t size)
+{
+	if (size-PTHREAD_STACK_MIN > SIZE_MAX/4) return EINVAL;
+	a->_a_stackaddr = (size_t)addr + size;
+	a->_a_stacksize = size - DEFAULT_STACK_SIZE;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_attr_setstacksize.c b/fusl/src/thread/pthread_attr_setstacksize.c
new file mode 100644
index 0000000..09d3fda
--- /dev/null
+++ b/fusl/src/thread/pthread_attr_setstacksize.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+
+int pthread_attr_setstacksize(pthread_attr_t *a, size_t size)
+{
+	if (size-PTHREAD_STACK_MIN > SIZE_MAX/4) return EINVAL;
+	a->_a_stackaddr = 0;
+	a->_a_stacksize = size - DEFAULT_STACK_SIZE;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_barrier_destroy.c b/fusl/src/thread/pthread_barrier_destroy.c
new file mode 100644
index 0000000..4ce0b2e
--- /dev/null
+++ b/fusl/src/thread/pthread_barrier_destroy.c
@@ -0,0 +1,15 @@
+#include "pthread_impl.h"
+
+int pthread_barrier_destroy(pthread_barrier_t *b)
+{
+	if (b->_b_limit < 0) {
+		if (b->_b_lock) {
+			int v;
+			a_or(&b->_b_lock, INT_MIN);
+			while ((v = b->_b_lock) & INT_MAX)
+				__wait(&b->_b_lock, 0, v, 0);
+		}
+		__vm_wait();
+	}
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_barrier_init.c b/fusl/src/thread/pthread_barrier_init.c
new file mode 100644
index 0000000..4c3cb28
--- /dev/null
+++ b/fusl/src/thread/pthread_barrier_init.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_barrier_init(pthread_barrier_t *restrict b, const pthread_barrierattr_t *restrict a, unsigned count)
+{
+	if (count-1 > INT_MAX-1) return EINVAL;
+	*b = (pthread_barrier_t){ ._b_limit = count-1 | (a?a->__attr:0) };
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_barrier_wait.c b/fusl/src/thread/pthread_barrier_wait.c
new file mode 100644
index 0000000..06b83db
--- /dev/null
+++ b/fusl/src/thread/pthread_barrier_wait.c
@@ -0,0 +1,111 @@
+#include "pthread_impl.h"
+
+static int pshared_barrier_wait(pthread_barrier_t *b)
+{
+	int limit = (b->_b_limit & INT_MAX) + 1;
+	int ret = 0;
+	int v, w;
+
+	if (limit==1) return PTHREAD_BARRIER_SERIAL_THREAD;
+
+	while ((v=a_cas(&b->_b_lock, 0, limit)))
+		__wait(&b->_b_lock, &b->_b_waiters, v, 0);
+
+	/* Wait for <limit> threads to get to the barrier */
+	if (++b->_b_count == limit) {
+		a_store(&b->_b_count, 0);
+		ret = PTHREAD_BARRIER_SERIAL_THREAD;
+		if (b->_b_waiters2) __wake(&b->_b_count, -1, 0);
+	} else {
+		a_store(&b->_b_lock, 0);
+		if (b->_b_waiters) __wake(&b->_b_lock, 1, 0);
+		while ((v=b->_b_count)>0)
+			__wait(&b->_b_count, &b->_b_waiters2, v, 0);
+	}
+
+	__vm_lock();
+
+	/* Ensure all threads have a vm lock before proceeding */
+	if (a_fetch_add(&b->_b_count, -1)==1-limit) {
+		a_store(&b->_b_count, 0);
+		if (b->_b_waiters2) __wake(&b->_b_count, -1, 0);
+	} else {
+		while ((v=b->_b_count))
+			__wait(&b->_b_count, &b->_b_waiters2, v, 0);
+	}
+	
+	/* Perform a recursive unlock suitable for self-sync'd destruction */
+	do {
+		v = b->_b_lock;
+		w = b->_b_waiters;
+	} while (a_cas(&b->_b_lock, v, v==INT_MIN+1 ? 0 : v-1) != v);
+
+	/* Wake a thread waiting to reuse or destroy the barrier */
+	if (v==INT_MIN+1 || (v==1 && w))
+		__wake(&b->_b_lock, 1, 0);
+
+	__vm_unlock();
+
+	return ret;
+}
+
+struct instance
+{
+	volatile int count;
+	volatile int last;
+	volatile int waiters;
+	volatile int finished;
+};
+
+int pthread_barrier_wait(pthread_barrier_t *b)
+{
+	int limit = b->_b_limit;
+	struct instance *inst;
+
+	/* Trivial case: count was set at 1 */
+	if (!limit) return PTHREAD_BARRIER_SERIAL_THREAD;
+
+	/* Process-shared barriers require a separate, inefficient wait */
+	if (limit < 0) return pshared_barrier_wait(b);
+
+	/* Otherwise we need a lock on the barrier object */
+	while (a_swap(&b->_b_lock, 1))
+		__wait(&b->_b_lock, &b->_b_waiters, 1, 1);
+	inst = b->_b_inst;
+
+	/* First thread to enter the barrier becomes the "instance owner" */
+	if (!inst) {
+		struct instance new_inst = { 0 };
+		int spins = 200;
+		b->_b_inst = inst = &new_inst;
+		a_store(&b->_b_lock, 0);
+		if (b->_b_waiters) __wake(&b->_b_lock, 1, 1);
+		while (spins-- && !inst->finished)
+			a_spin();
+		a_inc(&inst->finished);
+		while (inst->finished == 1)
+			__syscall(SYS_futex,&inst->finished,FUTEX_WAIT|128,1,0) != -ENOSYS
+			|| __syscall(SYS_futex,&inst->finished,FUTEX_WAIT,1,0);
+		return PTHREAD_BARRIER_SERIAL_THREAD;
+	}
+
+	/* Last thread to enter the barrier wakes all non-instance-owners */
+	if (++inst->count == limit) {
+		b->_b_inst = 0;
+		a_store(&b->_b_lock, 0);
+		if (b->_b_waiters) __wake(&b->_b_lock, 1, 1);
+		a_store(&inst->last, 1);
+		if (inst->waiters)
+			__wake(&inst->last, -1, 1);
+	} else {
+		a_store(&b->_b_lock, 0);
+		if (b->_b_waiters) __wake(&b->_b_lock, 1, 1);
+		__wait(&inst->last, &inst->waiters, 0, 1);
+	}
+
+	/* Last thread to exit the barrier wakes the instance owner */
+	if (a_fetch_add(&inst->count,-1)==1 && a_fetch_add(&inst->finished,1))
+		__wake(&inst->finished, 1, 1);
+
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_barrierattr_destroy.c b/fusl/src/thread/pthread_barrierattr_destroy.c
new file mode 100644
index 0000000..adec738
--- /dev/null
+++ b/fusl/src/thread/pthread_barrierattr_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_barrierattr_destroy(pthread_barrierattr_t *a)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_barrierattr_init.c b/fusl/src/thread/pthread_barrierattr_init.c
new file mode 100644
index 0000000..fa742bb
--- /dev/null
+++ b/fusl/src/thread/pthread_barrierattr_init.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_barrierattr_init(pthread_barrierattr_t *a)
+{
+	*a = (pthread_barrierattr_t){0};
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_barrierattr_setpshared.c b/fusl/src/thread/pthread_barrierattr_setpshared.c
new file mode 100644
index 0000000..b391461
--- /dev/null
+++ b/fusl/src/thread/pthread_barrierattr_setpshared.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_barrierattr_setpshared(pthread_barrierattr_t *a, int pshared)
+{
+	a->__attr = pshared ? INT_MIN : 0;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_cancel.c b/fusl/src/thread/pthread_cancel.c
new file mode 100644
index 0000000..a21c386
--- /dev/null
+++ b/fusl/src/thread/pthread_cancel.c
@@ -0,0 +1,96 @@
+#define _GNU_SOURCE
+#include <string.h>
+#include "pthread_impl.h"
+#include "syscall.h"
+#include "libc.h"
+
+__attribute__((__visibility__("hidden")))
+long __cancel(), __syscall_cp_asm(), __syscall_cp_c();
+
+long __cancel()
+{
+	pthread_t self = __pthread_self();
+	if (self->canceldisable == PTHREAD_CANCEL_ENABLE || self->cancelasync)
+		pthread_exit(PTHREAD_CANCELED);
+	self->canceldisable = PTHREAD_CANCEL_DISABLE;
+	return -ECANCELED;
+}
+
+long __syscall_cp_asm(volatile void *, syscall_arg_t,
+                      syscall_arg_t, syscall_arg_t, syscall_arg_t,
+                      syscall_arg_t, syscall_arg_t, syscall_arg_t);
+
+long __syscall_cp_c(syscall_arg_t nr,
+                    syscall_arg_t u, syscall_arg_t v, syscall_arg_t w,
+                    syscall_arg_t x, syscall_arg_t y, syscall_arg_t z)
+{
+	pthread_t self;
+	long r;
+	int st;
+
+	if ((st=(self=__pthread_self())->canceldisable)
+	    && (st==PTHREAD_CANCEL_DISABLE || nr==SYS_close))
+		return __syscall(nr, u, v, w, x, y, z);
+
+	r = __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z);
+	if (r==-EINTR && nr!=SYS_close && self->cancel &&
+	    self->canceldisable != PTHREAD_CANCEL_DISABLE)
+		r = __cancel();
+	return r;
+}
+
+static void _sigaddset(sigset_t *set, int sig)
+{
+	unsigned s = sig-1;
+	set->__bits[s/8/sizeof *set->__bits] |= 1UL<<(s&8*sizeof *set->__bits-1);
+}
+
+__attribute__((__visibility__("hidden")))
+extern const char __cp_begin[1], __cp_end[1], __cp_cancel[1];
+
+static void cancel_handler(int sig, siginfo_t *si, void *ctx)
+{
+	pthread_t self = __pthread_self();
+	ucontext_t *uc = ctx;
+	uintptr_t pc = uc->uc_mcontext.MC_PC;
+
+	a_barrier();
+	if (!self->cancel || self->canceldisable == PTHREAD_CANCEL_DISABLE) return;
+
+	_sigaddset(&uc->uc_sigmask, SIGCANCEL);
+
+	if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
+		uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel;
+		return;
+	}
+
+	__syscall(SYS_tkill, self->tid, SIGCANCEL);
+}
+
+void __testcancel()
+{
+	pthread_t self = __pthread_self();
+	if (self->cancel && !self->canceldisable)
+		__cancel();
+}
+
+static void init_cancellation()
+{
+	struct sigaction sa = {
+		.sa_flags = SA_SIGINFO | SA_RESTART,
+		.sa_sigaction = cancel_handler
+	};
+	memset(&sa.sa_mask, -1, _NSIG/8);
+	__libc_sigaction(SIGCANCEL, &sa, 0);
+}
+
+int pthread_cancel(pthread_t t)
+{
+	static int init;
+	if (!init) {
+		init_cancellation();
+		init = 1;
+	}
+	a_store(&t->cancel, 1);
+	return pthread_kill(t, SIGCANCEL);
+}
diff --git a/fusl/src/thread/pthread_cleanup_push.c b/fusl/src/thread/pthread_cleanup_push.c
new file mode 100644
index 0000000..9b21764
--- /dev/null
+++ b/fusl/src/thread/pthread_cleanup_push.c
@@ -0,0 +1,20 @@
+#include "pthread_impl.h"
+
+static void dummy(struct __ptcb *cb)
+{
+}
+weak_alias(dummy, __do_cleanup_push);
+weak_alias(dummy, __do_cleanup_pop);
+
+void _pthread_cleanup_push(struct __ptcb *cb, void (*f)(void *), void *x)
+{
+	cb->__f = f;
+	cb->__x = x;
+	__do_cleanup_push(cb);
+}
+
+void _pthread_cleanup_pop(struct __ptcb *cb, int run)
+{
+	__do_cleanup_pop(cb);
+	if (run) cb->__f(cb->__x);
+}
diff --git a/fusl/src/thread/pthread_cond_broadcast.c b/fusl/src/thread/pthread_cond_broadcast.c
new file mode 100644
index 0000000..69f840f
--- /dev/null
+++ b/fusl/src/thread/pthread_cond_broadcast.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+
+int __private_cond_signal(pthread_cond_t *, int);
+
+int pthread_cond_broadcast(pthread_cond_t *c)
+{
+	if (!c->_c_shared) return __private_cond_signal(c, -1);
+	if (!c->_c_waiters) return 0;
+	a_inc(&c->_c_seq);
+	__wake(&c->_c_seq, -1, 0);
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_cond_destroy.c b/fusl/src/thread/pthread_cond_destroy.c
new file mode 100644
index 0000000..8c55516
--- /dev/null
+++ b/fusl/src/thread/pthread_cond_destroy.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+
+int pthread_cond_destroy(pthread_cond_t *c)
+{
+	if (c->_c_shared && c->_c_waiters) {
+		int cnt;
+		a_or(&c->_c_waiters, 0x80000000);
+		a_inc(&c->_c_seq);
+		__wake(&c->_c_seq, -1, 0);
+		while ((cnt = c->_c_waiters) & 0x7fffffff)
+			__wait(&c->_c_waiters, 0, cnt, 0);
+	}
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_cond_init.c b/fusl/src/thread/pthread_cond_init.c
new file mode 100644
index 0000000..8c484dd
--- /dev/null
+++ b/fusl/src/thread/pthread_cond_init.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+
+int pthread_cond_init(pthread_cond_t *restrict c, const pthread_condattr_t *restrict a)
+{
+	*c = (pthread_cond_t){0};
+	if (a) {
+		c->_c_clock = a->__attr & 0x7fffffff;
+		if (a->__attr>>31) c->_c_shared = (void *)-1;
+	}
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_cond_signal.c b/fusl/src/thread/pthread_cond_signal.c
new file mode 100644
index 0000000..119c00a
--- /dev/null
+++ b/fusl/src/thread/pthread_cond_signal.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+
+int __private_cond_signal(pthread_cond_t *, int);
+
+int pthread_cond_signal(pthread_cond_t *c)
+{
+	if (!c->_c_shared) return __private_cond_signal(c, 1);
+	if (!c->_c_waiters) return 0;
+	a_inc(&c->_c_seq);
+	__wake(&c->_c_seq, 1, 0);
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_cond_timedwait.c b/fusl/src/thread/pthread_cond_timedwait.c
new file mode 100644
index 0000000..3526ecf
--- /dev/null
+++ b/fusl/src/thread/pthread_cond_timedwait.c
@@ -0,0 +1,214 @@
+#include "pthread_impl.h"
+
+void __pthread_testcancel(void);
+int __pthread_mutex_lock(pthread_mutex_t *);
+int __pthread_mutex_unlock(pthread_mutex_t *);
+int __pthread_setcancelstate(int, int *);
+
+/*
+ * struct waiter
+ *
+ * Waiter objects have automatic storage on the waiting thread, and
+ * are used in building a linked list representing waiters currently
+ * waiting on the condition variable or a group of waiters woken
+ * together by a broadcast or signal; in the case of signal, this is a
+ * degenerate list of one member.
+ *
+ * Waiter lists attached to the condition variable itself are
+ * protected by the lock on the cv. Detached waiter lists are never
+ * modified again, but can only be traversed in reverse order, and are
+ * protected by the "barrier" locks in each node, which are unlocked
+ * in turn to control wake order.
+ *
+ * Since process-shared cond var semantics do not necessarily allow
+ * one thread to see another's automatic storage (they may be in
+ * different processes), the waiter list is not used for the
+ * process-shared case, but the structure is still used to store data
+ * needed by the cancellation cleanup handler.
+ */
+
+struct waiter {
+	struct waiter *prev, *next;
+	volatile int state, barrier;
+	volatile int *notify;
+};
+
+/* Self-synchronized-destruction-safe lock functions */
+
+static inline void lock(volatile int *l)
+{
+	if (a_cas(l, 0, 1)) {
+		a_cas(l, 1, 2);
+		do __wait(l, 0, 2, 1);
+		while (a_cas(l, 0, 2));
+	}
+}
+
+static inline void unlock(volatile int *l)
+{
+	if (a_swap(l, 0)==2)
+		__wake(l, 1, 1);
+}
+
+static inline void unlock_requeue(volatile int *l, volatile int *r, int w)
+{
+	a_store(l, 0);
+	if (w) __wake(l, 1, 1);
+	else __syscall(SYS_futex, l, FUTEX_REQUEUE|128, 0, 1, r) != -ENOSYS
+		|| __syscall(SYS_futex, l, FUTEX_REQUEUE, 0, 1, r);
+}
+
+enum {
+	WAITING,
+	SIGNALED,
+	LEAVING,
+};
+
+int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts)
+{
+	struct waiter node = { 0 };
+	int e, seq, clock = c->_c_clock, cs, shared=0, oldstate, tmp;
+	volatile int *fut;
+
+	if ((m->_m_type&15) && (m->_m_lock&INT_MAX) != __pthread_self()->tid)
+		return EPERM;
+
+	if (ts && ts->tv_nsec >= 1000000000UL)
+		return EINVAL;
+
+	__pthread_testcancel();
+
+	if (c->_c_shared) {
+		shared = 1;
+		fut = &c->_c_seq;
+		seq = c->_c_seq;
+		a_inc(&c->_c_waiters);
+	} else {
+		lock(&c->_c_lock);
+
+		seq = node.barrier = 2;
+		fut = &node.barrier;
+		node.state = WAITING;
+		node.next = c->_c_head;
+		c->_c_head = &node;
+		if (!c->_c_tail) c->_c_tail = &node;
+		else node.next->prev = &node;
+
+		unlock(&c->_c_lock);
+	}
+
+	__pthread_mutex_unlock(m);
+
+	__pthread_setcancelstate(PTHREAD_CANCEL_MASKED, &cs);
+	if (cs == PTHREAD_CANCEL_DISABLE) __pthread_setcancelstate(cs, 0);
+
+	do e = __timedwait_cp(fut, seq, clock, ts, !shared);
+	while (*fut==seq && (!e || e==EINTR));
+	if (e == EINTR) e = 0;
+
+	if (shared) {
+		/* Suppress cancellation if a signal was potentially
+		 * consumed; this is a legitimate form of spurious
+		 * wake even if not. */
+		if (e == ECANCELED && c->_c_seq != seq) e = 0;
+		if (a_fetch_add(&c->_c_waiters, -1) == -0x7fffffff)
+			__wake(&c->_c_waiters, 1, 0);
+		oldstate = WAITING;
+		goto relock;
+	}
+
+	oldstate = a_cas(&node.state, WAITING, LEAVING);
+
+	if (oldstate == WAITING) {
+		/* Access to cv object is valid because this waiter was not
+		 * yet signaled and a new signal/broadcast cannot return
+		 * after seeing a LEAVING waiter without getting notified
+		 * via the futex notify below. */
+
+		lock(&c->_c_lock);
+		
+		if (c->_c_head == &node) c->_c_head = node.next;
+		else if (node.prev) node.prev->next = node.next;
+		if (c->_c_tail == &node) c->_c_tail = node.prev;
+		else if (node.next) node.next->prev = node.prev;
+		
+		unlock(&c->_c_lock);
+
+		if (node.notify) {
+			if (a_fetch_add(node.notify, -1)==1)
+				__wake(node.notify, 1, 1);
+		}
+	} else {
+		/* Lock barrier first to control wake order. */
+		lock(&node.barrier);
+	}
+
+relock:
+	/* Errors locking the mutex override any existing error or
+	 * cancellation, since the caller must see them to know the
+	 * state of the mutex. */
+	if ((tmp = pthread_mutex_lock(m))) e = tmp;
+
+	if (oldstate == WAITING) goto done;
+
+	if (!node.next) a_inc(&m->_m_waiters);
+
+	/* Unlock the barrier that's holding back the next waiter, and
+	 * either wake it or requeue it to the mutex. */
+	if (node.prev)
+		unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & 128);
+	else
+		a_dec(&m->_m_waiters);
+
+	/* Since a signal was consumed, cancellation is not permitted. */
+	if (e == ECANCELED) e = 0;
+
+done:
+	__pthread_setcancelstate(cs, 0);
+
+	if (e == ECANCELED) {
+		__pthread_testcancel();
+		__pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0);
+	}
+
+	return e;
+}
+
+int __private_cond_signal(pthread_cond_t *c, int n)
+{
+	struct waiter *p, *first=0;
+	volatile int ref = 0;
+	int cur;
+
+	lock(&c->_c_lock);
+	for (p=c->_c_tail; n && p; p=p->prev) {
+		if (a_cas(&p->state, WAITING, SIGNALED) != WAITING) {
+			ref++;
+			p->notify = &ref;
+		} else {
+			n--;
+			if (!first) first=p;
+		}
+	}
+	/* Split the list, leaving any remainder on the cv. */
+	if (p) {
+		if (p->next) p->next->prev = 0;
+		p->next = 0;
+	} else {
+		c->_c_head = 0;
+	}
+	c->_c_tail = p;
+	unlock(&c->_c_lock);
+
+	/* Wait for any waiters in the LEAVING state to remove
+	 * themselves from the list before returning or allowing
+	 * signaled threads to proceed. */
+	while ((cur = ref)) __wait(&ref, 0, cur, 1);
+
+	/* Allow first signaled waiter, if any, to proceed. */
+	if (first) unlock(&first->barrier);
+
+	return 0;
+}
+
+weak_alias(__pthread_cond_timedwait, pthread_cond_timedwait);
diff --git a/fusl/src/thread/pthread_cond_wait.c b/fusl/src/thread/pthread_cond_wait.c
new file mode 100644
index 0000000..8735bf1
--- /dev/null
+++ b/fusl/src/thread/pthread_cond_wait.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_cond_wait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m)
+{
+	return pthread_cond_timedwait(c, m, 0);
+}
diff --git a/fusl/src/thread/pthread_condattr_destroy.c b/fusl/src/thread/pthread_condattr_destroy.c
new file mode 100644
index 0000000..c54ec41
--- /dev/null
+++ b/fusl/src/thread/pthread_condattr_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_condattr_destroy(pthread_condattr_t *a)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_condattr_init.c b/fusl/src/thread/pthread_condattr_init.c
new file mode 100644
index 0000000..a41741b
--- /dev/null
+++ b/fusl/src/thread/pthread_condattr_init.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_condattr_init(pthread_condattr_t *a)
+{
+	*a = (pthread_condattr_t){0};
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_condattr_setclock.c b/fusl/src/thread/pthread_condattr_setclock.c
new file mode 100644
index 0000000..7112594
--- /dev/null
+++ b/fusl/src/thread/pthread_condattr_setclock.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+
+int pthread_condattr_setclock(pthread_condattr_t *a, clockid_t clk)
+{
+	if (clk < 0 || clk-2U < 2) return EINVAL;
+	a->__attr &= 0x80000000;
+	a->__attr |= clk;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_condattr_setpshared.c b/fusl/src/thread/pthread_condattr_setpshared.c
new file mode 100644
index 0000000..51453e0
--- /dev/null
+++ b/fusl/src/thread/pthread_condattr_setpshared.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+
+int pthread_condattr_setpshared(pthread_condattr_t *a, int pshared)
+{
+	if (pshared > 1U) return EINVAL;
+	a->__attr &= 0x7fffffff;
+	a->__attr |= (unsigned)pshared<<31;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_create.c b/fusl/src/thread/pthread_create.c
new file mode 100644
index 0000000..e7df34a
--- /dev/null
+++ b/fusl/src/thread/pthread_create.c
@@ -0,0 +1,304 @@
+#define _GNU_SOURCE
+#include "pthread_impl.h"
+#include "stdio_impl.h"
+#include "libc.h"
+#include <sys/mman.h>
+#include <string.h>
+#include <stddef.h>
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+int __mprotect(void *, size_t, int);
+
+static void dummy_0()
+{
+}
+weak_alias(dummy_0, __acquire_ptc);
+weak_alias(dummy_0, __release_ptc);
+weak_alias(dummy_0, __pthread_tsd_run_dtors);
+weak_alias(dummy_0, __do_orphaned_stdio_locks);
+weak_alias(dummy_0, __dl_thread_cleanup);
+
+_Noreturn void __pthread_exit(void *result)
+{
+	pthread_t self = __pthread_self();
+	sigset_t set;
+
+	self->canceldisable = 1;
+	self->cancelasync = 0;
+	self->result = result;
+
+	while (self->cancelbuf) {
+		void (*f)(void *) = self->cancelbuf->__f;
+		void *x = self->cancelbuf->__x;
+		self->cancelbuf = self->cancelbuf->__next;
+		f(x);
+	}
+
+	__pthread_tsd_run_dtors();
+
+	__lock(self->exitlock);
+
+	/* Mark this thread dead before decrementing count */
+	__lock(self->killlock);
+	self->dead = 1;
+
+	/* Block all signals before decrementing the live thread count.
+	 * This is important to ensure that dynamically allocated TLS
+	 * is not under-allocated/over-committed, and possibly for other
+	 * reasons as well. */
+	__block_all_sigs(&set);
+
+	/* Wait to unlock the kill lock, which governs functions like
+	 * pthread_kill which target a thread id, until signals have
+	 * been blocked. This precludes observation of the thread id
+	 * as a live thread (with application code running in it) after
+	 * the thread was reported dead by ESRCH being returned. */
+	__unlock(self->killlock);
+
+	/* It's impossible to determine whether this is "the last thread"
+	 * until performing the atomic decrement, since multiple threads
+	 * could exit at the same time. For the last thread, revert the
+	 * decrement and unblock signals to give the atexit handlers and
+	 * stdio cleanup code a consistent state. */
+	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
+		libc.threads_minus_1 = 0;
+		__restore_sigs(&set);
+		exit(0);
+	}
+
+	/* Process robust list in userspace to handle non-pshared mutexes
+	 * and the detached thread case where the robust list head will
+	 * be invalid when the kernel would process it. */
+	__vm_lock();
+	volatile void *volatile *rp;
+	while ((rp=self->robust_list.head) && rp != &self->robust_list.head) {
+		pthread_mutex_t *m = (void *)((char *)rp
+			- offsetof(pthread_mutex_t, _m_next));
+		int waiters = m->_m_waiters;
+		int priv = (m->_m_type & 128) ^ 128;
+		self->robust_list.pending = rp;
+		self->robust_list.head = *rp;
+		int cont = a_swap(&m->_m_lock, self->tid|0x40000000);
+		self->robust_list.pending = 0;
+		if (cont < 0 || waiters)
+			__wake(&m->_m_lock, 1, priv);
+	}
+	__vm_unlock();
+
+	__do_orphaned_stdio_locks();
+	__dl_thread_cleanup();
+
+	if (self->detached && self->map_base) {
+		/* Detached threads must avoid the kernel clear_child_tid
+		 * feature, since the virtual address will have been
+		 * unmapped and possibly already reused by a new mapping
+		 * at the time the kernel would perform the write. In
+		 * the case of threads that started out detached, the
+		 * initial clone flags are correct, but if the thread was
+		 * detached later (== 2), we need to clear it here. */
+		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
+
+		/* Robust list will no longer be valid, and was already
+		 * processed above, so unregister it with the kernel. */
+		if (self->robust_list.off)
+			__syscall(SYS_set_robust_list, 0, 3*sizeof(long));
+
+		/* Since __unmapself bypasses the normal munmap code path,
+		 * explicitly wait for vmlock holders first. */
+		__vm_wait();
+
+		/* The following call unmaps the thread's stack mapping
+		 * and then exits without touching the stack. */
+		__unmapself(self->map_base, self->map_size);
+	}
+
+	for (;;) __syscall(SYS_exit, 0);
+}
+
+void __do_cleanup_push(struct __ptcb *cb)
+{
+	struct pthread *self = __pthread_self();
+	cb->__next = self->cancelbuf;
+	self->cancelbuf = cb;
+}
+
+void __do_cleanup_pop(struct __ptcb *cb)
+{
+	__pthread_self()->cancelbuf = cb->__next;
+}
+
+static int start(void *p)
+{
+	pthread_t self = p;
+	if (self->startlock[0]) {
+		__wait(self->startlock, 0, 1, 1);
+		if (self->startlock[0]) {
+			self->detached = 2;
+			pthread_exit(0);
+		}
+		__restore_sigs(self->sigmask);
+	}
+	if (self->unblock_cancel)
+		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK,
+			SIGPT_SET, 0, _NSIG/8);
+	__pthread_exit(self->start(self->start_arg));
+	return 0;
+}
+
+static int start_c11(void *p)
+{
+	pthread_t self = p;
+	int (*start)(void*) = (int(*)(void*)) self->start;
+	__pthread_exit((void *)(uintptr_t)start(self->start_arg));
+	return 0;
+}
+
+#define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
+
+/* pthread_key_create.c overrides this */
+static volatile size_t dummy = 0;
+weak_alias(dummy, __pthread_tsd_size);
+static void *dummy_tsd[1] = { 0 };
+weak_alias(dummy_tsd, __pthread_tsd_main);
+
+volatile int __block_new_threads = 0;
+
+static FILE *volatile dummy_file = 0;
+weak_alias(dummy_file, __stdin_used);
+weak_alias(dummy_file, __stdout_used);
+weak_alias(dummy_file, __stderr_used);
+
+static void init_file_lock(FILE *f)
+{
+	if (f && f->lock<0) f->lock = 0;
+}
+
+void *__copy_tls(unsigned char *);
+
+int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp, void *(*entry)(void *), void *restrict arg)
+{
+	int ret, c11 = (attrp == __ATTRP_C11_THREAD);
+	size_t size, guard;
+	struct pthread *self, *new;
+	unsigned char *map = 0, *stack = 0, *tsd = 0, *stack_limit;
+	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
+		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
+		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
+	int do_sched = 0;
+	pthread_attr_t attr = {0};
+
+	if (!libc.can_do_threads) return ENOSYS;
+	self = __pthread_self();
+	if (!libc.threaded) {
+		for (FILE *f=*__ofl_lock(); f; f=f->next)
+			init_file_lock(f);
+		__ofl_unlock();
+		init_file_lock(__stdin_used);
+		init_file_lock(__stdout_used);
+		init_file_lock(__stderr_used);
+		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
+		self->tsd = (void **)__pthread_tsd_main;
+		libc.threaded = 1;
+	}
+	if (attrp && !c11) attr = *attrp;
+
+	__acquire_ptc();
+	if (__block_new_threads) __wait(&__block_new_threads, 0, 1, 1);
+
+	if (attr._a_stackaddr) {
+		size_t need = libc.tls_size + __pthread_tsd_size;
+		size = attr._a_stacksize + DEFAULT_STACK_SIZE;
+		stack = (void *)(attr._a_stackaddr & -16);
+		stack_limit = (void *)(attr._a_stackaddr - size);
+		/* Use application-provided stack for TLS only when
+		 * it does not take more than ~12% or 2k of the
+		 * application's stack space. */
+		if (need < size/8 && need < 2048) {
+			tsd = stack - __pthread_tsd_size;
+			stack = tsd - libc.tls_size;
+			memset(stack, 0, need);
+		} else {
+			size = ROUND(need);
+			guard = 0;
+		}
+	} else {
+		guard = ROUND(DEFAULT_GUARD_SIZE + attr._a_guardsize);
+		size = guard + ROUND(DEFAULT_STACK_SIZE + attr._a_stacksize
+			+ libc.tls_size +  __pthread_tsd_size);
+	}
+
+	if (!tsd) {
+		if (guard) {
+			map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+			if (map == MAP_FAILED) goto fail;
+			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)
+			    && errno != ENOSYS) {
+				__munmap(map, size);
+				goto fail;
+			}
+		} else {
+			map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+			if (map == MAP_FAILED) goto fail;
+		}
+		tsd = map + size - __pthread_tsd_size;
+		if (!stack) {
+			stack = tsd - libc.tls_size;
+			stack_limit = map + guard;
+		}
+	}
+
+	new = __copy_tls(tsd - libc.tls_size);
+	new->map_base = map;
+	new->map_size = size;
+	new->stack = stack;
+	new->stack_size = stack - stack_limit;
+	new->start = entry;
+	new->start_arg = arg;
+	new->self = new;
+	new->tsd = (void *)tsd;
+	new->locale = &libc.global_locale;
+	if (attr._a_detach) {
+		new->detached = 1;
+		flags -= CLONE_CHILD_CLEARTID;
+	}
+	if (attr._a_sched) {
+		do_sched = new->startlock[0] = 1;
+		__block_app_sigs(new->sigmask);
+	}
+	new->robust_list.head = &new->robust_list.head;
+	new->unblock_cancel = self->cancel;
+	new->CANARY = self->CANARY;
+
+	a_inc(&libc.threads_minus_1);
+	ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
+
+	__release_ptc();
+
+	if (do_sched) {
+		__restore_sigs(new->sigmask);
+	}
+
+	if (ret < 0) {
+		a_dec(&libc.threads_minus_1);
+		if (map) __munmap(map, size);
+		return EAGAIN;
+	}
+
+	if (do_sched) {
+		ret = __syscall(SYS_sched_setscheduler, new->tid,
+			attr._a_policy, &attr._a_prio);
+		a_store(new->startlock, ret<0 ? 2 : 0);
+		__wake(new->startlock, 1, 1);
+		if (ret < 0) return -ret;
+	}
+
+	*res = new;
+	return 0;
+fail:
+	__release_ptc();
+	return EAGAIN;
+}
+
+weak_alias(__pthread_exit, pthread_exit);
+weak_alias(__pthread_create, pthread_create);
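
Below is an illustrative sketch, not from the musl tree, of ordinary pthread_create/pthread_join usage against the implementation above; joining is what eventually unmaps the stack mapping recorded in map_base/map_size:

#include <pthread.h>
#include <stdio.h>

static void *worker(void *arg)
{
	printf("hello from %s\n", (char *)arg);
	return 0;
}

int main(void)
{
	pthread_t t;
	if (pthread_create(&t, 0, worker, "worker thread"))
		return 1;
	/* Joining retrieves the result and releases the thread's
	 * stack/TLS mapping allocated in __pthread_create. */
	pthread_join(t, 0);
	return 0;
}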
diff --git a/fusl/src/thread/pthread_detach.c b/fusl/src/thread/pthread_detach.c
new file mode 100644
index 0000000..ed77f74
--- /dev/null
+++ b/fusl/src/thread/pthread_detach.c
@@ -0,0 +1,17 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_join(pthread_t, void **);
+
+static int __pthread_detach(pthread_t t)
+{
+	/* Cannot detach a thread that's already exiting */
+	if (a_swap(t->exitlock, 1))
+		return __pthread_join(t, 0);
+	t->detached = 2;
+	__unlock(t->exitlock);
+	return 0;
+}
+
+weak_alias(__pthread_detach, pthread_detach);
+weak_alias(__pthread_detach, thrd_detach);
diff --git a/fusl/src/thread/pthread_equal.c b/fusl/src/thread/pthread_equal.c
new file mode 100644
index 0000000..7c31482
--- /dev/null
+++ b/fusl/src/thread/pthread_equal.c
@@ -0,0 +1,11 @@
+#include <pthread.h>
+#include <threads.h>
+#include "libc.h"
+
+static int __pthread_equal(pthread_t a, pthread_t b)
+{
+	return a==b;
+}
+
+weak_alias(__pthread_equal, pthread_equal);
+weak_alias(__pthread_equal, thrd_equal);
diff --git a/fusl/src/thread/pthread_getattr_np.c b/fusl/src/thread/pthread_getattr_np.c
new file mode 100644
index 0000000..10ea512
--- /dev/null
+++ b/fusl/src/thread/pthread_getattr_np.c
@@ -0,0 +1,23 @@
+#define _GNU_SOURCE
+#include "pthread_impl.h"
+#include "libc.h"
+#include <sys/mman.h>
+
+int pthread_getattr_np(pthread_t t, pthread_attr_t *a)
+{
+	*a = (pthread_attr_t){0};
+	a->_a_detach = !!t->detached;
+	if (t->stack) {
+		a->_a_stackaddr = (uintptr_t)t->stack;
+		a->_a_stacksize = t->stack_size - DEFAULT_STACK_SIZE;
+	} else {
+		char *p = (void *)libc.auxv;
+		size_t l = PAGE_SIZE;
+		p += -(uintptr_t)p & PAGE_SIZE-1;
+		a->_a_stackaddr = (uintptr_t)p;
+		while (mremap(p-l-PAGE_SIZE, PAGE_SIZE, 2*PAGE_SIZE, 0)==MAP_FAILED && errno==ENOMEM)
+			l += PAGE_SIZE;
+		a->_a_stacksize = l - DEFAULT_STACK_SIZE;
+	}
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_getconcurrency.c b/fusl/src/thread/pthread_getconcurrency.c
new file mode 100644
index 0000000..269429a
--- /dev/null
+++ b/fusl/src/thread/pthread_getconcurrency.c
@@ -0,0 +1,6 @@
+#include <pthread.h>
+
+int pthread_getconcurrency()
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_getcpuclockid.c b/fusl/src/thread/pthread_getcpuclockid.c
new file mode 100644
index 0000000..9df14fb
--- /dev/null
+++ b/fusl/src/thread/pthread_getcpuclockid.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_getcpuclockid(pthread_t t, clockid_t *clockid)
+{
+	*clockid = (-t->tid-1)*8U + 6;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_getschedparam.c b/fusl/src/thread/pthread_getschedparam.c
new file mode 100644
index 0000000..3053c18
--- /dev/null
+++ b/fusl/src/thread/pthread_getschedparam.c
@@ -0,0 +1,17 @@
+#include "pthread_impl.h"
+
+int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param *restrict param)
+{
+	int r;
+	__lock(t->killlock);
+	if (t->dead) {
+		r = ESRCH;
+	} else {
+		r = -__syscall(SYS_sched_getparam, t->tid, param);
+		if (!r) {
+			*policy = __syscall(SYS_sched_getscheduler, t->tid);
+		}
+	}
+	__unlock(t->killlock);
+	return r;
+}
diff --git a/fusl/src/thread/pthread_getspecific.c b/fusl/src/thread/pthread_getspecific.c
new file mode 100644
index 0000000..d9342a5
--- /dev/null
+++ b/fusl/src/thread/pthread_getspecific.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+static void *__pthread_getspecific(pthread_key_t k)
+{
+	struct pthread *self = __pthread_self();
+	return self->tsd[k];
+}
+
+weak_alias(__pthread_getspecific, pthread_getspecific);
+weak_alias(__pthread_getspecific, tss_get);
diff --git a/fusl/src/thread/pthread_join.c b/fusl/src/thread/pthread_join.c
new file mode 100644
index 0000000..694d377
--- /dev/null
+++ b/fusl/src/thread/pthread_join.c
@@ -0,0 +1,22 @@
+#include "pthread_impl.h"
+#include <sys/mman.h>
+
+int __munmap(void *, size_t);
+void __pthread_testcancel(void);
+int __pthread_setcancelstate(int, int *);
+
+int __pthread_join(pthread_t t, void **res)
+{
+	int tmp, cs;
+	__pthread_testcancel();
+	__pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+	if (cs == PTHREAD_CANCEL_ENABLE) __pthread_setcancelstate(cs, 0);
+	while ((tmp = t->tid)) __timedwait_cp(&t->tid, tmp, 0, 0, 0);
+	__pthread_setcancelstate(cs, 0);
+	a_barrier();
+	if (res) *res = t->result;
+	if (t->map_base) __munmap(t->map_base, t->map_size);
+	return 0;
+}
+
+weak_alias(__pthread_join, pthread_join);
diff --git a/fusl/src/thread/pthread_key_create.c b/fusl/src/thread/pthread_key_create.c
new file mode 100644
index 0000000..a78e507
--- /dev/null
+++ b/fusl/src/thread/pthread_key_create.c
@@ -0,0 +1,56 @@
+#include "pthread_impl.h"
+
+volatile size_t __pthread_tsd_size = sizeof(void *) * PTHREAD_KEYS_MAX;
+void *__pthread_tsd_main[PTHREAD_KEYS_MAX] = { 0 };
+
+static void (*volatile keys[PTHREAD_KEYS_MAX])(void *);
+
+static void nodtor(void *dummy)
+{
+}
+
+int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
+{
+	unsigned i = (uintptr_t)&k / 16 % PTHREAD_KEYS_MAX;
+	unsigned j = i;
+	pthread_t self = __pthread_self();
+
+	/* This can only happen in the main thread before
+	 * pthread_create has been called. */
+	if (!self->tsd) self->tsd = __pthread_tsd_main;
+
+	if (!dtor) dtor = nodtor;
+	do {
+		if (!a_cas_p(keys+j, 0, (void *)dtor)) {
+			*k = j;
+			return 0;
+		}
+	} while ((j=(j+1)%PTHREAD_KEYS_MAX) != i);
+	return EAGAIN;
+}
+
+int __pthread_key_delete(pthread_key_t k)
+{
+	keys[k] = 0;
+	return 0;
+}
+
+void __pthread_tsd_run_dtors()
+{
+	pthread_t self = __pthread_self();
+	int i, j, not_finished = self->tsd_used;
+	for (j=0; not_finished && j<PTHREAD_DESTRUCTOR_ITERATIONS; j++) {
+		not_finished = 0;
+		for (i=0; i<PTHREAD_KEYS_MAX; i++) {
+			if (self->tsd[i] && keys[i]) {
+				void *tmp = self->tsd[i];
+				self->tsd[i] = 0;
+				keys[i](tmp);
+				not_finished = 1;
+			}
+		}
+	}
+}
+
+weak_alias(__pthread_key_delete, pthread_key_delete);
+weak_alias(__pthread_key_create, pthread_key_create);
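
A short usage sketch, not part of the import, showing how per-thread storage created here interacts with the destructor pass run at thread exit:

#include <pthread.h>
#include <stdlib.h>

static pthread_key_t key;
static pthread_once_t key_once = PTHREAD_ONCE_INIT;

static void free_buf(void *p)
{
	free(p);	/* invoked at thread exit by __pthread_tsd_run_dtors */
}

static void make_key(void)
{
	pthread_key_create(&key, free_buf);
}

/* Returns a 64-byte scratch buffer private to the calling thread. */
void *thread_buf(void)
{
	void *p;
	pthread_once(&key_once, make_key);
	if (!(p = pthread_getspecific(key))) {
		p = malloc(64);
		pthread_setspecific(key, p);
	}
	return p;
}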
diff --git a/fusl/src/thread/pthread_kill.c b/fusl/src/thread/pthread_kill.c
new file mode 100644
index 0000000..acdb1ea
--- /dev/null
+++ b/fusl/src/thread/pthread_kill.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+
+int pthread_kill(pthread_t t, int sig)
+{
+	int r;
+	__lock(t->killlock);
+	r = t->dead ? ESRCH : -__syscall(SYS_tkill, t->tid, sig);
+	__unlock(t->killlock);
+	return r;
+}
diff --git a/fusl/src/thread/pthread_mutex_consistent.c b/fusl/src/thread/pthread_mutex_consistent.c
new file mode 100644
index 0000000..96b83b5
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_consistent.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+
+int pthread_mutex_consistent(pthread_mutex_t *m)
+{
+	if (!(m->_m_type & 8)) return EINVAL;
+	if ((m->_m_lock & 0x7fffffff) != __pthread_self()->tid)
+		return EPERM;
+	m->_m_type &= ~8U;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutex_destroy.c b/fusl/src/thread/pthread_mutex_destroy.c
new file mode 100644
index 0000000..6d49e68
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_destroy.c
@@ -0,0 +1,6 @@
+#include <pthread.h>
+
+int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutex_getprioceiling.c b/fusl/src/thread/pthread_mutex_getprioceiling.c
new file mode 100644
index 0000000..8c75a66
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_getprioceiling.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_mutex_getprioceiling(const pthread_mutex_t *restrict m, int *restrict ceiling)
+{
+	return EINVAL;
+}
diff --git a/fusl/src/thread/pthread_mutex_init.c b/fusl/src/thread/pthread_mutex_init.c
new file mode 100644
index 0000000..acf45a7
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_init.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_mutex_init(pthread_mutex_t *restrict m, const pthread_mutexattr_t *restrict a)
+{
+	*m = (pthread_mutex_t){0};
+	if (a) m->_m_type = a->__attr;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutex_lock.c b/fusl/src/thread/pthread_mutex_lock.c
new file mode 100644
index 0000000..d0c93ca
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_lock.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+
+int __pthread_mutex_timedlock(pthread_mutex_t *restrict, const struct timespec *restrict);
+
+int __pthread_mutex_lock(pthread_mutex_t *m)
+{
+	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
+	    && !a_cas(&m->_m_lock, 0, EBUSY))
+		return 0;
+
+	return __pthread_mutex_timedlock(m, 0);
+}
+
+weak_alias(__pthread_mutex_lock, pthread_mutex_lock);
diff --git a/fusl/src/thread/pthread_mutex_setprioceiling.c b/fusl/src/thread/pthread_mutex_setprioceiling.c
new file mode 100644
index 0000000..681f07c
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_setprioceiling.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_mutex_setprioceiling(pthread_mutex_t *restrict m, int ceiling, int *restrict old)
+{
+	return EINVAL;
+}
diff --git a/fusl/src/thread/pthread_mutex_timedlock.c b/fusl/src/thread/pthread_mutex_timedlock.c
new file mode 100644
index 0000000..0a240e7
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_timedlock.c
@@ -0,0 +1,34 @@
+#include "pthread_impl.h"
+
+int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
+{
+	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
+	    && !a_cas(&m->_m_lock, 0, EBUSY))
+		return 0;
+
+	int r, t, priv = (m->_m_type & 128) ^ 128;
+
+	r = pthread_mutex_trylock(m);
+	if (r != EBUSY) return r;
+	
+	int spins = 100;
+	while (spins-- && m->_m_lock && !m->_m_waiters) a_spin();
+
+	while ((r=pthread_mutex_trylock(m)) == EBUSY) {
+		if (!(r=m->_m_lock) || ((r&0x40000000) && (m->_m_type&4)))
+			continue;
+		if ((m->_m_type&3) == PTHREAD_MUTEX_ERRORCHECK
+		 && (r&0x7fffffff) == __pthread_self()->tid)
+			return EDEADLK;
+
+		a_inc(&m->_m_waiters);
+		t = r | 0x80000000;
+		a_cas(&m->_m_lock, r, t);
+		r = __timedwait(&m->_m_lock, t, CLOCK_REALTIME, at, priv);
+		a_dec(&m->_m_waiters);
+		if (r && r != EINTR) break;
+	}
+	return r;
+}
+
+weak_alias(__pthread_mutex_timedlock, pthread_mutex_timedlock);
diff --git a/fusl/src/thread/pthread_mutex_trylock.c b/fusl/src/thread/pthread_mutex_trylock.c
new file mode 100644
index 0000000..0df3ce2
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_trylock.c
@@ -0,0 +1,58 @@
+#include "pthread_impl.h"
+
+int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
+{
+	int old, own;
+	int type = m->_m_type & 15;
+	pthread_t self = __pthread_self();
+	int tid = self->tid;
+
+	old = m->_m_lock;
+	own = old & 0x7fffffff;
+	if (own == tid && (type&3) == PTHREAD_MUTEX_RECURSIVE) {
+		if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
+		m->_m_count++;
+		return 0;
+	}
+	if (own == 0x40000000) return ENOTRECOVERABLE;
+
+	if (m->_m_type & 128) {
+		if (!self->robust_list.off) {
+			self->robust_list.off = (char*)&m->_m_lock-(char *)&m->_m_next;
+			__syscall(SYS_set_robust_list, &self->robust_list, 3*sizeof(long));
+		}
+		if (m->_m_waiters) tid |= 0x80000000;
+		self->robust_list.pending = &m->_m_next;
+	}
+
+	if ((own && (!(own & 0x40000000) || !(type & 4)))
+	    || a_cas(&m->_m_lock, old, tid) != old) {
+		self->robust_list.pending = 0;
+		return EBUSY;
+	}
+
+	volatile void *next = self->robust_list.head;
+	m->_m_next = next;
+	m->_m_prev = &self->robust_list.head;
+	if (next != &self->robust_list.head) *(volatile void *volatile *)
+		((char *)next - sizeof(void *)) = &m->_m_next;
+	self->robust_list.head = &m->_m_next;
+	self->robust_list.pending = 0;
+
+	if (own) {
+		m->_m_count = 0;
+		m->_m_type |= 8;
+		return EOWNERDEAD;
+	}
+
+	return 0;
+}
+
+int __pthread_mutex_trylock(pthread_mutex_t *m)
+{
+	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL)
+		return a_cas(&m->_m_lock, 0, EBUSY) & EBUSY;
+	return __pthread_mutex_trylock_owner(m);
+}
+
+weak_alias(__pthread_mutex_trylock, pthread_mutex_trylock);
diff --git a/fusl/src/thread/pthread_mutex_unlock.c b/fusl/src/thread/pthread_mutex_unlock.c
new file mode 100644
index 0000000..02da92a
--- /dev/null
+++ b/fusl/src/thread/pthread_mutex_unlock.c
@@ -0,0 +1,37 @@
+#include "pthread_impl.h"
+
+int __pthread_mutex_unlock(pthread_mutex_t *m)
+{
+	pthread_t self;
+	int waiters = m->_m_waiters;
+	int cont;
+	int type = m->_m_type & 15;
+	int priv = (m->_m_type & 128) ^ 128;
+
+	if (type != PTHREAD_MUTEX_NORMAL) {
+		self = __pthread_self();
+		if ((m->_m_lock&0x7fffffff) != self->tid)
+			return EPERM;
+		if ((type&3) == PTHREAD_MUTEX_RECURSIVE && m->_m_count)
+			return m->_m_count--, 0;
+		if (!priv) {
+			self->robust_list.pending = &m->_m_next;
+			__vm_lock();
+		}
+		volatile void *prev = m->_m_prev;
+		volatile void *next = m->_m_next;
+		*(volatile void *volatile *)prev = next;
+		if (next != &self->robust_list.head) *(volatile void *volatile *)
+			((char *)next - sizeof(void *)) = prev;
+	}
+	cont = a_swap(&m->_m_lock, (type & 8) ? 0x40000000 : 0);
+	if (type != PTHREAD_MUTEX_NORMAL && !priv) {
+		self->robust_list.pending = 0;
+		__vm_unlock();
+	}
+	if (waiters || cont<0)
+		__wake(&m->_m_lock, 1, priv);
+	return 0;
+}
+
+weak_alias(__pthread_mutex_unlock, pthread_mutex_unlock);
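
The trylock/unlock pair above carries the robust-mutex bookkeeping (the 0x40000000 owner-died bit and the type-8 inconsistent flag). A hedged sketch of the corresponding application-side protocol:

#include <pthread.h>
#include <errno.h>

/* Create a robust mutex; if its owner dies while holding it, the next
 * locker gets EOWNERDEAD and may repair the protected state. */
int robust_init(pthread_mutex_t *m)
{
	pthread_mutexattr_t a;
	pthread_mutexattr_init(&a);
	pthread_mutexattr_setrobust(&a, PTHREAD_MUTEX_ROBUST);
	return pthread_mutex_init(m, &a);
}

int robust_lock(pthread_mutex_t *m)
{
	int r = pthread_mutex_lock(m);
	if (r == EOWNERDEAD) {
		/* ... repair the data guarded by m here ... */
		pthread_mutex_consistent(m);
		r = 0;
	}
	return r;
}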
diff --git a/fusl/src/thread/pthread_mutexattr_destroy.c b/fusl/src/thread/pthread_mutexattr_destroy.c
new file mode 100644
index 0000000..9fd6974
--- /dev/null
+++ b/fusl/src/thread/pthread_mutexattr_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_mutexattr_destroy(pthread_mutexattr_t *a)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutexattr_init.c b/fusl/src/thread/pthread_mutexattr_init.c
new file mode 100644
index 0000000..0b72c1b
--- /dev/null
+++ b/fusl/src/thread/pthread_mutexattr_init.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_mutexattr_init(pthread_mutexattr_t *a)
+{
+	*a = (pthread_mutexattr_t){0};
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutexattr_setprotocol.c b/fusl/src/thread/pthread_mutexattr_setprotocol.c
new file mode 100644
index 0000000..c92a31c
--- /dev/null
+++ b/fusl/src/thread/pthread_mutexattr_setprotocol.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_mutexattr_setprotocol(pthread_mutexattr_t *a, int protocol)
+{
+	if (protocol) return ENOTSUP;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutexattr_setpshared.c b/fusl/src/thread/pthread_mutexattr_setpshared.c
new file mode 100644
index 0000000..100f6ff
--- /dev/null
+++ b/fusl/src/thread/pthread_mutexattr_setpshared.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+
+int pthread_mutexattr_setpshared(pthread_mutexattr_t *a, int pshared)
+{
+	if (pshared > 1U) return EINVAL;
+	a->__attr &= ~128U;
+	a->__attr |= pshared<<7;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutexattr_setrobust.c b/fusl/src/thread/pthread_mutexattr_setrobust.c
new file mode 100644
index 0000000..dcfa4cf
--- /dev/null
+++ b/fusl/src/thread/pthread_mutexattr_setrobust.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+
+int pthread_mutexattr_setrobust(pthread_mutexattr_t *a, int robust)
+{
+	if (robust > 1U) return EINVAL;
+	a->__attr &= ~4;
+	a->__attr |= robust*4;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_mutexattr_settype.c b/fusl/src/thread/pthread_mutexattr_settype.c
new file mode 100644
index 0000000..cd7a80e
--- /dev/null
+++ b/fusl/src/thread/pthread_mutexattr_settype.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_mutexattr_settype(pthread_mutexattr_t *a, int type)
+{
+	if ((unsigned)type > 2) return EINVAL;
+	a->__attr = (a->__attr & ~3) | type;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_once.c b/fusl/src/thread/pthread_once.c
new file mode 100644
index 0000000..a8f8aeb
--- /dev/null
+++ b/fusl/src/thread/pthread_once.c
@@ -0,0 +1,50 @@
+#include "pthread_impl.h"
+
+static void undo(void *control)
+{
+	/* Wake all waiters, since the waiter status is lost when
+	 * resetting control to the initial state. */
+	if (a_swap(control, 0) == 3)
+		__wake(control, -1, 1);
+}
+
+int __pthread_once_full(pthread_once_t *control, void (*init)(void))
+{
+	/* Try to enter initializing state. Four possibilities:
+	 *  0 - we're the first or the other cancelled; run init
+	 *  1 - another thread is running init; wait
+	 *  2 - another thread finished running init; just return
+	 *  3 - another thread is running init, waiters present; wait */
+
+	for (;;) switch (a_cas(control, 0, 1)) {
+	case 0:
+		pthread_cleanup_push(undo, control);
+		init();
+		pthread_cleanup_pop(0);
+
+		if (a_swap(control, 2) == 3)
+			__wake(control, -1, 1);
+		return 0;
+	case 1:
+		/* If this fails, so will __wait. */
+		a_cas(control, 1, 3);
+	case 3:
+		__wait(control, 0, 3, 1);
+		continue;
+	case 2:
+		return 0;
+	}
+}
+
+int __pthread_once(pthread_once_t *control, void (*init)(void))
+{
+	/* Return immediately if init finished before, but ensure that
+	 * effects of the init routine are visible to the caller. */
+	if (*(volatile int *)control == 2) {
+		a_barrier();
+		return 0;
+	}
+	return __pthread_once_full(control, init);
+}
+
+weak_alias(__pthread_once, pthread_once);
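
An illustrative one-time-initialization sketch, not from the musl tree, exercising the four-state protocol documented in __pthread_once_full:

#include <pthread.h>
#include <stdio.h>

static pthread_once_t once = PTHREAD_ONCE_INIT;

static void init_once(void)
{
	/* Runs exactly once even if many threads race into pthread_once. */
	puts("initialized");
}

int use_resource(void)
{
	/* After init completes, this is just a load of state 2 plus a
	 * barrier on the fast path in __pthread_once. */
	return pthread_once(&once, init_once);
}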
diff --git a/fusl/src/thread/pthread_rwlock_destroy.c b/fusl/src/thread/pthread_rwlock_destroy.c
new file mode 100644
index 0000000..49ecfbd
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rw)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlock_init.c b/fusl/src/thread/pthread_rwlock_init.c
new file mode 100644
index 0000000..a2c0b47
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_init.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_init(pthread_rwlock_t *restrict rw, const pthread_rwlockattr_t *restrict a)
+{
+	*rw = (pthread_rwlock_t){0};
+	if (a) rw->_rw_shared = a->__attr[0]*128;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlock_rdlock.c b/fusl/src/thread/pthread_rwlock_rdlock.c
new file mode 100644
index 0000000..0800d21
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_rdlock.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rw)
+{
+	return pthread_rwlock_timedrdlock(rw, 0);
+}
diff --git a/fusl/src/thread/pthread_rwlock_timedrdlock.c b/fusl/src/thread/pthread_rwlock_timedrdlock.c
new file mode 100644
index 0000000..0d5d0d6
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_timedrdlock.c
@@ -0,0 +1,23 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
+{
+	int r, t;
+
+	r = pthread_rwlock_tryrdlock(rw);
+	if (r != EBUSY) return r;
+	
+	int spins = 100;
+	while (spins-- && rw->_rw_lock && !rw->_rw_waiters) a_spin();
+
+	while ((r=pthread_rwlock_tryrdlock(rw))==EBUSY) {
+		if (!(r=rw->_rw_lock) || (r&0x7fffffff)!=0x7fffffff) continue;
+		t = r | 0x80000000;
+		a_inc(&rw->_rw_waiters);
+		a_cas(&rw->_rw_lock, r, t);
+		r = __timedwait(&rw->_rw_lock, t, CLOCK_REALTIME, at, rw->_rw_shared^128);
+		a_dec(&rw->_rw_waiters);
+		if (r && r != EINTR) return r;
+	}
+	return r;
+}
diff --git a/fusl/src/thread/pthread_rwlock_timedwrlock.c b/fusl/src/thread/pthread_rwlock_timedwrlock.c
new file mode 100644
index 0000000..7f26dad
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_timedwrlock.c
@@ -0,0 +1,23 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rw, const struct timespec *restrict at)
+{
+	int r, t;
+	
+	r = pthread_rwlock_trywrlock(rw);
+	if (r != EBUSY) return r;
+	
+	int spins = 100;
+	while (spins-- && rw->_rw_lock && !rw->_rw_waiters) a_spin();
+
+	while ((r=pthread_rwlock_trywrlock(rw))==EBUSY) {
+		if (!(r=rw->_rw_lock)) continue;
+		t = r | 0x80000000;
+		a_inc(&rw->_rw_waiters);
+		a_cas(&rw->_rw_lock, r, t);
+		r = __timedwait(&rw->_rw_lock, t, CLOCK_REALTIME, at, rw->_rw_shared^128);
+		a_dec(&rw->_rw_waiters);
+		if (r && r != EINTR) return r;
+	}
+	return r;
+}
diff --git a/fusl/src/thread/pthread_rwlock_tryrdlock.c b/fusl/src/thread/pthread_rwlock_tryrdlock.c
new file mode 100644
index 0000000..fa271fc
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_tryrdlock.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
+{
+	int val, cnt;
+	do {
+		val = rw->_rw_lock;
+		cnt = val & 0x7fffffff;
+		if (cnt == 0x7fffffff) return EBUSY;
+		if (cnt == 0x7ffffffe) return EAGAIN;
+	} while (a_cas(&rw->_rw_lock, val, val+1) != val);
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlock_trywrlock.c b/fusl/src/thread/pthread_rwlock_trywrlock.c
new file mode 100644
index 0000000..bb3d3a9
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_trywrlock.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rw)
+{
+	if (a_cas(&rw->_rw_lock, 0, 0x7fffffff)) return EBUSY;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlock_unlock.c b/fusl/src/thread/pthread_rwlock_unlock.c
new file mode 100644
index 0000000..7b5eec8
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_unlock.c
@@ -0,0 +1,18 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rw)
+{
+	int val, cnt, waiters, new, priv = rw->_rw_shared^128;
+
+	do {
+		val = rw->_rw_lock;
+		cnt = val & 0x7fffffff;
+		waiters = rw->_rw_waiters;
+		new = (cnt == 0x7fffffff || cnt == 1) ? 0 : val-1;
+	} while (a_cas(&rw->_rw_lock, val, new) != val);
+
+	if (!new && (waiters || val<0))
+		__wake(&rw->_rw_lock, cnt, priv);
+
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlock_wrlock.c b/fusl/src/thread/pthread_rwlock_wrlock.c
new file mode 100644
index 0000000..7f33535
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlock_wrlock.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rw)
+{
+	return pthread_rwlock_timedwrlock(rw, 0);
+}
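
A small read-mostly counter sketch (illustrative only) using the rwlock code above; readers share the count field, writers take the exclusive 0x7fffffff value:

#include <pthread.h>

static pthread_rwlock_t rw = PTHREAD_RWLOCK_INITIALIZER;
static int counter;

int read_counter(void)
{
	int v;
	pthread_rwlock_rdlock(&rw);	/* many readers may hold this at once */
	v = counter;
	pthread_rwlock_unlock(&rw);
	return v;
}

void bump_counter(void)
{
	pthread_rwlock_wrlock(&rw);	/* writers get exclusive access */
	counter++;
	pthread_rwlock_unlock(&rw);
}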
diff --git a/fusl/src/thread/pthread_rwlockattr_destroy.c b/fusl/src/thread/pthread_rwlockattr_destroy.c
new file mode 100644
index 0000000..fc8d611
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlockattr_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_rwlockattr_destroy(pthread_rwlockattr_t *a)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlockattr_init.c b/fusl/src/thread/pthread_rwlockattr_init.c
new file mode 100644
index 0000000..e742069
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlockattr_init.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_rwlockattr_init(pthread_rwlockattr_t *a)
+{
+	*a = (pthread_rwlockattr_t){0};
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_rwlockattr_setpshared.c b/fusl/src/thread/pthread_rwlockattr_setpshared.c
new file mode 100644
index 0000000..e706197
--- /dev/null
+++ b/fusl/src/thread/pthread_rwlockattr_setpshared.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+
+int pthread_rwlockattr_setpshared(pthread_rwlockattr_t *a, int pshared)
+{
+	if (pshared > 1U) return EINVAL;
+	a->__attr[0] = pshared;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_self.c b/fusl/src/thread/pthread_self.c
new file mode 100644
index 0000000..241a620
--- /dev/null
+++ b/fusl/src/thread/pthread_self.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+#include <threads.h>
+#include "libc.h"
+
+static pthread_t __pthread_self_internal()
+{
+	return __pthread_self();
+}
+
+weak_alias(__pthread_self_internal, pthread_self);
+weak_alias(__pthread_self_internal, thrd_current);
diff --git a/fusl/src/thread/pthread_setcancelstate.c b/fusl/src/thread/pthread_setcancelstate.c
new file mode 100644
index 0000000..5ab8c33
--- /dev/null
+++ b/fusl/src/thread/pthread_setcancelstate.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+
+int __pthread_setcancelstate(int new, int *old)
+{
+	if (new > 2U) return EINVAL;
+	struct pthread *self = __pthread_self();
+	if (old) *old = self->canceldisable;
+	self->canceldisable = new;
+	return 0;
+}
+
+weak_alias(__pthread_setcancelstate, pthread_setcancelstate);
diff --git a/fusl/src/thread/pthread_setcanceltype.c b/fusl/src/thread/pthread_setcanceltype.c
new file mode 100644
index 0000000..bf0a3f3
--- /dev/null
+++ b/fusl/src/thread/pthread_setcanceltype.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+
+int pthread_setcanceltype(int new, int *old)
+{
+	struct pthread *self = __pthread_self();
+	if (new > 1U) return EINVAL;
+	if (old) *old = self->cancelasync;
+	self->cancelasync = new;
+	if (new) pthread_testcancel();
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_setconcurrency.c b/fusl/src/thread/pthread_setconcurrency.c
new file mode 100644
index 0000000..091abf9
--- /dev/null
+++ b/fusl/src/thread/pthread_setconcurrency.c
@@ -0,0 +1,9 @@
+#include <pthread.h>
+#include <errno.h>
+
+int pthread_setconcurrency(int val)
+{
+	if (val < 0) return EINVAL;
+	if (val > 0) return EAGAIN;
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_setschedparam.c b/fusl/src/thread/pthread_setschedparam.c
new file mode 100644
index 0000000..c4738d6
--- /dev/null
+++ b/fusl/src/thread/pthread_setschedparam.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+
+int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
+{
+	int r;
+	__lock(t->killlock);
+	r = t->dead ? ESRCH : -__syscall(SYS_sched_setscheduler, t->tid, policy, param);
+	__unlock(t->killlock);
+	return r;
+}
diff --git a/fusl/src/thread/pthread_setschedprio.c b/fusl/src/thread/pthread_setschedprio.c
new file mode 100644
index 0000000..e0bdc03
--- /dev/null
+++ b/fusl/src/thread/pthread_setschedprio.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+
+int pthread_setschedprio(pthread_t t, int prio)
+{
+	int r;
+	__lock(t->killlock);
+	r = t->dead ? ESRCH : -__syscall(SYS_sched_setparam, t->tid, &prio);
+	__unlock(t->killlock);
+	return r;
+}
diff --git a/fusl/src/thread/pthread_setspecific.c b/fusl/src/thread/pthread_setspecific.c
new file mode 100644
index 0000000..55e46a8
--- /dev/null
+++ b/fusl/src/thread/pthread_setspecific.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+
+int pthread_setspecific(pthread_key_t k, const void *x)
+{
+	struct pthread *self = __pthread_self();
+	/* Avoid unnecessary COW */
+	if (self->tsd[k] != x) {
+		self->tsd[k] = (void *)x;
+		self->tsd_used = 1;
+	}
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_sigmask.c b/fusl/src/thread/pthread_sigmask.c
new file mode 100644
index 0000000..88c333f
--- /dev/null
+++ b/fusl/src/thread/pthread_sigmask.c
@@ -0,0 +1,19 @@
+#include <signal.h>
+#include <errno.h>
+#include "syscall.h"
+
+int pthread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict old)
+{
+	int ret;
+	if ((unsigned)how - SIG_BLOCK > 2U) return EINVAL;
+	ret = -__syscall(SYS_rt_sigprocmask, how, set, old, _NSIG/8);
+	if (!ret && old) {
+		if (sizeof old->__bits[0] == 8) {
+			old->__bits[0] &= ~0x380000000ULL;
+		} else {
+			old->__bits[0] &= ~0x80000000UL;
+			old->__bits[1] &= ~0x3UL;
+		}
+	}
+	return ret;
+}
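
For orientation, a hedged example of typical pthread_sigmask use; note that the code above strips the implementation-internal signals from any returned old mask:

#include <pthread.h>
#include <signal.h>

/* Block SIGINT in the calling thread so a dedicated thread can
 * collect it with sigwait instead of an async handler. */
int block_sigint(void)
{
	sigset_t set;
	sigemptyset(&set);
	sigaddset(&set, SIGINT);
	return pthread_sigmask(SIG_BLOCK, &set, 0);
}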
diff --git a/fusl/src/thread/pthread_spin_destroy.c b/fusl/src/thread/pthread_spin_destroy.c
new file mode 100644
index 0000000..e65a820
--- /dev/null
+++ b/fusl/src/thread/pthread_spin_destroy.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_spin_destroy(pthread_spinlock_t *s)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_spin_init.c b/fusl/src/thread/pthread_spin_init.c
new file mode 100644
index 0000000..681881c
--- /dev/null
+++ b/fusl/src/thread/pthread_spin_init.c
@@ -0,0 +1,6 @@
+#include "pthread_impl.h"
+
+int pthread_spin_init(pthread_spinlock_t *s, int shared)
+{
+	return *s = 0;
+}
diff --git a/fusl/src/thread/pthread_spin_lock.c b/fusl/src/thread/pthread_spin_lock.c
new file mode 100644
index 0000000..ded2b65
--- /dev/null
+++ b/fusl/src/thread/pthread_spin_lock.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+#include <errno.h>
+
+int pthread_spin_lock(pthread_spinlock_t *s)
+{
+	while (*(volatile int *)s || a_cas(s, 0, EBUSY)) a_spin();
+	return 0;
+}
diff --git a/fusl/src/thread/pthread_spin_trylock.c b/fusl/src/thread/pthread_spin_trylock.c
new file mode 100644
index 0000000..5284fda
--- /dev/null
+++ b/fusl/src/thread/pthread_spin_trylock.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+#include <errno.h>
+
+int pthread_spin_trylock(pthread_spinlock_t *s)
+{
+	return a_cas(s, 0, EBUSY);
+}
diff --git a/fusl/src/thread/pthread_spin_unlock.c b/fusl/src/thread/pthread_spin_unlock.c
new file mode 100644
index 0000000..724d9e0
--- /dev/null
+++ b/fusl/src/thread/pthread_spin_unlock.c
@@ -0,0 +1,7 @@
+#include "pthread_impl.h"
+
+int pthread_spin_unlock(pthread_spinlock_t *s)
+{
+	a_store(s, 0);
+	return 0;
+}
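
A brief sketch, not part of the import, of spinlock use; as the lock/trylock code shows, contention is handled purely by busy-waiting, so critical sections should stay tiny:

#include <pthread.h>

static pthread_spinlock_t sl;
static long hits;

void counter_setup(void)
{
	pthread_spin_init(&sl, PTHREAD_PROCESS_PRIVATE);
}

void count_hit(void)
{
	pthread_spin_lock(&sl);		/* spins with a_spin until acquired */
	hits++;
	pthread_spin_unlock(&sl);
}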
diff --git a/fusl/src/thread/pthread_testcancel.c b/fusl/src/thread/pthread_testcancel.c
new file mode 100644
index 0000000..ee48e6d
--- /dev/null
+++ b/fusl/src/thread/pthread_testcancel.c
@@ -0,0 +1,15 @@
+#include "pthread_impl.h"
+#include "libc.h"
+
+static void dummy()
+{
+}
+
+weak_alias(dummy, __testcancel);
+
+void __pthread_testcancel()
+{
+	__testcancel();
+}
+
+weak_alias(__pthread_testcancel, pthread_testcancel);
diff --git a/fusl/src/thread/sem_destroy.c b/fusl/src/thread/sem_destroy.c
new file mode 100644
index 0000000..f4aced5
--- /dev/null
+++ b/fusl/src/thread/sem_destroy.c
@@ -0,0 +1,6 @@
+#include <semaphore.h>
+
+int sem_destroy(sem_t *sem)
+{
+	return 0;
+}
diff --git a/fusl/src/thread/sem_getvalue.c b/fusl/src/thread/sem_getvalue.c
new file mode 100644
index 0000000..d9d8307
--- /dev/null
+++ b/fusl/src/thread/sem_getvalue.c
@@ -0,0 +1,8 @@
+#include <semaphore.h>
+
+int sem_getvalue(sem_t *restrict sem, int *restrict valp)
+{
+	int val = sem->__val[0];
+	*valp = val < 0 ? 0 : val;
+	return 0;
+}
diff --git a/fusl/src/thread/sem_init.c b/fusl/src/thread/sem_init.c
new file mode 100644
index 0000000..5509243
--- /dev/null
+++ b/fusl/src/thread/sem_init.c
@@ -0,0 +1,15 @@
+#include <semaphore.h>
+#include <limits.h>
+#include <errno.h>
+
+int sem_init(sem_t *sem, int pshared, unsigned value)
+{
+	if (value > SEM_VALUE_MAX) {
+		errno = EINVAL;
+		return -1;
+	}
+	sem->__val[0] = value;
+	sem->__val[1] = 0;
+	sem->__val[2] = pshared ? 0 : 128;
+	return 0;
+}
diff --git a/fusl/src/thread/sem_open.c b/fusl/src/thread/sem_open.c
new file mode 100644
index 0000000..fda0acd
--- /dev/null
+++ b/fusl/src/thread/sem_open.c
@@ -0,0 +1,175 @@
+#include <semaphore.h>
+#include <sys/mman.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <time.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "libc.h"
+
+char *__shm_mapname(const char *, char *);
+
+static struct {
+	ino_t ino;
+	sem_t *sem;
+	int refcnt;
+} *semtab;
+static volatile int lock[2];
+
+#define FLAGS (O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK)
+
+sem_t *sem_open(const char *name, int flags, ...)
+{
+	va_list ap;
+	mode_t mode;
+	unsigned value;
+	int fd, i, e, slot, first=1, cnt, cs;
+	sem_t newsem;
+	void *map;
+	char tmp[64];
+	struct timespec ts;
+	struct stat st;
+	char buf[NAME_MAX+10];
+
+	if (!(name = __shm_mapname(name, buf)))
+		return SEM_FAILED;
+
+	LOCK(lock);
+	/* Allocate table if we don't have one yet */
+	if (!semtab && !(semtab = calloc(sizeof *semtab, SEM_NSEMS_MAX))) {
+		UNLOCK(lock);
+		return SEM_FAILED;
+	}
+
+	/* Reserve a slot in case this semaphore is not mapped yet;
+	 * this is necessary because there is no way to handle
+	 * failures after creation of the file. */
+	slot = -1;
+	for (cnt=i=0; i<SEM_NSEMS_MAX; i++) {
+		cnt += semtab[i].refcnt;
+		if (!semtab[i].sem && slot < 0) slot = i;
+	}
+	/* Avoid possibility of overflow later */
+	if (cnt == INT_MAX || slot < 0) {
+		errno = EMFILE;
+		UNLOCK(lock);
+		return SEM_FAILED;
+	}
+	/* Dummy pointer to make a reservation */
+	semtab[slot].sem = (sem_t *)-1;
+	UNLOCK(lock);
+
+	flags &= (O_CREAT|O_EXCL);
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+
+	/* Early failure check for exclusive open; otherwise the case
+	 * where the semaphore already exists is expensive. */
+	if (flags == (O_CREAT|O_EXCL) && access(name, F_OK) == 0) {
+		errno = EEXIST;
+		goto fail;
+	}
+
+	for (;;) {
+		/* If exclusive mode is not requested, try opening an
+		 * existing file first and fall back to creation. */
+		if (flags != (O_CREAT|O_EXCL)) {
+			fd = open(name, FLAGS);
+			if (fd >= 0) {
+				if (fstat(fd, &st) < 0 ||
+				    (map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+					close(fd);
+					goto fail;
+				}
+				close(fd);
+				break;
+			}
+			if (errno != ENOENT)
+				goto fail;
+		}
+		if (!(flags & O_CREAT))
+			goto fail;
+		if (first) {
+			first = 0;
+			va_start(ap, flags);
+			mode = va_arg(ap, mode_t) & 0666;
+			value = va_arg(ap, unsigned);
+			va_end(ap);
+			if (value > SEM_VALUE_MAX) {
+				errno = EINVAL;
+				goto fail;
+			}
+			sem_init(&newsem, 1, value);
+		}
+		/* Create a temp file with the new semaphore contents
+		 * and attempt to atomically link it as the new name */
+		clock_gettime(CLOCK_REALTIME, &ts);
+		snprintf(tmp, sizeof(tmp), "/dev/shm/tmp-%d", (int)ts.tv_nsec);
+		fd = open(tmp, O_CREAT|O_EXCL|FLAGS, mode);
+		if (fd < 0) {
+			if (errno == EEXIST) continue;
+			goto fail;
+		}
+		if (write(fd, &newsem, sizeof newsem) != sizeof newsem || fstat(fd, &st) < 0 ||
+		    (map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+			close(fd);
+			unlink(tmp);
+			goto fail;
+		}
+		close(fd);
+		e = link(tmp, name) ? errno : 0;
+		unlink(tmp);
+		if (!e) break;
+		munmap(map, sizeof(sem_t));
+		/* Failure is only fatal when doing an exclusive open;
+		 * otherwise, next iteration will try to open the
+		 * existing file. */
+		if (e != EEXIST || flags == (O_CREAT|O_EXCL))
+			goto fail;
+	}
+
+	/* See if the newly mapped semaphore is already mapped. If
+	 * so, unmap the new mapping and use the existing one. Otherwise,
+	 * add it to the table of mapped semaphores. */
+	LOCK(lock);
+	for (i=0; i<SEM_NSEMS_MAX && semtab[i].ino != st.st_ino; i++);
+	if (i<SEM_NSEMS_MAX) {
+		munmap(map, sizeof(sem_t));
+		semtab[slot].sem = 0;
+		slot = i;
+		map = semtab[i].sem;
+	}
+	semtab[slot].refcnt++;
+	semtab[slot].sem = map;
+	semtab[slot].ino = st.st_ino;
+	UNLOCK(lock);
+	pthread_setcancelstate(cs, 0);
+	return map;
+
+fail:
+	pthread_setcancelstate(cs, 0);
+	LOCK(lock);
+	semtab[slot].sem = 0;
+	UNLOCK(lock);
+	return SEM_FAILED;
+}
+
+int sem_close(sem_t *sem)
+{
+	int i;
+	LOCK(lock);
+	for (i=0; i<SEM_NSEMS_MAX && semtab[i].sem != sem; i++);
+	if (!--semtab[i].refcnt) {
+		semtab[i].sem = 0;
+		semtab[i].ino = 0;
+	}
+	UNLOCK(lock);
+	munmap(sem, sizeof *sem);
+	return 0;
+}
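
An illustrative cross-process sketch of the named-semaphore path above; the semaphore name and mode are made up for the example:

#include <semaphore.h>
#include <fcntl.h>

/* Post to a named semaphore shared with an unrelated process,
 * creating it (value 0) if it does not exist yet. */
int signal_peer(void)
{
	sem_t *s = sem_open("/example-sem", O_CREAT, 0600, 0);
	if (s == SEM_FAILED) return -1;
	sem_post(s);
	return sem_close(s);
}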
diff --git a/fusl/src/thread/sem_post.c b/fusl/src/thread/sem_post.c
new file mode 100644
index 0000000..31e3293
--- /dev/null
+++ b/fusl/src/thread/sem_post.c
@@ -0,0 +1,17 @@
+#include <semaphore.h>
+#include "pthread_impl.h"
+
+int sem_post(sem_t *sem)
+{
+	int val, waiters, priv = sem->__val[2];
+	do {
+		val = sem->__val[0];
+		waiters = sem->__val[1];
+		if (val == SEM_VALUE_MAX) {
+			errno = EOVERFLOW;
+			return -1;
+		}
+	} while (a_cas(sem->__val, val, val+1+(val<0)) != val);
+	if (val<0 || waiters) __wake(sem->__val, 1, priv);
+	return 0;
+}
diff --git a/fusl/src/thread/sem_timedwait.c b/fusl/src/thread/sem_timedwait.c
new file mode 100644
index 0000000..8132eb1
--- /dev/null
+++ b/fusl/src/thread/sem_timedwait.c
@@ -0,0 +1,31 @@
+#include <semaphore.h>
+#include "pthread_impl.h"
+
+static void cleanup(void *p)
+{
+	a_dec(p);
+}
+
+int sem_timedwait(sem_t *restrict sem, const struct timespec *restrict at)
+{
+	pthread_testcancel();
+
+	if (!sem_trywait(sem)) return 0;
+
+	int spins = 100;
+	while (spins-- && sem->__val[0] <= 0 && !sem->__val[1]) a_spin();
+
+	while (sem_trywait(sem)) {
+		int r;
+		a_inc(sem->__val+1);
+		a_cas(sem->__val, 0, -1);
+		pthread_cleanup_push(cleanup, (void *)(sem->__val+1));
+		r = __timedwait_cp(sem->__val, -1, CLOCK_REALTIME, at, sem->__val[2]);
+		pthread_cleanup_pop(1);
+		if (r && r != EINTR) {
+			errno = r;
+			return -1;
+		}
+	}
+	return 0;
+}
diff --git a/fusl/src/thread/sem_trywait.c b/fusl/src/thread/sem_trywait.c
new file mode 100644
index 0000000..04edf46
--- /dev/null
+++ b/fusl/src/thread/sem_trywait.c
@@ -0,0 +1,13 @@
+#include <semaphore.h>
+#include "pthread_impl.h"
+
+int sem_trywait(sem_t *sem)
+{
+	int val;
+	while ((val=sem->__val[0]) > 0) {
+		int new = val-1-(val==1 && sem->__val[1]);
+		if (a_cas(sem->__val, val, new)==val) return 0;
+	}
+	errno = EAGAIN;
+	return -1;
+}
diff --git a/fusl/src/thread/sem_unlink.c b/fusl/src/thread/sem_unlink.c
new file mode 100644
index 0000000..c06134b
--- /dev/null
+++ b/fusl/src/thread/sem_unlink.c
@@ -0,0 +1,7 @@
+#include <semaphore.h>
+#include <sys/mman.h>
+
+int sem_unlink(const char *name)
+{
+	return shm_unlink(name);
+}
diff --git a/fusl/src/thread/sem_wait.c b/fusl/src/thread/sem_wait.c
new file mode 100644
index 0000000..264194f
--- /dev/null
+++ b/fusl/src/thread/sem_wait.c
@@ -0,0 +1,6 @@
+#include <semaphore.h>
+
+int sem_wait(sem_t *sem)
+{
+	return sem_timedwait(sem, 0);
+}
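
A classic bounded-buffer sketch, provided here only as an illustration of the sem_* implementation above:

#include <semaphore.h>
#include <pthread.h>

#define NITEMS 16

static sem_t slots, items;
static int ring[NITEMS], head, tail;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

void queue_init(void)
{
	sem_init(&slots, 0, NITEMS);	/* free slots available to producers */
	sem_init(&items, 0, 0);		/* filled slots available to consumers */
}

void queue_put(int v)
{
	sem_wait(&slots);
	pthread_mutex_lock(&lock);
	ring[head] = v;
	head = (head+1) % NITEMS;
	pthread_mutex_unlock(&lock);
	sem_post(&items);
}

int queue_get(void)
{
	int v;
	sem_wait(&items);
	pthread_mutex_lock(&lock);
	v = ring[tail];
	tail = (tail+1) % NITEMS;
	pthread_mutex_unlock(&lock);
	sem_post(&slots);
	return v;
}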
diff --git a/fusl/src/thread/sh/__set_thread_area.s b/fusl/src/thread/sh/__set_thread_area.s
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/fusl/src/thread/sh/__set_thread_area.s
diff --git a/fusl/src/thread/sh/__unmapself.s b/fusl/src/thread/sh/__unmapself.s
new file mode 100644
index 0000000..0161d53
--- /dev/null
+++ b/fusl/src/thread/sh/__unmapself.s
@@ -0,0 +1,22 @@
+.text
+.global __unmapself_sh_mmu
+.type   __unmapself_sh_mmu, @function
+__unmapself_sh_mmu:
+	mov   #91, r3  ! SYS_munmap
+	trapa #31
+
+	or    r0, r0
+	or    r0, r0
+	or    r0, r0
+	or    r0, r0
+	or    r0, r0
+
+	mov   #1, r3   ! SYS_exit
+	mov   #0, r4
+	trapa #31
+
+	or    r0, r0
+	or    r0, r0
+	or    r0, r0
+	or    r0, r0
+	or    r0, r0
diff --git a/fusl/src/thread/sh/clone.s b/fusl/src/thread/sh/clone.s
new file mode 100644
index 0000000..aa4d0df
--- /dev/null
+++ b/fusl/src/thread/sh/clone.s
@@ -0,0 +1,53 @@
+.text
+.global __clone
+.type   __clone, @function
+__clone:
+! incoming: fn stack flags arg ptid tls      ctid
+!           r4 r5    r6    r7  @r15 @(4,r15) @(8,r15)
+
+	mov   #-16, r0
+	and   r0, r5
+
+	mov   r4, r1         ! r1 = fn
+	mov   r7, r2         ! r2 = arg
+
+	mov   #120,     r3   ! r3 = __NR_clone
+	mov   r6,       r4   ! r4 = flags
+	!mov  r5,       r5   ! r5 = stack
+	mov.l @r15,     r6   ! r6 = ptid
+	mov.l @(8,r15), r7   ! r7 = ctid
+	mov.l @(4,r15), r0   ! r0 = tls
+	trapa #31
+
+	or r0, r0
+	or r0, r0
+	or r0, r0
+	or r0, r0
+	or r0, r0
+
+	cmp/eq #0, r0
+	bt     1f
+
+	! we are the parent, return
+	rts
+	 nop
+
+1:	! we are the child, call fn(arg)
+	mov.l  1f, r0
+	mov    r1, r5
+	bsrf   r0
+	 mov    r2, r4
+
+2:	mov   #1, r3   ! __NR_exit
+	mov   r0, r4
+	trapa #31
+
+	or   r0, r0
+	or   r0, r0
+	or   r0, r0
+	or   r0, r0
+	or   r0, r0
+
+.align 2
+.hidden __shcall
+1:	.long __shcall@PCREL+(.-2b)
diff --git a/fusl/src/thread/sh/syscall_cp.s b/fusl/src/thread/sh/syscall_cp.s
new file mode 100644
index 0000000..bb848ef
--- /dev/null
+++ b/fusl/src/thread/sh/syscall_cp.s
@@ -0,0 +1,45 @@
+.text
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm, @function
+__syscall_cp_asm:
+
+__cp_begin:
+	mov.l @r4, r4
+	tst   r4, r4
+	bf    __cp_cancel
+	mov   r5, r3
+	mov   r6, r4
+	mov   r7, r5
+	mov.l @r15, r6
+	mov.l @(4,r15), r7
+	mov.l @(8,r15), r0
+	mov.l @(12,r15), r1
+	trapa #31
+
+__cp_end:
+	! work around hardware bug
+	or   r0, r0
+	or   r0, r0
+	or   r0, r0
+	or   r0, r0
+	or   r0, r0
+
+	rts
+	 nop
+
+__cp_cancel:
+	mov.l 2f, r0
+	braf  r0
+	 nop
+1:
+
+.align 2
+2:	.long __cancel@PCREL-(1b-.)
diff --git a/fusl/src/thread/synccall.c b/fusl/src/thread/synccall.c
new file mode 100644
index 0000000..000ec4e
--- /dev/null
+++ b/fusl/src/thread/synccall.c
@@ -0,0 +1,178 @@
+#include "pthread_impl.h"
+#include <semaphore.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <string.h>
+#include <ctype.h>
+#include "futex.h"
+#include "atomic.h"
+#include "../dirent/__dirent.h"
+
+static struct chain {
+	struct chain *next;
+	int tid;
+	sem_t target_sem, caller_sem;
+} *volatile head;
+
+static volatile int synccall_lock[2];
+static volatile int target_tid;
+static void (*callback)(void *), *context;
+static volatile int dummy = 0;
+weak_alias(dummy, __block_new_threads);
+
+static void handler(int sig)
+{
+	struct chain ch;
+	int old_errno = errno;
+
+	sem_init(&ch.target_sem, 0, 0);
+	sem_init(&ch.caller_sem, 0, 0);
+
+	ch.tid = __syscall(SYS_gettid);
+
+	do ch.next = head;
+	while (a_cas_p(&head, ch.next, &ch) != ch.next);
+
+	if (a_cas(&target_tid, ch.tid, 0) == (ch.tid | 0x80000000))
+		__syscall(SYS_futex, &target_tid, FUTEX_UNLOCK_PI|FUTEX_PRIVATE);
+
+	sem_wait(&ch.target_sem);
+	callback(context);
+	sem_post(&ch.caller_sem);
+	sem_wait(&ch.target_sem);
+
+	errno = old_errno;
+}
+
+void __synccall(void (*func)(void *), void *ctx)
+{
+	sigset_t oldmask;
+	int cs, i, r, pid, self;
+	DIR dir = {0};
+	struct dirent *de;
+	struct sigaction sa = { .sa_flags = 0, .sa_handler = handler };
+	struct chain *cp, *next;
+	struct timespec ts;
+
+	/* Blocking signals in two steps, first only app-level signals
+	 * before taking the lock, then all signals after taking the lock,
+	 * is necessary to achieve AS-safety. Blocking them all first would
+	 * deadlock if multiple threads called __synccall. Waiting to block
+	 * any until after the lock would allow re-entry in the same thread
+	 * with the lock already held. */
+	__block_app_sigs(&oldmask);
+	LOCK(synccall_lock);
+	__block_all_sigs(0);
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+
+	head = 0;
+
+	if (!libc.threaded) goto single_threaded;
+
+	callback = func;
+	context = ctx;
+
+	/* This atomic store ensures that any signaled threads will see the
+	 * above stores, and prevents more than a bounded number of threads,
+	 * those already in pthread_create, from creating new threads until
+	 * the value is cleared to zero again. */
+	a_store(&__block_new_threads, 1);
+
+	/* Block even implementation-internal signals, so that nothing
+	 * interrupts the SIGSYNCCALL handlers. The main possible source
+	 * of trouble is asynchronous cancellation. */
+	memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+	__libc_sigaction(SIGSYNCCALL, &sa, 0);
+
+	pid = __syscall(SYS_getpid);
+	self = __syscall(SYS_gettid);
+
+	/* Since opendir is not AS-safe, the DIR needs to be setup manually
+	 * in automatic storage. Thankfully this is easy. */
+	dir.fd = open("/proc/self/task", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+	if (dir.fd < 0) goto out;
+
+	/* Initially send one signal per counted thread. But since we can't
+	 * synchronize with thread creation/exit here, there could be too
+	 * few signals. This initial signaling is just an optimization, not
+	 * part of the logic. */
+	for (i=libc.threads_minus_1; i; i--)
+		__syscall(SYS_kill, pid, SIGSYNCCALL);
+
+	/* Loop scanning the kernel-provided thread list until it shows no
+	 * threads that have not already replied to the signal. */
+	for (;;) {
+		int miss_cnt = 0;
+		while ((de = readdir(&dir))) {
+			if (!isdigit(de->d_name[0])) continue;
+			int tid = atoi(de->d_name);
+			if (tid == self || !tid) continue;
+
+			/* Set the target thread as the PI futex owner before
+			 * checking if it's in the list of caught threads. If it
+			 * adds itself to the list after we check for it, then
+			 * it will see its own tid in the PI futex and perform
+			 * the unlock operation. */
+			a_store(&target_tid, tid);
+
+			/* Thread-already-caught is a success condition. */
+			for (cp = head; cp && cp->tid != tid; cp=cp->next);
+			if (cp) continue;
+
+			r = -__syscall(SYS_tgkill, pid, tid, SIGSYNCCALL);
+
+			/* Target thread exit is a success condition. */
+			if (r == ESRCH) continue;
+
+			/* The FUTEX_LOCK_PI operation is used to loan priority
+			 * to the target thread, which otherwise may be unable
+			 * to run. Timeout is necessary because there is a race
+			 * condition where the tid may be reused by a different
+			 * process. */
+			clock_gettime(CLOCK_REALTIME, &ts);
+			ts.tv_nsec += 10000000;
+			if (ts.tv_nsec >= 1000000000) {
+				ts.tv_sec++;
+				ts.tv_nsec -= 1000000000;
+			}
+			r = -__syscall(SYS_futex, &target_tid,
+				FUTEX_LOCK_PI|FUTEX_PRIVATE, 0, &ts);
+
+			/* Obtaining the lock means the thread responded. ESRCH
+			 * means the target thread exited, which is okay too. */
+			if (!r || r == ESRCH) continue;
+
+			miss_cnt++;
+		}
+		if (!miss_cnt) break;
+		rewinddir(&dir);
+	}
+	close(dir.fd);
+
+	/* Serialize execution of callback in caught threads. */
+	for (cp=head; cp; cp=cp->next) {
+		sem_post(&cp->target_sem);
+		sem_wait(&cp->caller_sem);
+	}
+
+	sa.sa_handler = SIG_IGN;
+	__libc_sigaction(SIGSYNCCALL, &sa, 0);
+
+single_threaded:
+	func(ctx);
+
+	/* Only release the caught threads once all threads, including the
+	 * caller, have returned from the callback function. */
+	for (cp=head; cp; cp=next) {
+		next = cp->next;
+		sem_post(&cp->target_sem);
+	}
+
+out:
+	a_store(&__block_new_threads, 0);
+	__wake(&__block_new_threads, -1, 1);
+
+	pthread_setcancelstate(cs, 0);
+	UNLOCK(synccall_lock);
+	__restore_sigs(&oldmask);
+}
diff --git a/fusl/src/thread/syscall_cp.c b/fusl/src/thread/syscall_cp.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/fusl/src/thread/syscall_cp.c
diff --git a/fusl/src/thread/thrd_create.c b/fusl/src/thread/thrd_create.c
new file mode 100644
index 0000000..e033669
--- /dev/null
+++ b/fusl/src/thread/thrd_create.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_create(pthread_t *restrict, const pthread_attr_t *restrict, void *(*)(void *), void *restrict);
+
+int thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
+{
+	int ret = __pthread_create(thr, __ATTRP_C11_THREAD, (void *(*)(void *))func, arg);
+	switch (ret) {
+	case 0:      return thrd_success;
+	case EAGAIN: return thrd_nomem;
+	default:     return thrd_error;
+	}
+}
diff --git a/fusl/src/thread/thrd_exit.c b/fusl/src/thread/thrd_exit.c
new file mode 100644
index 0000000..b66bd99
--- /dev/null
+++ b/fusl/src/thread/thrd_exit.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+_Noreturn void __pthread_exit(void *);
+
+_Noreturn void thrd_exit(int result)
+{
+	__pthread_exit((void*)(intptr_t)result);
+}
diff --git a/fusl/src/thread/thrd_join.c b/fusl/src/thread/thrd_join.c
new file mode 100644
index 0000000..ac66789
--- /dev/null
+++ b/fusl/src/thread/thrd_join.c
@@ -0,0 +1,12 @@
+#include <stdint.h>
+#include <threads.h>
+
+int __pthread_join(thrd_t, void**);
+
+int thrd_join(thrd_t t, int *res)
+{
+        void *pthread_res;
+        __pthread_join(t, &pthread_res);
+        if (res) *res = (int)(intptr_t)pthread_res;
+        return thrd_success;
+}
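
Finally, an illustrative C11-threads sketch; as the code above shows, thrd_create maps onto __pthread_create via __ATTRP_C11_THREAD and thrd_join narrows the pointer result back to int:

#include <threads.h>
#include <stdio.h>

static int work(void *arg)
{
	printf("C11 thread got %d\n", *(int *)arg);
	return 7;
}

int main(void)
{
	thrd_t t;
	int arg = 5, res;
	if (thrd_create(&t, work, &arg) != thrd_success)
		return 1;
	thrd_join(t, &res);	/* res becomes the thread's return value, 7 */
	printf("joined: %d\n", res);
	return 0;
}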
diff --git a/fusl/src/thread/thrd_sleep.c b/fusl/src/thread/thrd_sleep.c
new file mode 100644
index 0000000..e8dfe40
--- /dev/null
+++ b/fusl/src/thread/thrd_sleep.c
@@ -0,0 +1,13 @@
+#include <threads.h>
+#include <errno.h>
+#include "syscall.h"
+
+int thrd_sleep(const struct timespec *req, struct timespec *rem)
+{
+	int ret = __syscall(SYS_nanosleep, req, rem);
+	switch (ret) {
+	case 0:      return 0;
+	case -EINTR: return -1; /* value specified by C11 */
+	default:     return -2;
+	}
+}
diff --git a/fusl/src/thread/thrd_yield.c b/fusl/src/thread/thrd_yield.c
new file mode 100644
index 0000000..f7ad132
--- /dev/null
+++ b/fusl/src/thread/thrd_yield.c
@@ -0,0 +1,7 @@
+#include <threads.h>
+#include "syscall.h"
+
+void thrd_yield()
+{
+	__syscall(SYS_sched_yield);
+}
diff --git a/fusl/src/thread/tls.c b/fusl/src/thread/tls.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/fusl/src/thread/tls.c
diff --git a/fusl/src/thread/tss_create.c b/fusl/src/thread/tss_create.c
new file mode 100644
index 0000000..251d22b
--- /dev/null
+++ b/fusl/src/thread/tss_create.c
@@ -0,0 +1,11 @@
+#include <threads.h>
+
+int __pthread_key_create(tss_t *, void (*)(void *));
+
+int tss_create(tss_t *tss, tss_dtor_t dtor)
+{
+	/* Several distinct error returns are possible from the POSIX call;
+	 * C11 collapses them all into a single failure indication. This
+	 * cannot become a tail call unless thrd_error equals EAGAIN. */
+	return __pthread_key_create(tss, dtor) ? thrd_error : thrd_success;
+}
diff --git a/fusl/src/thread/tss_delete.c b/fusl/src/thread/tss_delete.c
new file mode 100644
index 0000000..35db103
--- /dev/null
+++ b/fusl/src/thread/tss_delete.c
@@ -0,0 +1,8 @@
+#include <threads.h>
+
+int __pthread_key_delete(tss_t k);
+
+void tss_delete(tss_t key)
+{
+	__pthread_key_delete(key);
+}
diff --git a/fusl/src/thread/tss_set.c b/fusl/src/thread/tss_set.c
new file mode 100644
index 0000000..70c4fb7
--- /dev/null
+++ b/fusl/src/thread/tss_set.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int tss_set(tss_t k, void *x)
+{
+	struct pthread *self = __pthread_self();
+	/* Avoid unnecessary COW */
+	if (self->tsd[k] != x) {
+		self->tsd[k] = x;
+		self->tsd_used = 1;
+	}
+	return thrd_success;
+}
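
Together tss_create, tss_set, and tss_delete behave like their pthread key counterparts; the tsd_used flag set above is what makes thread exit walk the registered destructors at all. A small usage sketch of the C11 surface (the key, destructor, and buffer are illustrative):

#include <threads.h>
#include <stdlib.h>

static tss_t buf_key;

/* Runs at thread exit for any thread whose slot is non-NULL. */
static void free_buf(void *p) { free(p); }

/* Call once, e.g. via call_once, before get_thread_buf is used. */
static int init_key(void) { return tss_create(&buf_key, free_buf); }

static void *get_thread_buf(void)
{
	void *p = tss_get(buf_key);
	if (!p) {
		p = malloc(4096);
		if (p && tss_set(buf_key, p) != thrd_success) {
			free(p);
			p = 0;
		}
	}
	return p;
}
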
diff --git a/fusl/src/thread/vmlock.c b/fusl/src/thread/vmlock.c
new file mode 100644
index 0000000..75f3cb7
--- /dev/null
+++ b/fusl/src/thread/vmlock.c
@@ -0,0 +1,21 @@
+#include "pthread_impl.h"
+
+static volatile int vmlock[2];
+
+void __vm_wait()
+{
+	int tmp;
+	while ((tmp=vmlock[0]))
+		__wait(vmlock, vmlock+1, tmp, 1);
+}
+
+void __vm_lock()
+{
+	a_inc(vmlock);
+}
+
+void __vm_unlock()
+{
+	if (a_fetch_add(vmlock, -1)==1 && vmlock[1])
+		__wake(vmlock, -1, 1);
+}
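
vmlock[0] counts threads currently inside a vm-sensitive critical section and vmlock[1] counts waiters; __vm_wait returns only once the count drains to zero. Elsewhere in musl this guards stores into shared mappings against a concurrent munmap/mremap, but the sketch below is only an illustration of the pattern (both function names are hypothetical):

#include "pthread_impl.h"

/* A writer that must not be torn by unmapping brackets its store. */
static void publish_word(volatile int *slot, int val)
{
	__vm_lock();
	*slot = val;
	__vm_unlock();
}

/* Code about to remove or move a mapping waits for writers to drain. */
static void about_to_unmap(void)
{
	__vm_wait();    /* returns once no __vm_lock holder remains */
}
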
diff --git a/fusl/src/thread/x32/__set_thread_area.s b/fusl/src/thread/x32/__set_thread_area.s
new file mode 100644
index 0000000..e0daf72
--- /dev/null
+++ b/fusl/src/thread/x32/__set_thread_area.s
@@ -0,0 +1,10 @@
+/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */
+.text
+.global __set_thread_area
+.type __set_thread_area,@function
+__set_thread_area:
+	mov %edi,%esi           /* shift for syscall */
+	movl $0x1002,%edi       /* SET_FS register */
+	movl $0x4000009e,%eax          /* set fs segment to */
+	syscall                 /* arch_prctl(SET_FS, arg)*/
+	ret
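
The constants above are ARCH_SET_FS (0x1002) and the x32 syscall number for arch_prctl, i.e. 158 with the 0x40000000 x32 bit ORed in (the x86_64 version later in this diff uses plain 158). A rough C equivalent of the call, assuming <asm/prctl.h> and SYS_arch_prctl are available (the wrapper name is illustrative):

#include <asm/prctl.h>      /* ARCH_SET_FS == 0x1002 */
#include <sys/syscall.h>
#include <unistd.h>

/* Point %fs at the new thread descriptor, as the assembly does. */
static int set_thread_pointer(void *p)
{
	return syscall(SYS_arch_prctl, ARCH_SET_FS, p);
}
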
diff --git a/fusl/src/thread/x32/__unmapself.s b/fusl/src/thread/x32/__unmapself.s
new file mode 100644
index 0000000..d925460
--- /dev/null
+++ b/fusl/src/thread/x32/__unmapself.s
@@ -0,0 +1,10 @@
+/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */
+.text
+.global __unmapself
+.type   __unmapself,@function
+__unmapself:
+	movl $0x4000000b,%eax   /* SYS_munmap */
+	syscall         /* munmap(arg2,arg3) */
+	xor %rdi,%rdi   /* exit() args: always return success */
+	movl $0x4000003c,%eax   /* SYS_exit */
+	syscall         /* exit(0) */
diff --git a/fusl/src/thread/x32/clone.s b/fusl/src/thread/x32/clone.s
new file mode 100644
index 0000000..eed4615
--- /dev/null
+++ b/fusl/src/thread/x32/clone.s
@@ -0,0 +1,25 @@
+.text
+.global __clone
+.type   __clone,@function
+__clone:
+	movl $0x40000038,%eax /* SYS_clone */
+	mov %rdi,%r11
+	mov %rdx,%rdi
+	mov %r8,%rdx
+	mov %r9,%r8
+	mov 8(%rsp),%r10
+	mov %r11,%r9
+	and $-16,%rsi
+	sub $8,%rsi
+	mov %rcx,(%rsi)
+	syscall
+	test %eax,%eax
+	jnz 1f
+	xor %ebp,%ebp
+	pop %rdi
+	call *%r9
+	mov %eax,%edi
+	movl $0x4000003c,%eax /* SYS_exit */
+	syscall
+	hlt
+1:	ret
diff --git a/fusl/src/thread/x32/syscall_cp.s b/fusl/src/thread/x32/syscall_cp.s
new file mode 100644
index 0000000..9805af0
--- /dev/null
+++ b/fusl/src/thread/x32/syscall_cp.s
@@ -0,0 +1,31 @@
+.text
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_internal
+.hidden __syscall_cp_internal
+.type   __syscall_cp_internal,@function
+__syscall_cp_internal:
+
+__cp_begin:
+	mov (%rdi),%eax
+	test %eax,%eax
+	jnz __cp_cancel
+	mov %rdi,%r11
+	mov %rsi,%rax
+	mov %rdx,%rdi
+	mov %rcx,%rsi
+	mov %r8,%rdx
+	mov %r9,%r10
+	mov 8(%rsp),%r8
+	mov 16(%rsp),%r9
+	mov %r11,8(%rsp)
+	syscall
+__cp_end:
+	ret
+__cp_cancel:
+	jmp __cancel
diff --git a/fusl/src/thread/x86_64/__set_thread_area.s b/fusl/src/thread/x86_64/__set_thread_area.s
new file mode 100644
index 0000000..f3ff4f6
--- /dev/null
+++ b/fusl/src/thread/x86_64/__set_thread_area.s
@@ -0,0 +1,10 @@
+/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */
+.text
+.global __set_thread_area
+.type __set_thread_area,@function
+__set_thread_area:
+	mov %rdi,%rsi           /* shift for syscall */
+	movl $0x1002,%edi       /* SET_FS register */
+	movl $158,%eax          /* set fs segment to */
+	syscall                 /* arch_prctl(SET_FS, arg)*/
+	ret
diff --git a/fusl/src/thread/x86_64/__unmapself.s b/fusl/src/thread/x86_64/__unmapself.s
new file mode 100644
index 0000000..e2689e6
--- /dev/null
+++ b/fusl/src/thread/x86_64/__unmapself.s
@@ -0,0 +1,10 @@
+/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */
+.text
+.global __unmapself
+.type   __unmapself,@function
+__unmapself:
+	movl $11,%eax   /* SYS_munmap */
+	syscall         /* munmap(arg2,arg3) */
+	xor %rdi,%rdi   /* exit() args: always return success */
+	movl $60,%eax   /* SYS_exit */
+	syscall         /* exit(0) */
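
This has to be assembly rather than C: the mapping being removed contains the calling thread's own stack, so after SYS_munmap (11 on x86_64) nothing may touch the stack, and the thread must fall straight into SYS_exit (60). The declaration it satisfies, stated here as an assumption about how the thread-exit path elsewhere in the tree calls it:

#include <stddef.h>

/* The exiting, detached thread frees its own stack mapping and then
 * terminates without returning, since there is no stack to return to. */
void __unmapself(void *base, size_t size);
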
diff --git a/fusl/src/thread/x86_64/clone.s b/fusl/src/thread/x86_64/clone.s
new file mode 100644
index 0000000..ee59903
--- /dev/null
+++ b/fusl/src/thread/x86_64/clone.s
@@ -0,0 +1,27 @@
+.text
+.global __clone
+.type   __clone,@function
+__clone:
+	xor %eax,%eax
+	mov $56,%al
+	mov %rdi,%r11
+	mov %rdx,%rdi
+	mov %r8,%rdx
+	mov %r9,%r8
+	mov 8(%rsp),%r10
+	mov %r11,%r9
+	and $-16,%rsi
+	sub $8,%rsi
+	mov %rcx,(%rsi)
+	syscall
+	test %eax,%eax
+	jnz 1f
+	xor %ebp,%ebp
+	pop %rdi
+	call *%r9
+	mov %eax,%edi
+	xor %eax,%eax
+	mov $60,%al
+	syscall
+	hlt
+1:	ret
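
The shuffle above converts the C argument order of __clone into the kernel's clone(flags, stack, ptid, ctid, tls) order, keeps func in a register the syscall preserves, and pushes arg onto the 16-byte-aligned child stack so the child can pop it into %rdi and call func (the x32 variant earlier differs only in using the 0x40000000-ORed syscall numbers). A declaration-level sketch of the mapping; the prototype is stated as an assumption about how the rest of the tree calls this:

#include <stddef.h>

/* Variadic prototype used by the thread-creation path; the trailing
 * ptid, tls, ctid arguments are only read when the matching CLONE_*
 * flags are set. */
int __clone(int (*func)(void *), void *stack, int flags, void *arg, ...);

/* Register mapping performed by the assembly above (x86_64):
 *   C argument             kernel argument
 *   func     (%rdi)   ->   saved in %r9, called by the child
 *   stack    (%rsi)   ->   %rsi (aligned to 16, arg pushed on top)
 *   flags    (%rdx)   ->   %rdi
 *   arg      (%rcx)   ->   stored at the top of the child stack
 *   ptid     (%r8)    ->   %rdx
 *   tls      (%r9)    ->   %r8
 *   ctid  (8(%rsp))   ->   %r10
 */
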
diff --git a/fusl/src/thread/x86_64/syscall_cp.s b/fusl/src/thread/x86_64/syscall_cp.s
new file mode 100644
index 0000000..4f10171
--- /dev/null
+++ b/fusl/src/thread/x86_64/syscall_cp.s
@@ -0,0 +1,31 @@
+.text
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type   __syscall_cp_asm,@function
+__syscall_cp_asm:
+
+__cp_begin:
+	mov (%rdi),%eax
+	test %eax,%eax
+	jnz __cp_cancel
+	mov %rdi,%r11
+	mov %rsi,%rax
+	mov %rdx,%rdi
+	mov %rcx,%rsi
+	mov %r8,%rdx
+	mov %r9,%r10
+	mov 8(%rsp),%r8
+	mov 16(%rsp),%r9
+	mov %r11,8(%rsp)
+	syscall
+__cp_end:
+	ret
+__cp_cancel:
+	jmp __cancel
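
__cp_begin and __cp_end bracket the syscall instruction itself: the cancellation signal handler compares the interrupted instruction pointer against these hidden labels to decide whether the thread was blocked at a cancellation point, and __cp_cancel is where execution is redirected so that __cancel runs. The cancel-flag pointer passed as the first argument is checked up front, then stashed in the now-free stack slot at 8(%rsp) while the six syscall arguments are shifted into the kernel's registers. A sketch of how a cancellable wrapper reaches this path, in the style of musl's read(), assuming syscall_cp is the macro provided by syscall.h (the wrapper name is hypothetical):

#include <unistd.h>
#include "syscall.h"

/* syscall_cp() routes through __syscall_cp and the assembly above, so a
 * pending cancellation can act on the blocking read between __cp_begin
 * and __cp_end. */
static ssize_t read_cp(int fd, void *buf, size_t count)
{
	return syscall_cp(SYS_read, fd, buf, count);
}
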