Add a "fork" of musl as //fusl.

This is musl verbatim at d5f8394f6ea9549607567bd92de12a2446c15614.

See http://www.musl-libc.org/.

R=kulakowski@chromium.org

Review URL: https://codereview.chromium.org/1573973002 .
diff --git a/fusl/src/string/armel/memcpy.s b/fusl/src/string/armel/memcpy.s
new file mode 100644
index 0000000..b16be0d
--- /dev/null
+++ b/fusl/src/string/armel/memcpy.s
@@ -0,0 +1,379 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * Optimized memcpy() for ARM.
+ *
+ * note that memcpy() always returns the destination pointer,
+ * so we have to preserve R0.
+  */
+
+/*
+ * This file has been modified from the original for use in musl libc.
+ * The main changes are: addition of .type memcpy,%function to make the
+ * code safely callable from thumb mode, adjusting the return
+ * instructions to be compatible with pre-thumb ARM cpus, and removal
+ * of prefetch code that is not compatible with older cpus.
+ */
+
+.syntax unified
+
+.global memcpy
+.type memcpy,%function
+memcpy:	/* entry: r0 = dest (preserved as the return value), r1 = src, r2 = byte count */
+	/* The stack must always be 64-bits aligned to be compliant with the
+	 * ARM ABI. Since we have to save R0, we might as well save R4
+	 * which we can use for better pipelining of the reads below
+	 */
+	.fnstart
+	.save       {r0, r4, lr}
+	stmfd       sp!, {r0, r4, lr}
+	/* Making room for r5-r11 which will be spilled later */
+	.pad        #28
+	sub         sp, sp, #28	/* 7 registers * 4 bytes; 12 + 28 = 40 bytes pushed in total */
+
+	/* it simplifies things to take care of len<4 early */
+	cmp     r2, #4
+	blo     copy_last_3_and_return
+
+	/* compute the offset to align the source
+	 * offset = (4-(src&3))&3 = -src & 3
+	 */
+	rsb     r3, r1, #0
+	ands    r3, r3, #3
+	beq     src_aligned
+
+	/* align source to 32 bits. We need to insert 2 instructions between
+	 * a ldr[b|h] and str[b|h] because byte and half-word instructions
+	 * stall 2 cycles.
+	 */
+	movs    r12, r3, lsl #31	/* N = bit 0 of r3 (copy 1 byte), C = bit 1 of r3 (copy 2 bytes) */
+	sub     r2, r2, r3              /* we know that r3 <= r2 because r2 >= 4 */
+	ldrbmi r3, [r1], #1
+	ldrbcs r4, [r1], #1
+	ldrbcs r12,[r1], #1
+	strbmi r3, [r0], #1
+	strbcs r4, [r0], #1
+	strbcs r12,[r0], #1
+
+src_aligned:
+
+	/* see if src and dst are aligned together (congruent) */
+	eor     r12, r0, r1
+	tst     r12, #3
+	bne     non_congruent
+
+	/* Use post-increment mode for stm to spill r5-r11 to reserved stack
+	 * frame. Don't update sp.
+	 */
+	stmea   sp, {r5-r11}
+
+	/* align the destination to a cache-line */
+	rsb     r3, r0, #0
+	ands    r3, r3, #0x1C	/* r3 = bytes (whole words only) up to the next 32-byte boundary of dest */
+	beq     congruent_aligned32
+	cmp     r3, r2
+	andhi   r3, r2, #0x1C	/* fewer than r3 bytes left: copy only the whole words that remain */
+
+	/* conditionally copies 0 to 7 words (length in r3) */
+	movs    r12, r3, lsl #28	/* C = bit 4 of r3 (16 bytes), N = bit 3 of r3 (8 bytes) */
+	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
+	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
+	stmcs   r0!, {r4, r5, r6, r7}
+	stmmi   r0!, {r8, r9}
+	tst     r3, #0x4
+	ldrne   r10,[r1], #4                    /*  4 bytes */
+	strne   r10,[r0], #4
+	sub     r2, r2, r3
+
+congruent_aligned32:
+	/*
+	 * here the destination (r0) is aligned to 32 bytes, unless fewer bytes than that remained.
+	 */
+
+cached_aligned32:
+	subs    r2, r2, #32	/* r2 is kept biased by -32 while the 32-byte loop runs */
+	blo     less_than_32_left
+
+	/*
+	 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
+	 * stall only until the requested word is fetched, but the linefill
+	 * continues in the background.
+	 * While the linefill is going, we write our previous cache-line
+	 * into the write-buffer (which should have some free space).
+	 * When the linefill is done, the writebuffer will
+	 * start dumping its content into memory
+	 *
+	 * While all this is going, we then load a full cache line into
+	 * 8 registers, this cache line should be in the cache by now
+	 * (or partly in the cache).
+	 *
+	 * This code should work well regardless of the source/dest alignment.
+	 * NOTE: the preload instructions described here are commented out ('@') below.
+	 */
+
+	/* Align the preload register to a cache-line because the cpu does
+	 * "critical word first" (the first word requested is loaded first).
+	 */
+	@ bic           r12, r1, #0x1F
+	@ add           r12, r12, #64
+
+1:      ldmia   r1!, { r4-r11 }	/* load 32 bytes */
+	subs    r2, r2, #32
+
+	/* 
+	 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
+	 * for ARM9 preload will not be safely guarded by the preceding subs.
+	 * When it is safely guarded the only possibility to have SIGSEGV here
+	 * is because the caller overstates the length.
+	 */
+	@ ldrhi         r3, [r12], #32      /* cheap ARM9 preload */
+	stmia   r0!, { r4-r11 }	/* store 32 bytes; flags from the subs above are untouched */
+	bhs     1b
+
+	add     r2, r2, #32	/* undo the -32 bias: r2 = bytes still to copy */
+
+less_than_32_left:
+	/*
+	 * less than 32 bytes left at this point (length in r2)
+	 */
+
+	/* skip all this if there is nothing to do, which should
+	 * be a common case (if not executed the code below takes
+	 * about 16 cycles)
+	 */
+	tst     r2, #0x1F
+	beq     1f
+
+	/* conditionally copies 0 to 31 bytes */
+	movs    r12, r2, lsl #28	/* C = bit 4 of r2 (16 bytes), N = bit 3 of r2 (8 bytes) */
+	ldmcs   r1!, {r4, r5, r6, r7}           /* 16 bytes */
+	ldmmi   r1!, {r8, r9}                   /*  8 bytes */
+	stmcs   r0!, {r4, r5, r6, r7}
+	stmmi   r0!, {r8, r9}
+	movs    r12, r2, lsl #30	/* C = bit 2 of r2 (4 bytes), N = bit 1 of r2 (2 bytes) */
+	ldrcs   r3, [r1], #4                    /*  4 bytes */
+	ldrhmi r4, [r1], #2                     /*  2 bytes */
+	strcs   r3, [r0], #4
+	strhmi r4, [r0], #2
+	tst     r2, #0x1
+	ldrbne r3, [r1]                         /*  last byte  */
+	strbne r3, [r0]
+
+	/* we're done! restore everything and return */
+1:      ldmfd   sp!, {r5-r11}	/* reload the spilled registers and pop the 28-byte area */
+	ldmfd   sp!, {r0, r4, lr}	/* r0 = original dest, the return value */
+	bx      lr
+
+	/********************************************************************/
+
+non_congruent:
+	/*
+	 * here source is aligned to 4 bytes
+	 * but destination is not.
+	 *
+	 * in the code below r2 is the number of bytes read
+	 * (the number of bytes written is always smaller, because we have
+	 * partial words in the shift queue)
+	 */
+	cmp     r2, #4
+	blo     copy_last_3_and_return
+
+	/* Use post-increment mode for stm to spill r5-r11 to reserved stack
+	 * frame. Don't update sp.
+	 */
+	stmea   sp, {r5-r11}
+
+	/* compute shifts needed to align src to dest */
+	rsb     r5, r0, #0
+	and     r5, r5, #3                      /* r5 = # bytes in partial words */
+	mov     r12, r5, lsl #3         /* r12 = right */
+	rsb     lr, r12, #32            /* lr = left  */
+
+	/* read the first word */
+	ldr     r3, [r1], #4
+	sub     r2, r2, #4
+
+	/* write a partial word (0 to 3 bytes), such that destination
+	 * becomes aligned to 32 bits (r5 = nb of bytes to copy for alignment)
+	 */
+	movs    r5, r5, lsl #31	/* N = bit 0 of r5 (store 1 byte), C = bit 1 of r5 (store 2 bytes) */
+	strbmi r3, [r0], #1
+	movmi   r3, r3, lsr #8
+	strbcs r3, [r0], #1
+	movcs   r3, r3, lsr #8
+	strbcs r3, [r0], #1
+	movcs   r3, r3, lsr #8
+
+	cmp     r2, #4
+	blo     partial_word_tail
+
+	/* Align destination to 32 bytes (cache line boundary) */
+1:      tst     r0, #0x1c
+	beq     2f
+	ldr     r5, [r1], #4
+	sub     r2, r2, #4
+	orr     r4, r3, r5,             lsl lr	/* r4 = leftover bytes in r3 | low bytes of the new word */
+	mov     r3, r5,                 lsr r12	/* r3 = bytes of the new word not yet written */
+	str     r4, [r0], #4
+	cmp     r2, #4
+	bhs     1b
+	blo     partial_word_tail
+
+	/* copy 32 bytes at a time */
+2:      subs    r2, r2, #32
+	blo     less_than_thirtytwo
+
+	/* Use immediate mode for the shifts, because there is an extra cycle
+	 * for register shifts, which could account for up to 50% of
+	 * performance hit.
+	 */
+
+	cmp     r12, #24	/* dispatch on the right-shift amount; 16 falls through to loop16 */
+	beq     loop24
+	cmp     r12, #8
+	beq     loop8
+
+loop16:	/* 32 bytes per pass with right shift 16 / left shift 16; r3 carries the leftover bytes */
+	ldr     r12, [r1], #4
+1:      mov     r4, r12
+	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+	subs    r2, r2, #32
+	ldrhs   r12, [r1], #4	/* fetch the first word of the next pass only if another pass follows */
+	orr     r3, r3, r4, lsl #16
+	mov     r4, r4, lsr #16
+	orr     r4, r4, r5, lsl #16
+	mov     r5, r5, lsr #16
+	orr     r5, r5, r6, lsl #16
+	mov     r6, r6, lsr #16
+	orr     r6, r6, r7, lsl #16
+	mov     r7, r7, lsr #16
+	orr     r7, r7, r8, lsl #16
+	mov     r8, r8, lsr #16
+	orr     r8, r8, r9, lsl #16
+	mov     r9, r9, lsr #16
+	orr     r9, r9, r10, lsl #16
+	mov     r10, r10,               lsr #16
+	orr     r10, r10, r11, lsl #16
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsr #16	/* leftover bytes for the next output word */
+	bhs     1b	/* flags still come from the subs above */
+	b       less_than_thirtytwo
+
+loop8:	/* 32 bytes per pass with right shift 8 / left shift 24; r3 carries the leftover bytes */
+	ldr     r12, [r1], #4
+1:      mov     r4, r12
+	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+	subs    r2, r2, #32
+	ldrhs   r12, [r1], #4	/* fetch the first word of the next pass only if another pass follows */
+	orr     r3, r3, r4, lsl #24
+	mov     r4, r4, lsr #8
+	orr     r4, r4, r5, lsl #24
+	mov     r5, r5, lsr #8
+	orr     r5, r5, r6, lsl #24
+	mov     r6, r6,  lsr #8
+	orr     r6, r6, r7, lsl #24
+	mov     r7, r7,  lsr #8
+	orr     r7, r7, r8,             lsl #24
+	mov     r8, r8,  lsr #8
+	orr     r8, r8, r9,             lsl #24
+	mov     r9, r9,  lsr #8
+	orr     r9, r9, r10,    lsl #24
+	mov     r10, r10, lsr #8
+	orr     r10, r10, r11,  lsl #24
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsr #8	/* leftover bytes for the next output word */
+	bhs     1b	/* flags still come from the subs above */
+	b       less_than_thirtytwo
+
+loop24:	/* 32 bytes per pass with right shift 24 / left shift 8; r3 carries the leftover byte */
+	ldr     r12, [r1], #4
+1:      mov     r4, r12
+	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
+	subs    r2, r2, #32
+	ldrhs   r12, [r1], #4	/* fetch the first word of the next pass only if another pass follows */
+	orr     r3, r3, r4, lsl #8
+	mov     r4, r4, lsr #24
+	orr     r4, r4, r5, lsl #8
+	mov     r5, r5, lsr #24
+	orr     r5, r5, r6, lsl #8
+	mov     r6, r6, lsr #24
+	orr     r6, r6, r7, lsl #8
+	mov     r7, r7, lsr #24
+	orr     r7, r7, r8, lsl #8
+	mov     r8, r8, lsr #24
+	orr     r8, r8, r9, lsl #8
+	mov     r9, r9, lsr #24
+	orr     r9, r9, r10, lsl #8
+	mov     r10, r10, lsr #24
+	orr     r10, r10, r11, lsl #8
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsr #24	/* leftover byte for the next output word */
+	bhs     1b	/* flags still come from the subs above; falls through when done */
+
+less_than_thirtytwo:
+	/* copy the last 0 to 31 bytes of the source */
+	rsb     r12, lr, #32            /* we corrupted r12, recompute it  */
+	add     r2, r2, #32	/* undo the -32 bias applied by the 32-byte loops */
+	cmp     r2, #4
+	blo     partial_word_tail
+
+1:      ldr     r5, [r1], #4
+	sub     r2, r2, #4
+	orr     r4, r3, r5,             lsl lr	/* r4 = leftover bytes in r3 | low bytes of the new word */
+	mov     r3,     r5,                     lsr r12	/* r3 = bytes of the new word not yet written */
+	str     r4, [r0], #4
+	cmp     r2, #4
+	bhs     1b
+
+partial_word_tail:
+	/* we have a partial word in the input buffer */
+	movs    r5, lr, lsl #(31-3)	/* N = bit 3 of lr, C = bit 4 of lr: lr/8 bytes are still pending in r3 */
+	strbmi r3, [r0], #1
+	movmi   r3, r3, lsr #8
+	strbcs r3, [r0], #1
+	movcs   r3, r3, lsr #8
+	strbcs r3, [r0], #1
+
+	/* Refill spilled registers from the stack. Don't update sp. */
+	ldmfd   sp, {r5-r11}
+
+copy_last_3_and_return:
+	movs    r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes: N = bit 0, C = bit 1 of r2 */
+	ldrbmi r2, [r1], #1
+	ldrbcs r3, [r1], #1
+	ldrbcs r12,[r1]
+	strbmi r2, [r0], #1
+	strbcs r3, [r0], #1
+	strbcs r12,[r0]
+
+	/* we're done! restore sp and spilled registers and return */
+	add     sp,  sp, #28	/* drop the r5-r11 spill area without reloading it */
+	ldmfd   sp!, {r0, r4, lr}	/* r0 = original dest, the return value */
+	bx      lr	/* NOTE(review): no .fnend is visible after this to close the .fnstart at entry - confirm */
diff --git a/fusl/src/string/armel/memcpy.sub b/fusl/src/string/armel/memcpy.sub
new file mode 100644
index 0000000..543f583
--- /dev/null
+++ b/fusl/src/string/armel/memcpy.sub
@@ -0,0 +1 @@
+memcpy.s
diff --git a/fusl/src/string/armhf/memcpy.sub b/fusl/src/string/armhf/memcpy.sub
new file mode 100644
index 0000000..add0590
--- /dev/null
+++ b/fusl/src/string/armhf/memcpy.sub
@@ -0,0 +1 @@
+../armel/memcpy.s
diff --git a/fusl/src/string/bcmp.c b/fusl/src/string/bcmp.c
new file mode 100644
index 0000000..87c6007
--- /dev/null
+++ b/fusl/src/string/bcmp.c
@@ -0,0 +1,8 @@
+#define _BSD_SOURCE
+#include <string.h>
+#include <strings.h>
+/* Legacy BSD byte comparison: forwards to memcmp(s1, s2, n) and returns its result unchanged. */
+int bcmp(const void *s1, const void *s2, size_t n)
+{
+	return memcmp(s1, s2, n);
+}
diff --git a/fusl/src/string/bcopy.c b/fusl/src/string/bcopy.c
new file mode 100644
index 0000000..a07129f
--- /dev/null
+++ b/fusl/src/string/bcopy.c
@@ -0,0 +1,8 @@
+#define _BSD_SOURCE
+#include <string.h>
+#include <strings.h>
+/* Legacy BSD copy: note the argument order, source s1 first and destination s2 second; forwards to memmove. */
+void bcopy(const void *s1, void *s2, size_t n)
+{
+	memmove(s2, s1, n);
+}
diff --git a/fusl/src/string/bzero.c b/fusl/src/string/bzero.c
new file mode 100644
index 0000000..ba536b0
--- /dev/null
+++ b/fusl/src/string/bzero.c
@@ -0,0 +1,8 @@
+#define _BSD_SOURCE
+#include <string.h>
+#include <strings.h>
+/* Legacy BSD clear: sets the n bytes starting at s to zero via memset. */
+void bzero(void *s, size_t n)
+{
+	memset(s, 0, n);
+}
diff --git a/fusl/src/string/i386/memcpy.s b/fusl/src/string/i386/memcpy.s
new file mode 100644
index 0000000..0608dd8
--- /dev/null
+++ b/fusl/src/string/i386/memcpy.s
@@ -0,0 +1,32 @@
+.global memcpy
+.global __memcpy_fwd
+.hidden __memcpy_fwd
+.type memcpy,@function
+memcpy:	/* stack arguments: dest, src, n; returns dest in %eax */
+__memcpy_fwd:	/* second name for the same entry point (declared .hidden above) */
+	push %esi
+	push %edi
+	mov 12(%esp),%edi	/* after the two pushes: 12 = dest, 16 = src, 20 = n */
+	mov 16(%esp),%esi
+	mov 20(%esp),%ecx
+	mov %edi,%eax	/* return value = dest */
+	cmp $4,%ecx
+	jc 1f	/* n < 4: skip the alignment loop */
+	test $3,%edi
+	jz 1f	/* dest already 4-byte aligned */
+2:	movsb	/* copy single bytes until dest is 4-byte aligned */
+	dec %ecx
+	test $3,%edi
+	jnz 2b
+1:	mov %ecx,%edx	/* keep the byte count; its low 2 bits are the tail length */
+	shr $2,%ecx
+	rep
+	movsl	/* copy n/4 dwords */
+	and $3,%edx
+	jz 1f
+2:	movsb	/* copy the remaining 1-3 bytes */
+	dec %edx
+	jnz 2b
+1:	pop %edi
+	pop %esi
+	ret
diff --git a/fusl/src/string/i386/memmove.s b/fusl/src/string/i386/memmove.s
new file mode 100644
index 0000000..2a6a504
--- /dev/null
+++ b/fusl/src/string/i386/memmove.s
@@ -0,0 +1,22 @@
+.global memmove
+.type memmove,@function
+memmove:	/* stack arguments: 4 = dest, 8 = src, 12 = n; returns dest in %eax */
+	mov 4(%esp),%eax
+	sub 8(%esp),%eax	/* eax = dest - src, wrapping as an unsigned value */
+	cmp 12(%esp),%eax
+.hidden __memcpy_fwd
+	jae __memcpy_fwd	/* (unsigned)(dest - src) >= n: hand over to the forward copy at __memcpy_fwd */
+	push %esi
+	push %edi
+	mov 12(%esp),%edi	/* after the two pushes: 12 = dest, 16 = src, 20 = n */
+	mov 16(%esp),%esi
+	mov 20(%esp),%ecx
+	lea -1(%edi,%ecx),%edi	/* point at the last byte of dest */
+	lea -1(%esi,%ecx),%esi	/* point at the last byte of src */
+	std	/* direction flag set: movsb walks downwards */
+	rep movsb
+	cld	/* restore the forward direction before returning */
+	lea 1(%edi),%eax	/* edi ended one below dest, so eax = dest */
+	pop %edi
+	pop %esi
+	ret
diff --git a/fusl/src/string/i386/memset.s b/fusl/src/string/i386/memset.s
new file mode 100644
index 0000000..d00422c
--- /dev/null
+++ b/fusl/src/string/i386/memset.s
@@ -0,0 +1,76 @@
+.global memset
+.type memset,@function
+memset:	/* stack arguments: 4 = dest, 8 = c, 12 = n; returns dest in %eax */
+	mov 12(%esp),%ecx
+	cmp $62,%ecx
+	ja 2f	/* n > 62: use the rep stosl path below */
+
+	mov 8(%esp),%dl
+	mov 4(%esp),%eax
+	test %ecx,%ecx
+	jz 1f	/* n == 0: nothing to store */
+
+	mov %dl,%dh	/* dx = fill byte twice */
+
+	mov %dl,(%eax)	/* each step stores from both ends; the stores may overlap */
+	mov %dl,-1(%eax,%ecx)
+	cmp $2,%ecx
+	jbe 1f
+
+	mov %dx,1(%eax)
+	mov %dx,(-1-2)(%eax,%ecx)
+	cmp $6,%ecx
+	jbe 1f
+
+	shl $16,%edx
+	mov 8(%esp),%dl
+	mov 8(%esp),%dh	/* edx = fill byte in all four byte lanes */
+
+	mov %edx,(1+2)(%eax)
+	mov %edx,(-1-2-4)(%eax,%ecx)
+	cmp $14,%ecx
+	jbe 1f
+
+	mov %edx,(1+2+4)(%eax)
+	mov %edx,(1+2+4+4)(%eax)
+	mov %edx,(-1-2-4-8)(%eax,%ecx)
+	mov %edx,(-1-2-4-4)(%eax,%ecx)
+	cmp $30,%ecx
+	jbe 1f
+
+	mov %edx,(1+2+4+8)(%eax)
+	mov %edx,(1+2+4+8+4)(%eax)
+	mov %edx,(1+2+4+8+8)(%eax)
+	mov %edx,(1+2+4+8+12)(%eax)
+	mov %edx,(-1-2-4-8-16)(%eax,%ecx)
+	mov %edx,(-1-2-4-8-12)(%eax,%ecx)
+	mov %edx,(-1-2-4-8-8)(%eax,%ecx)
+	mov %edx,(-1-2-4-8-4)(%eax,%ecx)
+
+1:	ret 	/* small path done: eax still holds dest */
+
+2:	movzbl 8(%esp),%eax
+	mov %edi,12(%esp)	/* save edi in the argument slot of n (n is already in ecx) */
+	imul $0x1010101,%eax	/* eax = fill byte in all four byte lanes */
+	mov 4(%esp),%edi
+	test $15,%edi	/* is dest 16-byte aligned? the mov below leaves the flags alone */
+	mov %eax,-4(%edi,%ecx)	/* store the last 4 bytes now; stosl only covers whole dwords */
+	jnz 2f
+
+1:	shr $2, %ecx
+	rep
+	stosl	/* store ecx dwords from edi upward */
+	mov 4(%esp),%eax	/* return value = dest */
+	mov 12(%esp),%edi	/* restore the saved edi */
+	ret
+	
+2:	xor %edx,%edx
+	sub %edi,%edx
+	and $15,%edx	/* edx = bytes up to the next 16-byte boundary (1..15 here) */
+	mov %eax,(%edi)	/* fill the first 16 bytes directly, then skip ahead to the boundary */
+	mov %eax,4(%edi)
+	mov %eax,8(%edi)
+	mov %eax,12(%edi)
+	sub %edx,%ecx
+	add %edx,%edi
+	jmp 1b
diff --git a/fusl/src/string/index.c b/fusl/src/string/index.c
new file mode 100644
index 0000000..252948f
--- /dev/null
+++ b/fusl/src/string/index.c
@@ -0,0 +1,8 @@
+#define _BSD_SOURCE
+#include <string.h>
+#include <strings.h>
+/* Legacy BSD name for strchr: forwards to strchr(s, c) and returns its result. */
+char *index(const char *s, int c)
+{
+	return strchr(s, c);
+}
diff --git a/fusl/src/string/memccpy.c b/fusl/src/string/memccpy.c
new file mode 100644
index 0000000..7c233d5
--- /dev/null
+++ b/fusl/src/string/memccpy.c
@@ -0,0 +1,31 @@
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define ALIGN (sizeof(size_t)-1)
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+/* Copy src to dest until (unsigned char)c has been copied or n bytes are done; returns the byte after the copied c in dest, or 0 if c is not among the n bytes. */
+void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n)
+{
+	unsigned char *d = dest;
+	const unsigned char *s = src;
+	size_t *wd, k;
+	const size_t *ws;
+	/* The word-at-a-time copy is only attempted when src and dest are co-aligned. */
+	c = (unsigned char)c;
+	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
+		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s)!=c; n--, s++, d++);
+		if ((uintptr_t)s & ALIGN) goto tail;
+		k = ONES * c;
+		wd=(void *)d; ws=(const void *)s;
+		for (; n>=sizeof(size_t) && !HASZERO(*ws^k);
+		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
+		d=(void *)wd; s=(const void *)ws;
+	}
+	for (; n && (*d=*s)!=c; n--, s++, d++);
+tail:
+	/* Every path arrives with n==0 (c not found) or *d==c; testing n avoids reading src[n], which lies past the range. */
+	return n ? d+1 : 0;
+}
diff --git a/fusl/src/string/memchr.c b/fusl/src/string/memchr.c
new file mode 100644
index 0000000..4daff7b
--- /dev/null
+++ b/fusl/src/string/memchr.c
@@ -0,0 +1,23 @@
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define SS (sizeof(size_t))
+#define ALIGN (sizeof(size_t)-1)
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+/* Return a pointer to the first of the n bytes at src equal to (unsigned char)c, or 0 if there is none. */
+void *memchr(const void *src, int c, size_t n)
+{
+	const unsigned char *s = src;
+	c = (unsigned char)c;
+	for (; ((uintptr_t)s & ALIGN) && n && *s != c; s++, n--); /* bytewise until s is word-aligned */
+	if (n && *s != c) {
+		const size_t *w;
+		size_t k = ONES * c; /* c replicated into every byte of a word */
+		for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS); /* skip whole words that contain no c */
+		for (s = (const void *)w; n && *s != c; s++, n--); /* bytewise over the remaining bytes */
+	}
+	return n ? (void *)s : 0; /* n reaches 0 only when no byte matched */
+}
diff --git a/fusl/src/string/memcmp.c b/fusl/src/string/memcmp.c
new file mode 100644
index 0000000..bdbce9f
--- /dev/null
+++ b/fusl/src/string/memcmp.c
@@ -0,0 +1,8 @@
+#include <string.h>
+/* Compare the first n bytes of vl and vr as unsigned chars; returns the difference of the first mismatching pair, or 0 if all n bytes match. */
+int memcmp(const void *vl, const void *vr, size_t n)
+{
+	const unsigned char *a = vl, *b = vr;
+	while (n != 0 && *a == *b) { --n; ++a; ++b; }
+	return n != 0 ? *a - *b : 0;
+}
diff --git a/fusl/src/string/memcpy.c b/fusl/src/string/memcpy.c
new file mode 100644
index 0000000..06e8874
--- /dev/null
+++ b/fusl/src/string/memcpy.c
@@ -0,0 +1,124 @@
+#include <string.h>
+#include <stdint.h>
+#include <endian.h>
+/* Copy n bytes from src to dest (regions must not overlap, per restrict) and return dest. */
+void *memcpy(void *restrict dest, const void *restrict src, size_t n)
+{
+	unsigned char *d = dest;
+	const unsigned char *s = src;
+
+#ifdef __GNUC__
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define LS >>
+#define RS <<
+#else
+#define LS <<
+#define RS >>
+#endif
+
+	typedef uint32_t __attribute__((__may_alias__)) u32;
+	uint32_t w, x;
+	/* Copy single bytes until the source pointer is 4-byte aligned. */
+	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;
+	/* Source and destination both aligned: plain word copies, 16 bytes per pass. */
+	if ((uintptr_t)d % 4 == 0) {
+		for (; n>=16; s+=16, d+=16, n-=16) {
+			*(u32 *)(d+0) = *(u32 *)(s+0);
+			*(u32 *)(d+4) = *(u32 *)(s+4);
+			*(u32 *)(d+8) = *(u32 *)(s+8);
+			*(u32 *)(d+12) = *(u32 *)(s+12);
+		}
+		if (n&8) { /* fewer than 16 bytes left: the bits of n select the remaining pieces */
+			*(u32 *)(d+0) = *(u32 *)(s+0);
+			*(u32 *)(d+4) = *(u32 *)(s+4);
+			d += 8; s += 8;
+		}
+		if (n&4) {
+			*(u32 *)(d+0) = *(u32 *)(s+0);
+			d += 4; s += 4;
+		}
+		if (n&2) {
+			*d++ = *s++; *d++ = *s++;
+		}
+		if (n&1) {
+			*d = *s;
+		}
+		return dest;
+	}
+	/* Destination misaligned relative to the source: build aligned stores from two shifted source words. */
+	if (n >= 32) switch ((uintptr_t)d % 4) {
+	case 1:
+		w = *(u32 *)s; /* aligned source word; its first 3 bytes are copied bytewise below */
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		n -= 3; /* d is now 4-byte aligned and s is 3 past a word boundary */
+		for (; n>=17; s+=16, d+=16, n-=16) {
+			x = *(u32 *)(s+1);
+			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
+			w = *(u32 *)(s+5);
+			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
+			x = *(u32 *)(s+9);
+			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
+			w = *(u32 *)(s+13);
+			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
+		}
+		break;
+	case 2:
+		w = *(u32 *)s; /* aligned source word; its first 2 bytes are copied bytewise below */
+		*d++ = *s++;
+		*d++ = *s++;
+		n -= 2;
+		for (; n>=18; s+=16, d+=16, n-=16) {
+			x = *(u32 *)(s+2);
+			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
+			w = *(u32 *)(s+6);
+			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
+			x = *(u32 *)(s+10);
+			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
+			w = *(u32 *)(s+14);
+			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
+		}
+		break;
+	case 3:
+		w = *(u32 *)s; /* aligned source word; its first byte is copied bytewise below */
+		*d++ = *s++;
+		n -= 1;
+		for (; n>=19; s+=16, d+=16, n-=16) {
+			x = *(u32 *)(s+3);
+			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
+			w = *(u32 *)(s+7);
+			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
+			x = *(u32 *)(s+11);
+			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
+			w = *(u32 *)(s+15);
+			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
+		}
+		break;
+	}
+	if (n&16) { /* bytewise tail: each set bit of n copies that many bytes */
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+	}
+	if (n&8) {
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+	}
+	if (n&4) {
+		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
+	}
+	if (n&2) {
+		*d++ = *s++; *d++ = *s++;
+	}
+	if (n&1) {
+		*d = *s;
+	}
+	return dest;
+#endif
+	/* Portable fallback, compiled as the only path when __GNUC__ is not defined. */
+	for (; n; n--) *d++ = *s++;
+	return dest;
+}
diff --git a/fusl/src/string/memmem.c b/fusl/src/string/memmem.c
new file mode 100644
index 0000000..d7e1221
--- /dev/null
+++ b/fusl/src/string/memmem.c
@@ -0,0 +1,148 @@
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdint.h>
+/* Find the 2-byte needle n in the k bytes at h with a rolling 16-bit window; returns the first match or 0. Never reads h[k] or beyond. */
+static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+	if (k<2) return 0; /* haystack shorter than the needle: nothing to read or match */
+	uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
+	for (h+=2; k>2 && hw!=nw; k--) hw = hw<<8 | *h++; /* k counts the bytes from the window start to the end */
+	return hw == nw ? (char *)h-2 : 0;
+}
+/* Find the 3-byte needle n in the k bytes at h; the window lives in the top 24 bits of a uint32_t. Returns the first match or 0; never reads h[k] or beyond. */
+static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+	if (k<3) return 0; /* also keeps the length arithmetic from wrapping around */
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; /* cast first: shifting an int into the sign bit is undefined */
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
+	for (h+=3; k>3 && hw!=nw; k--) hw = (hw|*h++)<<8; /* k counts the bytes from the window start to the end */
+	return hw == nw ? (char *)h-3 : 0;
+}
+/* Find the 4-byte needle n in the k bytes at h with a rolling 32-bit window; returns the first match or 0. Never reads h[k] or beyond. */
+static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+	if (k<4) return 0; /* also keeps the length arithmetic from wrapping around */
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; /* cast first: shifting an int into the sign bit is undefined */
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
+	for (h+=4; k>4 && hw!=nw; k--) hw = hw<<8 | *h++; /* k counts the bytes from the window start to the end */
+	return hw == nw ? (char *)h-4 : 0;
+}
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+#define BITOP(a,b,op) \
+ ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
+/* Search the haystack [h, z) for the l-byte needle n; returns the first match or 0. The needle is first split at a critical position (ms) with period p. */
+static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
+{
+	size_t i, ip, jp, k, p, ms, p0, mem, mem0;
+	size_t byteset[32 / sizeof(size_t)] = { 0 };
+	size_t shift[256];
+
+	/* Fill the byte set and shift table: shift[b] = 1 + last index of b in the needle */
+	for (i=0; i<l; i++)
+		BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
+
+	/* Compute maximal suffix */
+	ip = -1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] > n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	ms = ip;
+	p0 = p;
+
+	/* And with the opposite comparison */
+	ip = -1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] < n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	if (ip+1 > ms+1) ms = ip; /* keep the later of the two split points; +1 so that (size_t)-1 compares as 0 */
+	else p = p0;
+
+	/* Periodic needle? */
+	if (memcmp(n, n+p, ms+1)) {
+		mem0 = 0;
+		p = MAX(ms, l-ms-1) + 1;
+	} else mem0 = l-p;
+	mem = 0;
+
+	/* Search loop */
+	for (;;) {
+		/* If remainder of haystack is shorter than needle, done */
+		if (z-h < l) return 0;
+
+		/* Check last byte first; advance by shift on mismatch */
+		if (BITOP(byteset, h[l-1], &)) {
+			k = l-shift[h[l-1]];
+			if (k) {
+				if (mem0 && mem && k < p) k = l-p;
+				h += k;
+				mem = 0;
+				continue;
+			}
+		} else {
+			h += l; /* this byte does not occur in the needle at all */
+			mem = 0;
+			continue;
+		}
+
+		/* Compare right half */
+		for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
+		if (k < l) {
+			h += k-ms;
+			mem = 0;
+			continue;
+		}
+		/* Compare left half */
+		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
+		if (k <= mem) return (char *)h;
+		h += p;
+		mem = mem0; /* mem is the lower bound of the next left-half comparison */
+	}
+}
+/* Find the first occurrence of the l-byte needle n0 within the k-byte haystack h0; returns a pointer to it or 0. An empty needle matches at h0. */
+void *memmem(const void *h0, size_t k, const void *n0, size_t l)
+{
+	const unsigned char *h = h0, *n = n0;
+
+	/* Return immediately on empty needle */
+	if (!l) return (void *)h;
+
+	/* Return immediately when needle is longer than haystack */
+	if (k<l) return 0;
+
+	/* Use faster algorithms for short needles */
+	h = memchr(h0, *n, k);
+	if (!h || l==1) return (void *)h;
+	k -= h - (const unsigned char *)h0;
+	if (k<l) return 0; /* skipping ahead may leave fewer bytes than the needle needs */
+	if (l==2) return twobyte_memmem(h, k, n);
+	if (l==3) return threebyte_memmem(h, k, n);
+	if (l==4) return fourbyte_memmem(h, k, n);
+	return twoway_memmem(h, h+k, n, l);
+}
diff --git a/fusl/src/string/memmove.c b/fusl/src/string/memmove.c
new file mode 100644
index 0000000..27f670e
--- /dev/null
+++ b/fusl/src/string/memmove.c
@@ -0,0 +1,36 @@
+#include <string.h>
+#include <stdint.h>
+
+#define WT size_t
+#define WS (sizeof(WT))
+/* Copy n bytes from src to dest, giving the correct result even when the two regions overlap; returns dest. */
+void *memmove(void *dest, const void *src, size_t n)
+{
+	char *d = dest;
+	const char *s = src;
+
+	if (d==s) return d;
+	if (s+n <= d || d+n <= s) return memcpy(d, s, n); /* regions do not overlap */
+
+	if (d<s) { /* dest below src: copy forwards so source bytes are read before being overwritten */
+		if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
+			while ((uintptr_t)d % WS) { /* bytewise until d is word-aligned */
+				if (!n--) return dest;
+				*d++ = *s++;
+			}
+			for (; n>=WS; n-=WS, d+=WS, s+=WS) *(WT *)d = *(WT *)s;
+		}
+		for (; n; n--) *d++ = *s++;
+	} else { /* dest above src: copy backwards from the end */
+		if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
+			while ((uintptr_t)(d+n) % WS) { /* bytewise until the end pointer is word-aligned */
+				if (!n--) return dest;
+				d[n] = s[n];
+			}
+			while (n>=WS) n-=WS, *(WT *)(d+n) = *(WT *)(s+n);
+		}
+		while (n) n--, d[n] = s[n];
+	}
+
+	return dest;
+}
diff --git a/fusl/src/string/mempcpy.c b/fusl/src/string/mempcpy.c
new file mode 100644
index 0000000..a297985
--- /dev/null
+++ b/fusl/src/string/mempcpy.c
@@ -0,0 +1,7 @@
+#define _GNU_SOURCE
+#include <string.h>
+/* Like memcpy, but returns a pointer to the byte following the last byte written (dest + n). */
+void *mempcpy(void *dest, const void *src, size_t n)
+{
+	return (char *)memcpy(dest, src, n) + n;
+}
diff --git a/fusl/src/string/memrchr.c b/fusl/src/string/memrchr.c
new file mode 100644
index 0000000..a78e9d6
--- /dev/null
+++ b/fusl/src/string/memrchr.c
@@ -0,0 +1,12 @@
+#include <string.h>
+#include "libc.h"
+
+/* Find the last of the first n bytes at m that equals (unsigned char)c; returns a pointer to it, or 0 if there is none. */
+void *__memrchr(const void *m, int c, size_t n)
+{
+	const unsigned char *p = (const unsigned char *)m + n, want = (unsigned char)c;
+	for (; n != 0; n--) if (*--p == want) return (void *)p;
+	return 0;
+}
+
+weak_alias(__memrchr, memrchr);
diff --git a/fusl/src/string/memset.c b/fusl/src/string/memset.c
new file mode 100644
index 0000000..f438b07
--- /dev/null
+++ b/fusl/src/string/memset.c
@@ -0,0 +1,86 @@
+#include <string.h>
+#include <stdint.h>
+/* Fill the n bytes at dest with (unsigned char)c and return dest. */
+void *memset(void *dest, int c, size_t n)
+{
+	unsigned char *s = dest;
+	size_t k;
+
+	/* Fill head and tail with minimal branching. Each
+	 * conditional ensures that all the subsequently used
+	 * offsets are well-defined and in the dest region. */
+
+	if (!n) return dest;
+	s[0] = s[n-1] = c;
+	if (n <= 2) return dest;
+	s[1] = s[n-2] = c;
+	s[2] = s[n-3] = c;
+	if (n <= 6) return dest;
+	s[3] = s[n-4] = c;
+	if (n <= 8) return dest;
+
+	/* Advance pointer to align it at a 4-byte boundary,
+	 * and truncate n to a multiple of 4. The previous code
+	 * already took care of any head/tail that get cut off
+	 * by the alignment. */
+
+	k = -(uintptr_t)s & 3;
+	s += k;
+	n -= k;
+	n &= -4;
+
+#ifdef __GNUC__
+	typedef uint32_t __attribute__((__may_alias__)) u32;
+	typedef uint64_t __attribute__((__may_alias__)) u64;
+
+	u32 c32 = ((u32)-1)/255 * (unsigned char)c; /* fill byte replicated into all 4 bytes */
+
+	/* In preparation to fill 32 bytes at a time, aligned on
+	 * an 8-byte boundary, fill head/tail up to 28 bytes each.
+	 * As in the initial byte-based head/tail fill, each
+	 * conditional below ensures that the subsequent offsets
+	 * are valid (e.g. !(n<=24) implies n>=28). */
+
+	*(u32 *)(s+0) = c32;
+	*(u32 *)(s+n-4) = c32;
+	if (n <= 8) return dest;
+	*(u32 *)(s+4) = c32;
+	*(u32 *)(s+8) = c32;
+	*(u32 *)(s+n-12) = c32;
+	*(u32 *)(s+n-8) = c32;
+	if (n <= 24) return dest;
+	*(u32 *)(s+12) = c32;
+	*(u32 *)(s+16) = c32;
+	*(u32 *)(s+20) = c32;
+	*(u32 *)(s+24) = c32;
+	*(u32 *)(s+n-28) = c32;
+	*(u32 *)(s+n-24) = c32;
+	*(u32 *)(s+n-20) = c32;
+	*(u32 *)(s+n-16) = c32;
+
+	/* Align to a multiple of 8 so we can fill 64 bits at a time,
+	 * and avoid writing the same bytes twice as much as is
+	 * practical without introducing additional branching. */
+
+	k = 24 + ((uintptr_t)s & 4);
+	s += k;
+	n -= k;
+
+	/* If this loop is reached, 28 tail bytes have already been
+	 * filled, so any remainder when n drops below 32 can be
+	 * safely ignored. */
+
+	u64 c64 = c32 | ((u64)c32 << 32); /* fill byte replicated into all 8 bytes */
+	for (; n >= 32; n-=32, s+=32) {
+		*(u64 *)(s+0) = c64;
+		*(u64 *)(s+8) = c64;
+		*(u64 *)(s+16) = c64;
+		*(u64 *)(s+24) = c64;
+	}
+#else
+	/* Pure C fallback with no aliasing violations. */
+	for (; n; n--, s++) *s = c;
+#endif
+
+	return dest;
+}
diff --git a/fusl/src/string/rindex.c b/fusl/src/string/rindex.c
new file mode 100644
index 0000000..693c750
--- /dev/null
+++ b/fusl/src/string/rindex.c
@@ -0,0 +1,8 @@
+#define _BSD_SOURCE
+#include <string.h>
+#include <strings.h>
+/* Legacy BSD name for strrchr: forwards to strrchr(s, c) and returns its result. */
+char *rindex(const char *s, int c)
+{
+	return strrchr(s, c);
+}
diff --git a/fusl/src/string/stpcpy.c b/fusl/src/string/stpcpy.c
new file mode 100644
index 0000000..06623c4
--- /dev/null
+++ b/fusl/src/string/stpcpy.c
@@ -0,0 +1,28 @@
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include "libc.h"
+
+#define ALIGN (sizeof(size_t))
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+/* Copy the NUL-terminated string s to d, including the terminator; returns a pointer to the terminating NUL written in d. */
+char *__stpcpy(char *restrict d, const char *restrict s)
+{
+	size_t *wd;
+	const size_t *ws;
+
+	if ((uintptr_t)s % ALIGN == (uintptr_t)d % ALIGN) { /* word copies only when both pointers share alignment */
+		for (; (uintptr_t)s % ALIGN; s++, d++)
+			if (!(*d=*s)) return d; /* terminator reached before s became word-aligned */
+		wd=(void *)d; ws=(const void *)s;
+		for (; !HASZERO(*ws); *wd++ = *ws++); /* copy whole words while they contain no zero byte */
+		d=(void *)wd; s=(const void *)ws;
+	}
+	for (; (*d=*s); s++, d++); /* bytewise for the rest, terminator included */
+
+	return d;
+}
+
+weak_alias(__stpcpy, stpcpy);
diff --git a/fusl/src/string/stpncpy.c b/fusl/src/string/stpncpy.c
new file mode 100644
index 0000000..1f57a4d
--- /dev/null
+++ b/fusl/src/string/stpncpy.c
@@ -0,0 +1,31 @@
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include "libc.h"
+
+#define ALIGN (sizeof(size_t)-1)
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+/* Copy at most n bytes of the string s to d and zero-fill whatever is left of the n bytes; returns d advanced to where copying stopped. */
+char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
+{
+	size_t *wd;
+	const size_t *ws;
+
+	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) { /* word copies only when both pointers share alignment */
+		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
+		if (!n || !*s) goto tail; /* out of room, or the terminator was reached */
+		wd=(void *)d; ws=(const void *)s;
+		for (; n>=sizeof(size_t) && !HASZERO(*ws);
+		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
+		d=(void *)wd; s=(const void *)ws;
+	}
+	for (; n && (*d=*s); n--, s++, d++);
+tail:
+	memset(d, 0, n); /* n is the count of bytes not yet written; all of them become zero */
+	return d;
+}
+
+weak_alias(__stpncpy, stpncpy);
+
diff --git a/fusl/src/string/strcasecmp.c b/fusl/src/string/strcasecmp.c
new file mode 100644
index 0000000..3cd5f2d
--- /dev/null
+++ b/fusl/src/string/strcasecmp.c
@@ -0,0 +1,17 @@
+#include <strings.h>
+#include <ctype.h>
+#include "libc.h"
+/* Case-insensitive strcmp: bytes are compared through tolower(); returns the tolower() difference at the first mismatch or at the end of either string. */
+int strcasecmp(const char *_l, const char *_r)
+{
+	const unsigned char *a = (const void *)_l, *b = (const void *)_r;
+	while (*a && *b && (*a == *b || tolower(*a) == tolower(*b))) { a++; b++; }
+	return tolower(*a) - tolower(*b);
+}
+/* Locale-taking variant: the loc argument is not used; the result is that of strcasecmp(l, r). */
+int __strcasecmp_l(const char *l, const char *r, locale_t loc)
+{
+	return strcasecmp(l, r);
+}
+
+weak_alias(__strcasecmp_l, strcasecmp_l);
diff --git a/fusl/src/string/strcasestr.c b/fusl/src/string/strcasestr.c
new file mode 100644
index 0000000..af109f3
--- /dev/null
+++ b/fusl/src/string/strcasestr.c
@@ -0,0 +1,9 @@
+#define _GNU_SOURCE
+#include <string.h>
+/* Case-insensitive substring search: returns the first position in h at which n matches under strncasecmp, or 0 if there is none. */
+char *strcasestr(const char *h, const char *n)
+{
+	for (const size_t len = strlen(n); *h != '\0'; ++h)
+		if (strncasecmp(h, n, len) == 0) return (char *)h;
+	return 0;
+}
diff --git a/fusl/src/string/strcat.c b/fusl/src/string/strcat.c
new file mode 100644
index 0000000..33f749b
--- /dev/null
+++ b/fusl/src/string/strcat.c
@@ -0,0 +1,7 @@
+#include <string.h>
+
+char *strcat(char *restrict dest, const char *restrict src)
+{	/* Append src, terminator included, at dest's NUL; returns dest. */
+	strcpy(&dest[strlen(dest)], src);
+	return dest;
+}
diff --git a/fusl/src/string/strchr.c b/fusl/src/string/strchr.c
new file mode 100644
index 0000000..bfae8f9
--- /dev/null
+++ b/fusl/src/string/strchr.c
@@ -0,0 +1,9 @@
+#include <string.h>
+
+char *__strchrnul(const char *, int);
+
+char *strchr(const char *s, int c)
+{	/* First (unsigned char)c in s, or 0; __strchrnul does the actual scan. */
+	const unsigned char *hit = (const void *)__strchrnul(s, c);
+	return *hit == (unsigned char)c ? (char *)hit : 0;
+}
diff --git a/fusl/src/string/strchrnul.c b/fusl/src/string/strchrnul.c
new file mode 100644
index 0000000..05700ad
--- /dev/null
+++ b/fusl/src/string/strchrnul.c
@@ -0,0 +1,26 @@
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include "libc.h"
+
+#define ALIGN (sizeof(size_t))
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+
+char *__strchrnul(const char *s, int c)
+{	/* Return a pointer to the first (unsigned char)c in s, or to s's NUL if there is none. */
+	size_t *w, k;
+	/* Only the low byte of c takes part in the search. */
+	c = (unsigned char)c;
+	if (!c) return (char *)s + strlen(s);
+	/* Bytewise until s is aligned to sizeof(size_t). */
+	for (; (uintptr_t)s % ALIGN; s++)
+		if (!*s || *(unsigned char *)s == c) return (char *)s;
+	k = ONES * c; /* c replicated into every byte of a word */
+	for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++); /* skip words with neither a zero byte nor a c byte */
+	for (s = (void *)w; *s && *(unsigned char *)s != c; s++); /* locate the exact byte */
+	return (char *)s;
+}
+
+weak_alias(__strchrnul, strchrnul);
diff --git a/fusl/src/string/strcmp.c b/fusl/src/string/strcmp.c
new file mode 100644
index 0000000..808bd83
--- /dev/null
+++ b/fusl/src/string/strcmp.c
@@ -0,0 +1,7 @@
+#include <string.h>
+
+int strcmp(const char *l, const char *r)
+{	/* Difference of the first mismatching byte pair, taken as unsigned char. */
+	while (*l && *l == *r) l++, r++;
+	return *(const unsigned char *)l - *(const unsigned char *)r;
+}
diff --git a/fusl/src/string/strcpy.c b/fusl/src/string/strcpy.c
new file mode 100644
index 0000000..f7e3ba3
--- /dev/null
+++ b/fusl/src/string/strcpy.c
@@ -0,0 +1,16 @@
+#include <string.h>
+
+char *__stpcpy(char *, const char *);
+
+char *strcpy(char *restrict dest, const char *restrict src)
+{	/* Copy src, terminator included, to dest; returns dest. */
+#if 1
+	(void)__stpcpy(dest, src);	/* the end pointer it returns is not needed here */
+	return dest;
+#else
+	unsigned char *out = dest;	/* disabled bytewise fallback */
+	const unsigned char *in = src;
+	do *out = *in++; while (*out++);
+	return dest;
+#endif
+}
diff --git a/fusl/src/string/strcspn.c b/fusl/src/string/strcspn.c
new file mode 100644
index 0000000..cfdba11
--- /dev/null
+++ b/fusl/src/string/strcspn.c
@@ -0,0 +1,19 @@
+#include <string.h>
+
+#define BITOP(a,b,op) \
+ ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
+
+char *__strchrnul(const char *, int);
+
+size_t strcspn(const char *s, const char *c)
+{	/* Length of the leading run of s that contains no byte from c. */
+	const char *start = s;
+	size_t reject[32/sizeof(size_t)];
+
+	/* Zero or one reject byte: a plain scan for it (or for the NUL) suffices. */
+	if (!c[0] || !c[1]) return __strchrnul(s, *c)-start;
+	memset(reject, 0, sizeof reject);
+	while (*c && BITOP(reject, *(unsigned char *)c, |=)) c++;	/* mark each reject byte */
+	while (*s && !BITOP(reject, *(unsigned char *)s, &)) s++;	/* stop at the first marked byte */
+	return s-start;
+}
diff --git a/fusl/src/string/strdup.c b/fusl/src/string/strdup.c
new file mode 100644
index 0000000..dd5f80c
--- /dev/null
+++ b/fusl/src/string/strdup.c
@@ -0,0 +1,13 @@
+#include <stdlib.h>
+#include <string.h>
+#include "libc.h"
+
+char *__strdup(const char *s)
+{	/* Return a malloc'd copy of s, or NULL if the allocation fails. */
+	size_t size = strlen(s) + 1;	/* bytes including the terminator */
+	char *copy = malloc(size);
+	if (copy) memcpy(copy, s, size);
+	return copy;
+}
+
+weak_alias(__strdup, strdup);
diff --git a/fusl/src/string/strerror_r.c b/fusl/src/string/strerror_r.c
new file mode 100644
index 0000000..da26b4f
--- /dev/null
+++ b/fusl/src/string/strerror_r.c
@@ -0,0 +1,20 @@
+#include <string.h>
+#include <errno.h>
+#include "libc.h"
+
+int strerror_r(int err, char *buf, size_t buflen)
+{	/* Copy the message for err into buf; ERANGE when it had to be truncated. */
+	const char *text = strerror(err);
+	size_t need = strlen(text) + 1;	/* message plus terminator */
+	if (need <= buflen) {
+		memcpy(buf, text, need);
+		return 0;
+	}
+	if (buflen) {	/* keep what fits, always NUL-terminated */
+		memcpy(buf, text, buflen-1);
+		buf[buflen-1] = 0;
+	}
+	return ERANGE;
+}
+
+weak_alias(strerror_r, __xpg_strerror_r);
diff --git a/fusl/src/string/strlcat.c b/fusl/src/string/strlcat.c
new file mode 100644
index 0000000..ef81209
--- /dev/null
+++ b/fusl/src/string/strlcat.c
@@ -0,0 +1,9 @@
+#define _BSD_SOURCE
+#include <string.h>
+
+size_t strlcat(char *d, const char *s, size_t n)
+{	/* Append s to d within an n-byte buffer; returns the length the full result would have. */
+	size_t used = strnlen(d, n);
+	size_t added = used < n ? strlcpy(d+used, s, n-used) : strlen(s);	/* no NUL within n: nothing is appended */
+	return used + added;
+}
diff --git a/fusl/src/string/strlcpy.c b/fusl/src/string/strlcpy.c
new file mode 100644
index 0000000..193d724
--- /dev/null
+++ b/fusl/src/string/strlcpy.c
@@ -0,0 +1,32 @@
+#define _BSD_SOURCE
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include "libc.h"
+
+#define ALIGN (sizeof(size_t)-1)
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+
+size_t strlcpy(char *d, const char *s, size_t n)
+{	/* Copy s into the n-byte buffer d, NUL-terminating whenever n > 0; returns the length of s. */
+	char *d0 = d;
+	size_t *wd;
+	const size_t *ws;
+	/* n == 0: nothing may be written; otherwise one byte is reserved for the NUL. */
+	if (!n--) goto finish;
+	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) { /* same misalignment: word copy possible */
+		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++); /* bytewise until s is word-aligned */
+		if (n && *s) {
+			wd=(void *)d; ws=(const void *)s;
+			for (; n>=sizeof(size_t) && !HASZERO(*ws); /* whole words that contain no zero byte */
+			       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
+			d=(void *)wd; s=(const void *)ws;
+		}
+	}
+	for (; n && (*d=*s); n--, s++, d++); /* bytewise remainder */
+	*d = 0;
+finish:
+	return d-d0 + strlen(s); /* bytes consumed so far plus what is left of s */
+}
diff --git a/fusl/src/string/strlen.c b/fusl/src/string/strlen.c
new file mode 100644
index 0000000..929ddcb
--- /dev/null
+++ b/fusl/src/string/strlen.c
@@ -0,0 +1,18 @@
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define ALIGN (sizeof(size_t))
+#define ONES ((size_t)-1/UCHAR_MAX)
+#define HIGHS (ONES * (UCHAR_MAX/2+1))
+#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+
+size_t strlen(const char *s)
+{	/* Number of bytes in s before its terminating NUL. */
+	const char *p = s;
+	while ((uintptr_t)p % ALIGN) { if (!*p) return p-s; p++; }	/* bytewise up to word alignment */
+	const size_t *w = (const void *)p;
+	while (!HASZERO(*w)) w++;	/* skip whole words without a zero byte */
+	for (p = (const void *)w; *p; p++);	/* pin down the zero byte */
+	return p-s;
+}
diff --git a/fusl/src/string/strncasecmp.c b/fusl/src/string/strncasecmp.c
new file mode 100644
index 0000000..3af5300
--- /dev/null
+++ b/fusl/src/string/strncasecmp.c
@@ -0,0 +1,18 @@
+#include <strings.h>
+#include <ctype.h>
+#include "libc.h"
+
+int strncasecmp(const char *_l, const char *_r, size_t n)
+{	/* Case-insensitive comparison of at most n bytes. */
+	const unsigned char *a = (const void *)_l, *b = (const void *)_r;
+	if (!n) return 0;
+	for (n--; n && *a && *b && (*a == *b || tolower(*a) == tolower(*b)); n--) a++, b++;
+	return tolower(*a) - tolower(*b);
+}
+
+int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t loc)
+{	/* Forwards to strncasecmp; the loc argument is not consulted. */
+	return strncasecmp(l, r, n);
+}
+
+weak_alias(__strncasecmp_l, strncasecmp_l);
diff --git a/fusl/src/string/strncat.c b/fusl/src/string/strncat.c
new file mode 100644
index 0000000..01ca2a2
--- /dev/null
+++ b/fusl/src/string/strncat.c
@@ -0,0 +1,10 @@
+#include <string.h>
+
+char *strncat(char *restrict d, const char *restrict s, size_t n)
+{	/* Append at most n bytes of s to d, then a NUL; returns d. */
+	char *out = d + strlen(d);
+	size_t i;
+	for (i = 0; i < n && s[i]; i++) out[i] = s[i];
+	out[i] = 0;
+	return d;
+}
diff --git a/fusl/src/string/strncmp.c b/fusl/src/string/strncmp.c
new file mode 100644
index 0000000..e228843
--- /dev/null
+++ b/fusl/src/string/strncmp.c
@@ -0,0 +1,9 @@
+#include <string.h>
+
+int strncmp(const char *_l, const char *_r, size_t n)
+{	/* Compare at most n bytes as unsigned char, stopping at a NUL. */
+	const unsigned char *a = (const void *)_l, *b = (const void *)_r;
+	if (!n) return 0;
+	for (n--; n && *a && *b && *a == *b; n--) a++, b++;
+	return *a - *b;
+}
diff --git a/fusl/src/string/strncpy.c b/fusl/src/string/strncpy.c
new file mode 100644
index 0000000..441ba03
--- /dev/null
+++ b/fusl/src/string/strncpy.c
@@ -0,0 +1,9 @@
+#include <string.h>
+
+char *__stpncpy(char *, const char *, size_t);
+
+char *strncpy(char *restrict d, const char *restrict s, size_t n)
+{	/* All copying is done by __stpncpy; only the return value differs (d itself). */
+	(void)__stpncpy(d, s, n);
+	return d;
+}
diff --git a/fusl/src/string/strndup.c b/fusl/src/string/strndup.c
new file mode 100644
index 0000000..617d27b
--- /dev/null
+++ b/fusl/src/string/strndup.c
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+#include <string.h>
+
+char *strndup(const char *s, size_t n)
+{	/* malloc'd copy of at most n bytes of s, always NUL-terminated. */
+	size_t len = strnlen(s, n);
+	char *copy = malloc(len+1);
+
+	/* A failed allocation is reported by handing the NULL straight back. */
+	if (copy) memcpy(copy, s, len), copy[len] = 0;
+	return copy;
+}
diff --git a/fusl/src/string/strnlen.c b/fusl/src/string/strnlen.c
new file mode 100644
index 0000000..6442eb7
--- /dev/null
+++ b/fusl/src/string/strnlen.c
@@ -0,0 +1,7 @@
+#include <string.h>
+
+size_t strnlen(const char *s, size_t n)
+{	/* Length of s, capped at n when no NUL occurs in the first n bytes. */
+	const char *nul = memchr(s, '\0', n);
+	return nul ? (size_t)(nul - s) : n;
+}
diff --git a/fusl/src/string/strpbrk.c b/fusl/src/string/strpbrk.c
new file mode 100644
index 0000000..55947c6
--- /dev/null
+++ b/fusl/src/string/strpbrk.c
@@ -0,0 +1,7 @@
+#include <string.h>
+
+char *strpbrk(const char *s, const char *b)
+{	/* First byte of s that also occurs in b, or 0 if there is none. */
+	const char *hit = s + strcspn(s, b);
+	return *hit ? (char *)hit : 0;
+}
diff --git a/fusl/src/string/strrchr.c b/fusl/src/string/strrchr.c
new file mode 100644
index 0000000..635fb3c
--- /dev/null
+++ b/fusl/src/string/strrchr.c
@@ -0,0 +1,8 @@
+#include <string.h>
+
+void *__memrchr(const void *, int, size_t);
+
+char *strrchr(const char *s, int c)
+{	/* The +1 puts the terminating NUL inside the range handed to __memrchr. */
+	return __memrchr(s, c, strlen(s) + 1);
+}
diff --git a/fusl/src/string/strsep.c b/fusl/src/string/strsep.c
new file mode 100644
index 0000000..cb37c32
--- /dev/null
+++ b/fusl/src/string/strsep.c
@@ -0,0 +1,13 @@
+#define _GNU_SOURCE
+#include <string.h>
+
+char *strsep(char **str, const char *sep)
+{	/* Cut the next sep-delimited token off *str and return it (NULL once *str is NULL). */
+	char *token = *str;
+	if (token) {
+		char *stop = token + strcspn(token, sep);
+		if (*stop) *stop = 0, *str = stop + 1;	/* resume right after the delimiter */
+		else *str = 0;	/* last token: nothing left */
+	}
+	return token;
+}
diff --git a/fusl/src/string/strsignal.c b/fusl/src/string/strsignal.c
new file mode 100644
index 0000000..96bfe84
--- /dev/null
+++ b/fusl/src/string/strsignal.c
@@ -0,0 +1,116 @@
+#include <signal.h>
+#include <string.h>
+#include "locale_impl.h"
+
+#if (SIGHUP == 1) && (SIGINT == 2) && (SIGQUIT == 3) && (SIGILL == 4) \
+ && (SIGTRAP == 5) && (SIGABRT == 6) && (SIGBUS == 7) && (SIGFPE == 8) \
+ && (SIGKILL == 9) && (SIGUSR1 == 10) && (SIGSEGV == 11) && (SIGUSR2 == 12) \
+ && (SIGPIPE == 13) && (SIGALRM == 14) && (SIGTERM == 15) && (SIGSTKFLT == 16) \
+ && (SIGCHLD == 17) && (SIGCONT == 18) && (SIGSTOP == 19) && (SIGTSTP == 20) \
+ && (SIGTTIN == 21) && (SIGTTOU == 22) && (SIGURG == 23) && (SIGXCPU == 24) \
+ && (SIGXFSZ == 25) && (SIGVTALRM == 26) && (SIGPROF == 27) && (SIGWINCH == 28) \
+ && (SIGPOLL == 29) && (SIGPWR == 30) && (SIGSYS == 31)
+/* All 31 signal numbers have the values checked above: sigmap is the identity. */
+#define sigmap(x) x
+
+#else
+/* Otherwise map[] translates this platform's numbers to the values checked above. */
+static const char map[] = {
+	[SIGHUP]    = 1,
+	[SIGINT]    = 2,
+	[SIGQUIT]   = 3,
+	[SIGILL]    = 4,
+	[SIGTRAP]   = 5,
+	[SIGABRT]   = 6,
+	[SIGBUS]    = 7,
+	[SIGFPE]    = 8,
+	[SIGKILL]   = 9,
+	[SIGUSR1]   = 10,
+	[SIGSEGV]   = 11,
+	[SIGUSR2]   = 12,
+	[SIGPIPE]   = 13,
+	[SIGALRM]   = 14,
+	[SIGTERM]   = 15,
+	[SIGSTKFLT] = 16,
+	[SIGCHLD]   = 17,
+	[SIGCONT]   = 18,
+	[SIGSTOP]   = 19,
+	[SIGTSTP]   = 20,
+	[SIGTTIN]   = 21,
+	[SIGTTOU]   = 22,
+	[SIGURG]    = 23,
+	[SIGXCPU]   = 24,
+	[SIGXFSZ]   = 25,
+	[SIGVTALRM] = 26,
+	[SIGPROF]   = 27,
+	[SIGWINCH]  = 28,
+	[SIGPOLL]   = 29,
+	[SIGPWR]    = 30,
+	[SIGSYS]    = 31
+};
+/* Values at or beyond sizeof map are passed through unchanged. */
+#define sigmap(x) ((x) >= sizeof map ? (x) : map[(x)])
+
+#endif
+
+static const char strings[] = /* NUL-separated messages; strsignal reaches entry i by skipping i terminators */
+	"Unknown signal\0"
+	"Hangup\0"
+	"Interrupt\0"
+	"Quit\0"
+	"Illegal instruction\0"
+	"Trace/breakpoint trap\0"
+	"Aborted\0"
+	"Bus error\0"
+	"Arithmetic exception\0"
+	"Killed\0"
+	"User defined signal 1\0"
+	"Segmentation fault\0"
+	"User defined signal 2\0"
+	"Broken pipe\0"
+	"Alarm clock\0"
+	"Terminated\0"
+	"Stack fault\0"
+	"Child process status\0"
+	"Continued\0"
+	"Stopped (signal)\0"
+	"Stopped\0"
+	"Stopped (tty input)\0"
+	"Stopped (tty output)\0"
+	"Urgent I/O condition\0"
+	"CPU time limit exceeded\0"
+	"File size limit exceeded\0"
+	"Virtual timer expired\0"
+	"Profiling timer expired\0"
+	"Window changed\0"
+	"I/O possible\0"
+	"Power failure\0"
+	"Bad system call\0"
+	"RT32"
+	"\0RT33\0RT34\0RT35\0RT36\0RT37\0RT38\0RT39\0RT40"
+	"\0RT41\0RT42\0RT43\0RT44\0RT45\0RT46\0RT47\0RT48"
+	"\0RT49\0RT50\0RT51\0RT52\0RT53\0RT54\0RT55\0RT56"
+	"\0RT57\0RT58\0RT59\0RT60\0RT61\0RT62\0RT63\0RT64"
+#if _NSIG > 65 /* further "RTn" names, only compiled in for larger _NSIG */
+	"\0RT65\0RT66\0RT67\0RT68\0RT69\0RT70\0RT71\0RT72"
+	"\0RT73\0RT74\0RT75\0RT76\0RT77\0RT78\0RT79\0RT80"
+	"\0RT81\0RT82\0RT83\0RT84\0RT85\0RT86\0RT87\0RT88"
+	"\0RT89\0RT90\0RT91\0RT92\0RT93\0RT94\0RT95\0RT96"
+	"\0RT97\0RT98\0RT99\0RT100\0RT101\0RT102\0RT103\0RT104"
+	"\0RT105\0RT106\0RT107\0RT108\0RT109\0RT110\0RT111\0RT112"
+	"\0RT113\0RT114\0RT115\0RT116\0RT117\0RT118\0RT119\0RT120"
+	"\0RT121\0RT122\0RT123\0RT124\0RT125\0RT126\0RT127\0RT128"
+#endif
+	"";
+
+char *strsignal(int signum)
+{	/* Message for signum; mapped values outside 1.._NSIG-1 select the first table entry. */
+	const char *entry = strings;
+	int idx = sigmap(signum);
+
+	if (idx - 1U >= _NSIG-1) idx = 0;	/* the unsigned compare also rejects idx <= 0 */
+	/* Step over idx NUL-terminated entries of the packed table. */
+	for (; idx; idx--) while (*entry++);
+
+	return (char *)LCTRANS_CUR(entry);
+}
diff --git a/fusl/src/string/strspn.c b/fusl/src/string/strspn.c
new file mode 100644
index 0000000..9543dad
--- /dev/null
+++ b/fusl/src/string/strspn.c
@@ -0,0 +1,20 @@
+#include <string.h>
+
+#define BITOP(a,b,op) \
+ ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
+
+size_t strspn(const char *s, const char *c)
+{
+	const char *a = s;
+	size_t byteset[32/sizeof(size_t)] = { 0 };
+
+	if (!c[0]) return 0;
+	if (!c[1]) {
+		for (; *s == *c; s++);
+		return s-a;
+	}
+
+	for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++);
+	for (; *s && BITOP(byteset, *(unsigned char *)s, &); s++);
+	return s-a;
+}
diff --git a/fusl/src/string/strstr.c b/fusl/src/string/strstr.c
new file mode 100644
index 0000000..cd06912
--- /dev/null
+++ b/fusl/src/string/strstr.c
@@ -0,0 +1,155 @@
+#include <string.h>
+#include <stdint.h>
+
+static char *twobyte_strstr(const unsigned char *h, const unsigned char *n)
+{	/* Slide a 16-bit window over h until it equals the two needle bytes. */
+	uint16_t want = n[0]<<8 | n[1], win = h[0]<<8 | h[1];
+	for (h++; *h && win != want; h++) win = win<<8 | h[1];
+	return *h ? (char *)h-1 : 0;
+}
+
+static char *threebyte_strstr(const unsigned char *h, const unsigned char *n)
+{	/* Slide a three-byte window (held in the top 24 bits) over h until it equals n. */
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8;	/* cast first: int<<24 overflows for bytes >= 0x80 */
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8;
+	for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8);
+	return *h ? (char *)h-2 : 0;
+}
+
+static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n)
+{	/* Slide a 32-bit window over h until it equals the four needle bytes. */
+	uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];	/* cast first: int<<24 overflows for bytes >= 0x80 */
+	uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
+	for (h+=3; *h && hw != nw; hw = hw<<8 | *++h);
+	return *h ? (char *)h-3 : 0;
+}
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+#define BITOP(a,b,op) \
+ ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
+
+static char *twoway_strstr(const unsigned char *h, const unsigned char *n)
+{	/* General search: returns the first match of n in h, or 0 if there is none. */
+	const unsigned char *z;
+	size_t l, ip, jp, k, p, ms, p0, mem, mem0;
+	size_t byteset[32 / sizeof(size_t)] = { 0 };
+	size_t shift[256];
+
+	/* Computing length of needle and fill shift table */
+	for (l=0; n[l] && h[l]; l++)
+		BITOP(byteset, n[l], |=), shift[n[l]] = l+1; /* shift[c] = 1 + last index of byte c in n */
+	if (n[l]) return 0; /* hit the end of h */
+
+	/* Compute maximal suffix */
+	ip = -1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] > n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	ms = ip;
+	p0 = p;
+
+	/* And with the opposite comparison */
+	ip = -1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] < n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	if (ip+1 > ms+1) ms = ip; /* +1 so that the initial ip == -1 compares as 0 */
+	else p = p0;
+
+	/* Periodic needle? */
+	if (memcmp(n, n+p, ms+1)) {
+		mem0 = 0;
+		p = MAX(ms, l-ms-1) + 1;
+	} else mem0 = l-p;
+	mem = 0;
+
+	/* Initialize incremental end-of-haystack pointer */
+	z = h;
+
+	/* Search loop */
+	for (;;) {
+		/* Update incremental end-of-haystack pointer */
+		if (z-h < l) {
+			/* Fast estimate for MAX(l,63): l|63 is at least as large as both */
+			size_t grow = l | 63;
+			const unsigned char *z2 = memchr(z, 0, grow);
+			if (z2) {
+				z = z2;
+				if (z-h < l) return 0; /* fewer than l bytes remain in h */
+			} else z += grow;
+		}
+
+		/* Check last byte first; advance by shift on mismatch */
+		if (BITOP(byteset, h[l-1], &)) {
+			k = l-shift[h[l-1]];
+			//printf("adv by %zu (on %c) at [%s] (%zu;l=%zu)\n", k, h[l-1], h, shift[h[l-1]], l);
+			if (k) {
+				if (mem0 && mem && k < p) k = l-p;
+				h += k;
+				mem = 0;
+				continue;
+			}
+		} else {
+			h += l; /* h[l-1] does not occur in n at all: skip the whole window */
+			mem = 0;
+			continue;
+		}
+
+		/* Compare right half */
+		for (k=MAX(ms+1,mem); n[k] && n[k] == h[k]; k++);
+		if (n[k]) {
+			h += k-ms;
+			mem = 0;
+			continue;
+		}
+		/* Compare left half */
+		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
+		if (k <= mem) return (char *)h;
+		h += p;
+		mem = mem0;
+	}
+}
+
+char *strstr(const char *h, const char *n)
+{	/* Find the first occurrence of n in h; an empty n matches at h. */
+	const unsigned char *hay, *pat = (const void *)n;
+
+	if (!pat[0]) return (char *)h;
+	/* Jump to the first byte that can start a match at all. */
+	hay = (const void *)strchr(h, *n);
+	if (!hay || !pat[1]) return (char *)hay;
+	/* Needles of 2..4 bytes have dedicated scanners; each requires the
+	 * haystack to hold at least as many bytes as the needle. */
+	if (!hay[1]) return 0;
+	if (!pat[2]) return twobyte_strstr(hay, pat);
+	if (!hay[2]) return 0;
+	if (!pat[3]) return threebyte_strstr(hay, pat);
+	if (!hay[3]) return 0;
+	return pat[4] ? twoway_strstr(hay, pat) : fourbyte_strstr(hay, pat);
+}
diff --git a/fusl/src/string/strtok.c b/fusl/src/string/strtok.c
new file mode 100644
index 0000000..3508790
--- /dev/null
+++ b/fusl/src/string/strtok.c
@@ -0,0 +1,13 @@
+#include <string.h>
+
+char *strtok(char *restrict s, const char *restrict sep)
+{	/* Return the next sep-delimited token; s == 0 continues the previous scan. */
+	static char *resume;	/* where the next call picks up; 0 when exhausted */
+	char *end;
+	if (!s && !(s = resume)) return NULL;
+	s += strspn(s, sep);	/* skip leading delimiters */
+	if (!*s) return resume = 0;
+	end = s + strcspn(s, sep);
+	resume = *end ? (*end = 0, end + 1) : 0;
+	return s;
+}
diff --git a/fusl/src/string/strtok_r.c b/fusl/src/string/strtok_r.c
new file mode 100644
index 0000000..862d4fe
--- /dev/null
+++ b/fusl/src/string/strtok_r.c
@@ -0,0 +1,12 @@
+#include <string.h>
+
+char *strtok_r(char *restrict s, const char *restrict sep, char **restrict p)
+{	/* strtok with the scan position kept in *p instead of a static. */
+	char *end;
+	if (!s && !(s = *p)) return NULL;
+	s += strspn(s, sep);	/* skip leading delimiters */
+	if (!*s) return *p = 0;
+	end = s + strcspn(s, sep);
+	*p = *end ? (*end = 0, end + 1) : 0;
+	return s;
+}
diff --git a/fusl/src/string/strverscmp.c b/fusl/src/string/strverscmp.c
new file mode 100644
index 0000000..4daf276
--- /dev/null
+++ b/fusl/src/string/strverscmp.c
@@ -0,0 +1,34 @@
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <string.h>
+
+int strverscmp(const char *l0, const char *r0)
+{	/* Compare l0 and r0 bytewise, except that digit runs get the special ordering below. */
+	const unsigned char *l = (const void *)l0;
+	const unsigned char *r = (const void *)r0;
+	size_t i, dp, j; /* i: first mismatch; dp: start of the digit run leading up to i */
+	int z = 1; /* nonzero while every digit seen since dp was '0' */
+
+	/* Find maximal matching prefix and track its maximal digit
+	 * suffix and whether those digits are all zeros. */
+	for (dp=i=0; l[i]==r[i]; i++) {
+		int c = l[i];
+		if (!c) return 0; /* reached the NUL with no mismatch: equal */
+		if (!isdigit(c)) dp=i+1, z=1;
+		else if (c!='0') z=0;
+	}
+
+	if (l[dp]!='0' && r[dp]!='0') {
+		/* If we're not looking at a digit sequence that began
+		 * with a zero, longest digit string is greater. */
+		for (j=i; isdigit(l[j]); j++)
+			if (!isdigit(r[j])) return 1;
+		if (isdigit(r[j])) return -1;
+	} else if (z && dp<i && (isdigit(l[i]) || isdigit(r[i]))) {
+		/* Otherwise, if common prefix of digit sequence is
+		 * all zeros, digits order less than non-digits. */
+		return (unsigned char)(l[i]-'0') - (unsigned char)(r[i]-'0');
+	}
+
+	return l[i] - r[i]; /* plain difference of the first mismatching bytes */
+}
diff --git a/fusl/src/string/swab.c b/fusl/src/string/swab.c
new file mode 100644
index 0000000..ace0f46
--- /dev/null
+++ b/fusl/src/string/swab.c
@@ -0,0 +1,13 @@
+#include <unistd.h>
+
+void swab(const void *restrict _src, void *restrict _dest, ssize_t n)
+{	/* Copy byte pairs from _src to _dest, exchanging the two bytes of each pair. */
+	const char *in = _src;
+	char *out = _dest;
+	ssize_t i;
+	/* A trailing odd byte is not copied; n <= 1 copies nothing. */
+	for (i = 0; i+1 < n; i += 2) {
+		out[i] = in[i+1];
+		out[i+1] = in[i];
+	}
+}
diff --git a/fusl/src/string/wcpcpy.c b/fusl/src/string/wcpcpy.c
new file mode 100644
index 0000000..ef40134
--- /dev/null
+++ b/fusl/src/string/wcpcpy.c
@@ -0,0 +1,6 @@
+#include <wchar.h>
+
+wchar_t *wcpcpy(wchar_t *restrict d, const wchar_t *restrict s)
+{	/* Copy s to d; the result is d advanced by the length of s. */
+	return wcslen(s) + wcscpy(d, s);
+}
diff --git a/fusl/src/string/wcpncpy.c b/fusl/src/string/wcpncpy.c
new file mode 100644
index 0000000..b667f6d
--- /dev/null
+++ b/fusl/src/string/wcpncpy.c
@@ -0,0 +1,6 @@
+#include <wchar.h>
+
+wchar_t *wcpncpy(wchar_t *restrict d, const wchar_t *restrict s, size_t n)
+{	/* wcsncpy, but the result is d advanced by the characters taken from s (at most n). */
+	return wcsnlen(s, n) + wcsncpy(d, s, n);
+}
diff --git a/fusl/src/string/wcscasecmp.c b/fusl/src/string/wcscasecmp.c
new file mode 100644
index 0000000..3edeec7
--- /dev/null
+++ b/fusl/src/string/wcscasecmp.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+#include <wctype.h>
+
+int wcscasecmp(const wchar_t *l, const wchar_t *r)
+{	/* Unbounded wcsncasecmp: the limit passed is the largest size_t value. */
+	return wcsncasecmp(l, r, (size_t)-1);
+}
diff --git a/fusl/src/string/wcscasecmp_l.c b/fusl/src/string/wcscasecmp_l.c
new file mode 100644
index 0000000..065dd0a
--- /dev/null
+++ b/fusl/src/string/wcscasecmp_l.c
@@ -0,0 +1,6 @@
+#include <wchar.h>
+
+int wcscasecmp_l(const wchar_t *l, const wchar_t *r, locale_t locale)
+{	/* Forwards to wcscasecmp; the locale argument is not consulted. */
+	return wcscasecmp(l, r);
+}
diff --git a/fusl/src/string/wcscat.c b/fusl/src/string/wcscat.c
new file mode 100644
index 0000000..d4f00eb
--- /dev/null
+++ b/fusl/src/string/wcscat.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+
+wchar_t *wcscat(wchar_t *restrict dest, const wchar_t *restrict src)
+{	/* Append src, terminator included, at dest's terminator; returns dest. */
+	wcscpy(&dest[wcslen(dest)], src);
+	return dest;
+}
diff --git a/fusl/src/string/wcschr.c b/fusl/src/string/wcschr.c
new file mode 100644
index 0000000..8dfc2f3
--- /dev/null
+++ b/fusl/src/string/wcschr.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+wchar_t *wcschr(const wchar_t *s, wchar_t c)
+{	/* First c in s (c == 0 finds the terminator), or 0 if c does not occur. */
+	for (; *s != c; s++)
+		if (!*s) return 0;
+	return (wchar_t *)s;
+}
diff --git a/fusl/src/string/wcscmp.c b/fusl/src/string/wcscmp.c
new file mode 100644
index 0000000..26eeee7
--- /dev/null
+++ b/fusl/src/string/wcscmp.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+
+int wcscmp(const wchar_t *l, const wchar_t *r)
+{	/* Returns -1, 0 or 1. Compare, don't subtract: *l - *r can overflow and flip the sign. */
+	for (; *l==*r && *l && *r; l++, r++);
+	return *l < *r ? -1 : *l > *r;
+}
diff --git a/fusl/src/string/wcscpy.c b/fusl/src/string/wcscpy.c
new file mode 100644
index 0000000..625bf53
--- /dev/null
+++ b/fusl/src/string/wcscpy.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+wchar_t *wcscpy(wchar_t *restrict d, const wchar_t *restrict s)
+{	/* Copy s, terminator included, to d; returns d. */
+	size_t i = 0;
+	do d[i] = s[i]; while (s[i++]);
+	return d;
+}
diff --git a/fusl/src/string/wcscspn.c b/fusl/src/string/wcscspn.c
new file mode 100644
index 0000000..c4e5272
--- /dev/null
+++ b/fusl/src/string/wcscspn.c
@@ -0,0 +1,10 @@
+#include <wchar.h>
+
+size_t wcscspn(const wchar_t *s, const wchar_t *c)
+{	/* Length of the leading run of s that holds no character from c. */
+	const wchar_t *p = s;
+	if (!c[0]) return wcslen(s);
+	if (!c[1]) return (p = wcschr(s, *c)) ? p-s : wcslen(s);	/* single reject character */
+	while (*p && !wcschr(c, *p)) p++;
+	return p-s;
+}
diff --git a/fusl/src/string/wcsdup.c b/fusl/src/string/wcsdup.c
new file mode 100644
index 0000000..dd49c1b
--- /dev/null
+++ b/fusl/src/string/wcsdup.c
@@ -0,0 +1,11 @@
+#include <stdlib.h>
+#include <wchar.h>
+#include "libc.h"
+
+wchar_t *wcsdup(const wchar_t *s)
+{	/* Return a malloc'd copy of s, or NULL if the allocation fails. */
+	size_t count = wcslen(s) + 1;	/* characters including the terminator */
+	wchar_t *copy = malloc(count*sizeof(wchar_t));
+	if (copy) wmemcpy(copy, s, count);
+	return copy;
+}
diff --git a/fusl/src/string/wcslen.c b/fusl/src/string/wcslen.c
new file mode 100644
index 0000000..1b7b665
--- /dev/null
+++ b/fusl/src/string/wcslen.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+size_t wcslen(const wchar_t *s)
+{	/* Number of wide characters before the terminating zero. */
+	size_t len = 0;
+	while (s[len]) len++;
+	return len;
+}
diff --git a/fusl/src/string/wcsncasecmp.c b/fusl/src/string/wcsncasecmp.c
new file mode 100644
index 0000000..8fefe79
--- /dev/null
+++ b/fusl/src/string/wcsncasecmp.c
@@ -0,0 +1,9 @@
+#include <wchar.h>
+#include <wctype.h>
+
+int wcsncasecmp(const wchar_t *l, const wchar_t *r, size_t n)
+{	/* Compare at most n characters, folding both sides with towlower(). */
+	if (!n) return 0;
+	for (n--; n && *l && *r && (*l == *r || towlower(*l) == towlower(*r)); n--) l++, r++;
+	return towlower(*l) - towlower(*r);
+}
diff --git a/fusl/src/string/wcsncasecmp_l.c b/fusl/src/string/wcsncasecmp_l.c
new file mode 100644
index 0000000..6387248
--- /dev/null
+++ b/fusl/src/string/wcsncasecmp_l.c
@@ -0,0 +1,6 @@
+#include <wchar.h>
+
+int wcsncasecmp_l(const wchar_t *l, const wchar_t *r, size_t n, locale_t locale)
+{	/* Forwards to wcsncasecmp; the locale argument is not consulted. */
+	return wcsncasecmp(l, r, n);
+}
diff --git a/fusl/src/string/wcsncat.c b/fusl/src/string/wcsncat.c
new file mode 100644
index 0000000..8563f1a
--- /dev/null
+++ b/fusl/src/string/wcsncat.c
@@ -0,0 +1,10 @@
+#include <wchar.h>
+
+wchar_t *wcsncat(wchar_t *restrict d, const wchar_t *restrict s, size_t n)
+{	/* Append at most n characters of s to d, then a terminator; returns d. */
+	wchar_t *out = d + wcslen(d);
+	size_t i;
+	for (i = 0; i < n && s[i]; i++) out[i] = s[i];
+	out[i] = 0;
+	return d;
+}
diff --git a/fusl/src/string/wcsncmp.c b/fusl/src/string/wcsncmp.c
new file mode 100644
index 0000000..4ab32a9
--- /dev/null
+++ b/fusl/src/string/wcsncmp.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+
+int wcsncmp(const wchar_t *l, const wchar_t *r, size_t n)
+{	/* Returns -1, 0 or 1 over at most n characters. Compare, don't subtract: *l - *r can overflow. */
+	for (; n && *l==*r && *l && *r; n--, l++, r++);
+	return n ? (*l < *r ? -1 : *l > *r) : 0;
+}
diff --git a/fusl/src/string/wcsncpy.c b/fusl/src/string/wcsncpy.c
new file mode 100644
index 0000000..4bede04
--- /dev/null
+++ b/fusl/src/string/wcsncpy.c
@@ -0,0 +1,9 @@
+#include <wchar.h>
+
+wchar_t *wcsncpy(wchar_t *restrict d, const wchar_t *restrict s, size_t n)
+{	/* Copy up to n characters of s into d, zero-filling any remainder; returns d. */
+	size_t i;
+	for (i = 0; i < n && s[i]; i++) d[i] = s[i];
+	wmemset(d+i, 0, n-i);
+	return d;
+}
diff --git a/fusl/src/string/wcsnlen.c b/fusl/src/string/wcsnlen.c
new file mode 100644
index 0000000..a776337
--- /dev/null
+++ b/fusl/src/string/wcsnlen.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+size_t wcsnlen(const wchar_t *s, size_t n)
+{	/* Length of s, capped at n when no terminator is among the first n characters. */
+	const wchar_t *nul = wmemchr(s, 0, n);
+	if (!nul) return n;
+	return nul - s;
+}
diff --git a/fusl/src/string/wcspbrk.c b/fusl/src/string/wcspbrk.c
new file mode 100644
index 0000000..0c72c19
--- /dev/null
+++ b/fusl/src/string/wcspbrk.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+
+wchar_t *wcspbrk(const wchar_t *s, const wchar_t *b)
+{	/* First character of s that also occurs in b, or NULL if none does. */
+	const wchar_t *hit = s + wcscspn(s, b);
+	return *hit ? (wchar_t *)hit : NULL;
+}
diff --git a/fusl/src/string/wcsrchr.c b/fusl/src/string/wcsrchr.c
new file mode 100644
index 0000000..8961b9e
--- /dev/null
+++ b/fusl/src/string/wcsrchr.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+wchar_t *wcsrchr(const wchar_t *s, wchar_t c)
+{	/* Last occurrence of c in s (terminator included), or 0 if c does not occur. */
+	const wchar_t *p = s + wcslen(s);
+	while (*p != c && p != s) p--;	/* stops at s: never forms a pointer below the array */
+	return *p == c ? (wchar_t *)p : 0;
+}
diff --git a/fusl/src/string/wcsspn.c b/fusl/src/string/wcsspn.c
new file mode 100644
index 0000000..4320d8f
--- /dev/null
+++ b/fusl/src/string/wcsspn.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+size_t wcsspn(const wchar_t *s, const wchar_t *c)
+{	/* Length of the leading run of s made only of characters from c. */
+	size_t run = 0;
+	while (s[run] && wcschr(c, s[run])) run++;
+	return run;
+}
diff --git a/fusl/src/string/wcsstr.c b/fusl/src/string/wcsstr.c
new file mode 100644
index 0000000..4caaef3
--- /dev/null
+++ b/fusl/src/string/wcsstr.c
@@ -0,0 +1,105 @@
+#include <wchar.h>
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+static wchar_t *twoway_wcsstr(const wchar_t *h, const wchar_t *n)
+{	/* General search: returns the first match of n in h, or 0 if there is none. */
+	const wchar_t *z;
+	size_t l, ip, jp, k, p, ms, p0, mem, mem0;
+
+	/* Computing length of needle */
+	for (l=0; n[l] && h[l]; l++);
+	if (n[l]) return 0; /* hit the end of h */
+
+	/* Compute maximal suffix */
+	ip = -1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] > n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	ms = ip;
+	p0 = p;
+
+	/* And with the opposite comparison */
+	ip = -1; jp = 0; k = p = 1;
+	while (jp+k<l) {
+		if (n[ip+k] == n[jp+k]) {
+			if (k == p) {
+				jp += p;
+				k = 1;
+			} else k++;
+		} else if (n[ip+k] < n[jp+k]) {
+			jp += k;
+			k = 1;
+			p = jp - ip;
+		} else {
+			ip = jp++;
+			k = p = 1;
+		}
+	}
+	if (ip+1 > ms+1) ms = ip; /* +1 so that the initial ip == -1 compares as 0 */
+	else p = p0;
+
+	/* Periodic needle? */
+	if (wmemcmp(n, n+p, ms+1)) {
+		mem0 = 0;
+		p = MAX(ms, l-ms-1) + 1;
+	} else mem0 = l-p;
+	mem = 0;
+
+	/* Initialize incremental end-of-haystack pointer */
+	z = h;
+
+	/* Search loop */
+	for (;;) {
+		/* Update incremental end-of-haystack pointer */
+		if (z-h < l) {
+			/* Fast estimate for MAX(l,63): l|63 is at least as large as both */
+			size_t grow = l | 63;
+			const wchar_t *z2 = wmemchr(z, 0, grow);
+			if (z2) {
+				z = z2;
+				if (z-h < l) return 0; /* fewer than l characters remain in h */
+			} else z += grow;
+		}
+
+		/* Compare right half */
+		for (k=MAX(ms+1,mem); n[k] && n[k] == h[k]; k++);
+		if (n[k]) {
+			h += k-ms;
+			mem = 0;
+			continue;
+		}
+		/* Compare left half */
+		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
+		if (k <= mem) return (wchar_t *)h;
+		h += p;
+		mem = mem0;
+	}
+}
+
+wchar_t *wcsstr(const wchar_t *restrict h, const wchar_t *restrict n)
+{	/* Find the first occurrence of n in h; an empty n matches at h. */
+	const wchar_t *start;
+
+	if (!*n) return (wchar_t *)h;
+	if (!*h) return 0;
+	/* Jump to the first character that can start a match at all. */
+	start = wcschr(h, *n);
+	if (!start) return 0;
+	if (!n[1]) return (wchar_t *)start;	/* one-character needle: done */
+	if (!start[1]) return 0;	/* a lone character cannot hold a longer needle */
+	return twoway_wcsstr(start, n);
+}
diff --git a/fusl/src/string/wcstok.c b/fusl/src/string/wcstok.c
new file mode 100644
index 0000000..ecc8033
--- /dev/null
+++ b/fusl/src/string/wcstok.c
@@ -0,0 +1,12 @@
+#include <wchar.h>
+
+wchar_t *wcstok(wchar_t *restrict s, const wchar_t *restrict sep, wchar_t **restrict p)
+{	/* Next sep-delimited token; the scan position is kept in *p between calls. */
+	wchar_t *end;
+	if (!s && !(s = *p)) return NULL;
+	s += wcsspn(s, sep);	/* skip leading delimiters */
+	if (!*s) return *p = 0;
+	end = s + wcscspn(s, sep);
+	*p = *end ? (*end = 0, end + 1) : 0;
+	return s;
+}
diff --git a/fusl/src/string/wcswcs.c b/fusl/src/string/wcswcs.c
new file mode 100644
index 0000000..9cfe4ac
--- /dev/null
+++ b/fusl/src/string/wcswcs.c
@@ -0,0 +1,6 @@
+#include <wchar.h>
+
+wchar_t *wcswcs(const wchar_t *haystack, const wchar_t *needle)
+{	/* Alternate name for wcsstr; both arguments are forwarded unchanged. */
+	return wcsstr(haystack, needle);
+}
diff --git a/fusl/src/string/wmemchr.c b/fusl/src/string/wmemchr.c
new file mode 100644
index 0000000..2bc2c27
--- /dev/null
+++ b/fusl/src/string/wmemchr.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+
+wchar_t *wmemchr(const wchar_t *s, wchar_t c, size_t n)
+{	/* First of the n characters at s that equals c, or 0 if none does. */
+	for (; n; n--, s++) if (*s == c) return (wchar_t *)s;
+	return 0;
+}
diff --git a/fusl/src/string/wmemcmp.c b/fusl/src/string/wmemcmp.c
new file mode 100644
index 0000000..2a19326
--- /dev/null
+++ b/fusl/src/string/wmemcmp.c
@@ -0,0 +1,7 @@
+#include <wchar.h>
+
+int wmemcmp(const wchar_t *l, const wchar_t *r, size_t n)
+{	/* Returns -1, 0 or 1 over n characters. Compare, don't subtract: *l-*r can overflow. */
+	for (; n && *l==*r; n--, l++, r++);
+	return n ? (*l < *r ? -1 : *l > *r) : 0;
+}
diff --git a/fusl/src/string/wmemcpy.c b/fusl/src/string/wmemcpy.c
new file mode 100644
index 0000000..52e6e6e
--- /dev/null
+++ b/fusl/src/string/wmemcpy.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+wchar_t *wmemcpy(wchar_t *restrict d, const wchar_t *restrict s, size_t n)
+{	/* Copy n wide characters from s to d in ascending order; returns d. */
+	size_t i;
+	for (i = 0; i < n; i++) d[i] = s[i];
+	return d;
+}
diff --git a/fusl/src/string/wmemmove.c b/fusl/src/string/wmemmove.c
new file mode 100644
index 0000000..e406f3d
--- /dev/null
+++ b/fusl/src/string/wmemmove.c
@@ -0,0 +1,11 @@
+#include <wchar.h>
+
+wchar_t *wmemmove(wchar_t *d, const wchar_t *s, size_t n)
+{	/* Copy n wide characters from s to d, overlap allowed; returns d. */
+	size_t i;
+	if ((size_t)(d-s) >= n)
+		for (i = 0; i < n; i++) d[i] = s[i];	/* d is not inside [s, s+n): ascending order is safe */
+	else
+		while (n--) d[n] = s[n];	/* d starts inside the source: copy from the end */
+	return d;
+}
diff --git a/fusl/src/string/wmemset.c b/fusl/src/string/wmemset.c
new file mode 100644
index 0000000..07a037a
--- /dev/null
+++ b/fusl/src/string/wmemset.c
@@ -0,0 +1,8 @@
+#include <wchar.h>
+
+wchar_t *wmemset(wchar_t *d, wchar_t c, size_t n)
+{	/* Store c into each of the n wide characters at d; returns d. */
+	size_t i;
+	for (i = 0; i < n; i++) d[i] = c;
+	return d;
+}
diff --git a/fusl/src/string/x86_64/memcpy.s b/fusl/src/string/x86_64/memcpy.s
new file mode 100644
index 0000000..3d960ef
--- /dev/null
+++ b/fusl/src/string/x86_64/memcpy.s
@@ -0,0 +1,25 @@
+.global memcpy
+.global __memcpy_fwd
+.hidden __memcpy_fwd
+.type memcpy,@function
+memcpy:				# SysV AMD64: rdi = dest, rsi = src, rdx = byte count
+__memcpy_fwd:			# second entry label for the same ascending copy
+	mov %rdi,%rax		# return value: the original dest
+	cmp $8,%rdx
+	jc 1f			# fewer than 8 bytes: skip the alignment prologue
+	test $7,%edi
+	jz 1f			# dest is already 8-byte aligned
+2:	movsb			# single bytes until dest is 8-byte aligned
+	dec %rdx
+	test $7,%edi
+	jnz 2b
+1:	mov %rdx,%rcx
+	shr $3,%rcx		# rcx = number of whole 8-byte words
+	rep
+	movsq
+	and $7,%edx		# 0..7 bytes left over
+	jz 1f
+2:	movsb
+	dec %edx
+	jnz 2b
+1:	ret
diff --git a/fusl/src/string/x86_64/memmove.s b/fusl/src/string/x86_64/memmove.s
new file mode 100644
index 0000000..172c025
--- /dev/null
+++ b/fusl/src/string/x86_64/memmove.s
@@ -0,0 +1,16 @@
+.global memmove
+.type memmove,@function
+memmove:			# SysV AMD64: rdi = dest, rsi = src, rdx = byte count
+	mov %rdi,%rax
+	sub %rsi,%rax		# rax = dest - src
+	cmp %rdx,%rax		# compared with the count as unsigned values
+.hidden __memcpy_fwd
+	jae __memcpy_fwd	# dest - src >= n: an ascending copy cannot overwrite unread source
+	mov %rdx,%rcx
+	lea -1(%rdi,%rdx),%rdi	# point both registers at the last byte of their buffers
+	lea -1(%rsi,%rdx),%rsi
+	std			# direction flag set: rep movsb walks downwards
+	rep movsb
+	cld			# clear the direction flag again before returning
+	lea 1(%rdi),%rax	# rdi ended one below dest; return dest
+	ret
diff --git a/fusl/src/string/x86_64/memset.s b/fusl/src/string/x86_64/memset.s
new file mode 100644
index 0000000..2d3f5e5
--- /dev/null
+++ b/fusl/src/string/x86_64/memset.s
@@ -0,0 +1,72 @@
+.global memset
+.type memset,@function
+memset:				# SysV AMD64: rdi = dest, esi = fill value (low byte used), rdx = byte count
+	movzbq %sil,%rax
+	mov $0x101010101010101,%r8
+	imul %r8,%rax		# rax = fill byte replicated into all 8 bytes
+
+	cmp $126,%rdx
+	ja 2f			# more than 126 bytes: bulk path below
+
+	test %edx,%edx
+	jz 1f			# n == 0: nothing to store
+
+	mov %sil,(%rdi)		# small sizes: store inwards from both ends;
+	mov %sil,-1(%rdi,%rdx)	# the two sides may overlap in the middle
+	cmp $2,%edx
+	jbe 1f
+
+	mov %ax,1(%rdi)
+	mov %ax,(-1-2)(%rdi,%rdx)
+	cmp $6,%edx
+	jbe 1f
+
+	mov %eax,(1+2)(%rdi)
+	mov %eax,(-1-2-4)(%rdi,%rdx)
+	cmp $14,%edx
+	jbe 1f
+
+	mov %rax,(1+2+4)(%rdi)
+	mov %rax,(-1-2-4-8)(%rdi,%rdx)
+	cmp $30,%edx
+	jbe 1f
+
+	mov %rax,(1+2+4+8)(%rdi)
+	mov %rax,(1+2+4+8+8)(%rdi)
+	mov %rax,(-1-2-4-8-16)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-8)(%rdi,%rdx)
+	cmp $62,%edx
+	jbe 1f
+
+	mov %rax,(1+2+4+8+16)(%rdi)
+	mov %rax,(1+2+4+8+16+8)(%rdi)
+	mov %rax,(1+2+4+8+16+16)(%rdi)
+	mov %rax,(1+2+4+8+16+24)(%rdi)
+	mov %rax,(-1-2-4-8-16-32)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-16-24)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-16-16)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-16-8)(%rdi,%rdx)
+
+1:	mov %rdi,%rax		# return dest
+	ret
+
+2:	test $15,%edi		# flags are consumed by the jnz below (mov leaves them alone)
+	mov %rdi,%r8		# keep dest for the return value
+	mov %rax,-8(%rdi,%rdx)	# last 8 bytes stored up front; stosq only covers whole words
+	mov %rdx,%rcx
+	jnz 2f			# dest not 16-byte aligned: fix up first
+
+1:	shr $3,%rcx		# rcx = number of 8-byte stores
+	rep
+	stosq
+	mov %r8,%rax		# return dest
+	ret
+
+2:	xor %edx,%edx
+	sub %edi,%edx
+	and $15,%edx		# edx = bytes up to the next 16-byte boundary
+	mov %rax,(%rdi)		# cover the first 16 bytes with unaligned stores
+	mov %rax,8(%rdi)
+	sub %rdx,%rcx		# drop the skipped bytes from the count
+	add %rdx,%rdi		# continue from the aligned address
+	jmp 1b