| /* |
| * An implementation of what I call the "Sea of Stars" algorithm for |
| * POSIX fnmatch(). The basic idea is that we factor the pattern into |
| * a head component (which we match first and can reject without ever |
| * measuring the length of the string), an optional tail component |
| * (which only exists if the pattern contains at least one star), and |
| * an optional "sea of stars", a set of star-separated components |
| * between the head and tail. After the head and tail matches have |
| * been removed from the input string, the components in the "sea of |
| * stars" are matched sequentially by searching for their first |
| * occurrence past the end of the previous match. |
| * |
| * - Rich Felker, April 2012 |
| */ |
| |
| #include <string.h> |
| #include <fnmatch.h> |
| #include <stdlib.h> |
| #include <wchar.h> |
| #include <wctype.h> |
| #include "locale_impl.h" |
| |
| #define END 0 |
| #define UNMATCHABLE -2 |
| #define BRACKET -3 |
| #define QUESTION -4 |
| #define STAR -5 |
| |
| static int str_next(const char* str, size_t n, size_t* step) { |
| if (!n) { |
| *step = 0; |
| return 0; |
| } |
| if (str[0] >= 128U) { |
| wchar_t wc; |
| int k = mbtowc(&wc, str, n); |
| if (k < 0) { |
| *step = 1; |
| return -1; |
| } |
| *step = k; |
| return wc; |
| } |
| *step = 1; |
| return str[0]; |
| } |
| |
| static int pat_next(const char* pat, size_t m, size_t* step, int flags) { |
| int esc = 0; |
| if (!m || !*pat) { |
| *step = 0; |
| return END; |
| } |
| *step = 1; |
| if (pat[0] == '\\' && pat[1] && !(flags & FNM_NOESCAPE)) { |
| *step = 2; |
| pat++; |
| esc = 1; |
| goto escaped; |
| } |
| if (pat[0] == '[') { |
| size_t k = 1; |
| if (k < m) |
| if (pat[k] == '^' || pat[k] == '!') |
| k++; |
| if (k < m) |
| if (pat[k] == ']') |
| k++; |
| for (; k < m && pat[k] && pat[k] != ']'; k++) { |
| if (k + 1 < m && pat[k + 1] && pat[k] == '[' && |
| (pat[k + 1] == ':' || pat[k + 1] == '.' || pat[k + 1] == '=')) { |
| int z = pat[k + 1]; |
| k += 2; |
| if (k < m && pat[k]) |
| k++; |
| while (k < m && pat[k] && (pat[k - 1] != z || pat[k] != ']')) |
| k++; |
| if (k == m || !pat[k]) |
| break; |
| } |
| } |
| if (k == m || !pat[k]) { |
| *step = 1; |
| return '['; |
| } |
| *step = k + 1; |
| return BRACKET; |
| } |
| if (pat[0] == '*') |
| return STAR; |
| if (pat[0] == '?') |
| return QUESTION; |
| escaped: |
| if (pat[0] >= 128U) { |
| wchar_t wc; |
| int k = mbtowc(&wc, pat, m); |
| if (k < 0) { |
| *step = 0; |
| return UNMATCHABLE; |
| } |
| *step = k + esc; |
| return wc; |
| } |
| return pat[0]; |
| } |
| |
| static int casefold(int k) { |
| int c = towupper(k); |
| return c == k ? towlower(k) : c; |
| } |
| |
| static int match_bracket(const char* p, int k, int kfold) { |
| wchar_t wc; |
| int inv = 0; |
| p++; |
| if (*p == '^' || *p == '!') { |
| inv = 1; |
| p++; |
| } |
| if (*p == ']') { |
| if (k == ']') |
| return !inv; |
| p++; |
| } else if (*p == '-') { |
| if (k == '-') |
| return !inv; |
| p++; |
| } |
| wc = p[-1]; |
| for (; *p != ']'; p++) { |
| if (p[0] == '-' && p[1] != ']') { |
| wchar_t wc2; |
| int l = mbtowc(&wc2, p + 1, 4); |
| if (l < 0) |
| return 0; |
| if (wc <= wc2) |
| if ((unsigned)k - wc <= wc2 - wc || (unsigned)kfold - wc <= wc2 - wc) |
| return !inv; |
| p += l - 1; |
| continue; |
| } |
| if (p[0] == '[' && (p[1] == ':' || p[1] == '.' || p[1] == '=')) { |
| const char* p0 = p + 2; |
| int z = p[1]; |
| p += 3; |
| while (p[-1] != z || p[0] != ']') |
| p++; |
| if (z == ':' && p - 1 - p0 < 16) { |
| char buf[16]; |
| memcpy(buf, p0, p - 1 - p0); |
| buf[p - 1 - p0] = 0; |
| if (iswctype(k, wctype(buf)) || iswctype(kfold, wctype(buf))) |
| return !inv; |
| } |
| continue; |
| } |
| if (*p < 128U) { |
| wc = (unsigned char)*p; |
| } else { |
| int l = mbtowc(&wc, p, 4); |
| if (l < 0) |
| return 0; |
| p += l - 1; |
| } |
| if (wc == k || wc == kfold) |
| return !inv; |
| } |
| return inv; |
| } |
| |
| static int fnmatch_internal(const char* pat, |
| size_t m, |
| const char* str, |
| size_t n, |
| int flags) { |
| const char *p, *ptail, *endpat; |
| const char *s, *stail, *endstr; |
| size_t pinc, sinc, tailcnt = 0; |
| int c, k, kfold; |
| |
| if (flags & FNM_PERIOD) { |
| if (*str == '.' && *pat != '.') |
| return FNM_NOMATCH; |
| } |
| for (;;) { |
| switch ((c = pat_next(pat, m, &pinc, flags))) { |
| case UNMATCHABLE: |
| return FNM_NOMATCH; |
| case STAR: |
| pat++; |
| m--; |
| break; |
| default: |
| k = str_next(str, n, &sinc); |
| if (k <= 0) |
| return (c == END) ? 0 : FNM_NOMATCH; |
| str += sinc; |
| n -= sinc; |
| kfold = flags & FNM_CASEFOLD ? casefold(k) : k; |
| if (c == BRACKET) { |
| if (!match_bracket(pat, k, kfold)) |
| return FNM_NOMATCH; |
| } else if (c != QUESTION && k != c && kfold != c) { |
| return FNM_NOMATCH; |
| } |
| pat += pinc; |
| m -= pinc; |
| continue; |
| } |
| break; |
| } |
| |
| /* Compute real pat length if it was initially unknown/-1 */ |
| m = strnlen(pat, m); |
| endpat = pat + m; |
| |
| /* Find the last * in pat and count chars needed after it */ |
| for (p = ptail = pat; p < endpat; p += pinc) { |
| switch (pat_next(p, endpat - p, &pinc, flags)) { |
| case UNMATCHABLE: |
| return FNM_NOMATCH; |
| case STAR: |
| tailcnt = 0; |
| ptail = p + 1; |
| break; |
| default: |
| tailcnt++; |
| break; |
| } |
| } |
| |
| /* Past this point we need not check for UNMATCHABLE in pat, |
| * because all of pat has already been parsed once. */ |
| |
| /* Compute real str length if it was initially unknown/-1 */ |
| n = strnlen(str, n); |
| endstr = str + n; |
| if (n < tailcnt) |
| return FNM_NOMATCH; |
| |
| /* Find the final tailcnt chars of str, accounting for UTF-8. |
| * On illegal sequences we may get it wrong, but in that case |
| * we necessarily have a matching failure anyway. */ |
| for (s = endstr; s > str && tailcnt; tailcnt--) { |
| if (s[-1] < 128U || MB_CUR_MAX == 1) |
| s--; |
| else |
| while ((unsigned char)*--s - 0x80U < 0x40 && s > str) |
| ; |
| } |
| if (tailcnt) |
| return FNM_NOMATCH; |
| stail = s; |
| |
| /* Check that the pat and str tails match */ |
| p = ptail; |
| for (;;) { |
| c = pat_next(p, endpat - p, &pinc, flags); |
| p += pinc; |
| if ((k = str_next(s, endstr - s, &sinc)) <= 0) { |
| if (c != END) |
| return FNM_NOMATCH; |
| break; |
| } |
| s += sinc; |
| kfold = flags & FNM_CASEFOLD ? casefold(k) : k; |
| if (c == BRACKET) { |
| if (!match_bracket(p - pinc, k, kfold)) |
| return FNM_NOMATCH; |
| } else if (c != QUESTION && k != c && kfold != c) { |
| return FNM_NOMATCH; |
| } |
| } |
| |
| /* We're all done with the tails now, so throw them out */ |
| endstr = stail; |
| endpat = ptail; |
| |
| /* Match pattern components until there are none left */ |
| while (pat < endpat) { |
| p = pat; |
| s = str; |
| for (;;) { |
| c = pat_next(p, endpat - p, &pinc, flags); |
| p += pinc; |
| /* Encountering * completes/commits a component */ |
| if (c == STAR) { |
| pat = p; |
| str = s; |
| break; |
| } |
| k = str_next(s, endstr - s, &sinc); |
| if (!k) |
| return FNM_NOMATCH; |
| kfold = flags & FNM_CASEFOLD ? casefold(k) : k; |
| if (c == BRACKET) { |
| if (!match_bracket(p - pinc, k, kfold)) |
| break; |
| } else if (c != QUESTION && k != c && kfold != c) { |
| break; |
| } |
| s += sinc; |
| } |
| if (c == STAR) |
| continue; |
| /* If we failed, advance str, by 1 char if it's a valid |
| * char, or past all invalid bytes otherwise. */ |
| k = str_next(str, endstr - str, &sinc); |
| if (k > 0) |
| str += sinc; |
| else |
| for (str++; str_next(str, endstr - str, &sinc) < 0; str++) |
| ; |
| } |
| |
| return 0; |
| } |
| |
| int fnmatch(const char* pat, const char* str, int flags) { |
| const char *s, *p; |
| size_t inc; |
| int c; |
| if (flags & FNM_PATHNAME) |
| for (;;) { |
| for (s = str; *s && *s != '/'; s++) |
| ; |
| for (p = pat; (c = pat_next(p, -1, &inc, flags)) != END && c != '/'; |
| p += inc) |
| ; |
| if (c != *s && (!*s || !(flags & FNM_LEADING_DIR))) |
| return FNM_NOMATCH; |
| if (fnmatch_internal(pat, p - pat, str, s - str, flags)) |
| return FNM_NOMATCH; |
| if (!c) |
| return 0; |
| str = s + 1; |
| pat = p + inc; |
| } |
| else if (flags & FNM_LEADING_DIR) { |
| for (s = str; *s; s++) { |
| if (*s != '/') |
| continue; |
| if (!fnmatch_internal(pat, -1, str, s - str, flags)) |
| return 0; |
| } |
| } |
| return fnmatch_internal(pat, -1, str, -1, flags); |
| } |