| Add code support for ICU. | 
 |  | 
 | diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c | 
 | index b86a547..0f41df9 100644 | 
 | --- a/third_party/libxml/encoding.c | 
 | +++ b/third_party/libxml/encoding.c | 
 | @@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; | 
 |  static int xmlCharEncodingAliasesNb = 0; | 
 |  static int xmlCharEncodingAliasesMax = 0; | 
 |   | 
 | -#ifdef LIBXML_ICONV_ENABLED | 
 | +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) | 
 |  #if 0 | 
 |  #define DEBUG_ENCODING  /* Define this to get encoding traces */ | 
 |  #endif | 
 | @@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) | 
 |                      NULL, 0, val, NULL, NULL, 0, 0, msg, val); | 
 |  } | 
 |   | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +static uconv_t*  | 
 | +openIcuConverter(const char* name, int toUnicode) | 
 | +{ | 
 | +  UErrorCode status = U_ZERO_ERROR; | 
 | +  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); | 
 | +  if (conv == NULL) | 
 | +    return NULL; | 
 | + | 
 | +  conv->uconv = ucnv_open(name, &status); | 
 | +  if (U_FAILURE(status)) | 
 | +    goto error; | 
 | + | 
 | +  status = U_ZERO_ERROR; | 
 | +  if (toUnicode) { | 
 | +    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,  | 
 | +                        NULL, NULL, NULL, &status); | 
 | +  } | 
 | +  else { | 
 | +    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,  | 
 | +                        NULL, NULL, NULL, &status); | 
 | +  } | 
 | +  if (U_FAILURE(status)) | 
 | +    goto error; | 
 | + | 
 | +  status = U_ZERO_ERROR; | 
 | +  conv->utf8 = ucnv_open("UTF-8", &status); | 
 | +  if (U_SUCCESS(status)) | 
 | +    return conv; | 
 | + | 
 | +error: | 
 | +  if (conv->uconv)  | 
 | +    ucnv_close(conv->uconv); | 
 | +  xmlFree(conv); | 
 | +  return NULL; | 
 | +} | 
 | + | 
 | +static void | 
 | +closeIcuConverter(uconv_t *conv) | 
 | +{ | 
 | +  if (conv != NULL) { | 
 | +    ucnv_close(conv->uconv); | 
 | +    ucnv_close(conv->utf8); | 
 | +    xmlFree(conv); | 
 | +  } | 
 | +} | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 | + | 
 |  /************************************************************************ | 
 |   *									* | 
 |   *		Conversions To/From UTF8 encoding			* | 
 | @@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name, | 
 |  #ifdef LIBXML_ICONV_ENABLED | 
 |      handler->iconv_in = NULL; | 
 |      handler->iconv_out = NULL; | 
 | -#endif /* LIBXML_ICONV_ENABLED */ | 
 | +#endif | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    handler->uconv_in = NULL; | 
 | +    handler->uconv_out = NULL; | 
 | +#endif | 
 |   | 
 |      /* | 
 |       * registers and returns the handler. | 
 | @@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) { | 
 |      xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); | 
 |      xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); | 
 |  #endif /* LIBXML_OUTPUT_ENABLED */ | 
 | -#ifndef LIBXML_ICONV_ENABLED | 
 | +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | 
 |  #ifdef LIBXML_ISO8859X_ENABLED | 
 |      xmlRegisterCharEncodingHandlersISO8859x (); | 
 |  #endif | 
 | @@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) { | 
 |      xmlCharEncodingHandlerPtr enc; | 
 |      iconv_t icv_in, icv_out; | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    xmlCharEncodingHandlerPtr enc; | 
 | +    uconv_t *ucv_in, *ucv_out; | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |      char upper[100]; | 
 |      int i; | 
 |   | 
 | @@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) { | 
 |  		    "iconv : problems with filters for '%s'\n", name); | 
 |      } | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    /* check whether icu can handle this */ | 
 | +    ucv_in = openIcuConverter(name, 1); | 
 | +    ucv_out = openIcuConverter(name, 0); | 
 | +    if (ucv_in != NULL && ucv_out != NULL) { | 
 | +	    enc = (xmlCharEncodingHandlerPtr) | 
 | +	          xmlMalloc(sizeof(xmlCharEncodingHandler)); | 
 | +	    if (enc == NULL) { | 
 | +                closeIcuConverter(ucv_in); | 
 | +                closeIcuConverter(ucv_out); | 
 | +		return(NULL); | 
 | +	    } | 
 | +	    enc->name = xmlMemStrdup(name); | 
 | +	    enc->input = NULL; | 
 | +	    enc->output = NULL; | 
 | +	    enc->uconv_in = ucv_in; | 
 | +	    enc->uconv_out = ucv_out; | 
 | +#ifdef DEBUG_ENCODING | 
 | +            xmlGenericError(xmlGenericErrorContext, | 
 | +		    "Found ICU converter handler for encoding %s\n", name); | 
 | +#endif | 
 | +	    return enc; | 
 | +    } else if (ucv_in != NULL || ucv_out != NULL) { | 
 | +            closeIcuConverter(ucv_in); | 
 | +            closeIcuConverter(ucv_out); | 
 | +	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR, | 
 | +		    "ICU converter : problems with filters for '%s'\n", name); | 
 | +    } | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |   | 
 |  #ifdef DEBUG_ENCODING | 
 |      xmlGenericError(xmlGenericErrorContext, | 
 | @@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, | 
 |   | 
 |  /************************************************************************ | 
 |   *									* | 
 | + *		ICU based generic conversion functions	         	* | 
 | + *									* | 
 | + ************************************************************************/ | 
 | + | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +/** | 
 | + * xmlUconvWrapper: | 
 | + * @cd: ICU uconverter data structure | 
 | + * @toUnicode : non-zero to convert from the encoding to UTF-8, 0 for UTF-8 to the encoding. | 
 | + * @out:  a pointer to an array of bytes to store the result | 
 | + * @outlen:  the length of @out | 
 | + * @in:  a pointer to an array of input bytes | 
 | + * @inlen:  the length of @in | 
 | + * | 
 | + * Returns 0 if success, or  | 
 | + *     -1 by lack of space, or | 
 | + *     -2 if the transcoding fails (for *in is not valid utf8 string or | 
 | + *        the result of transformation can't fit into the encoding we want), or | 
 | + *     -3 if the last byte can't form a single output char. | 
 | + *      | 
 | + * The value of @inlen after return is the number of octets consumed | 
 | + *     if the return value is 0, else unpredictable. | 
 | + * The value of @outlen after return is the number of octets produced. | 
 | + */ | 
 | +static int | 
 | +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, | 
 | +                const unsigned char *in, int *inlen) { | 
 | +    const char *ucv_in = (const char *) in; | 
 | +    char *ucv_out = (char *) out; | 
 | +    UErrorCode err = U_ZERO_ERROR; | 
 | + | 
 | +    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { | 
 | +        if (outlen != NULL) *outlen = 0; | 
 | +        return(-1); | 
 | +    } | 
 | + | 
 | +    /*  | 
 | +     * TODO(jungshik) | 
 | +     * 1. is ucnv_convert(To|From)Algorithmic better? | 
 | +     * 2. had we better use an explicit pivot buffer? | 
 | +     * 3. error returned comes from 'fromUnicode' only even | 
 | +     *    when toUnicode is true ! | 
 | +     */ | 
 | +    if (toUnicode) { | 
 | +        /* encoding => UTF-16 => UTF-8 */ | 
 | +        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, | 
 | +                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | 
 | +                       0, TRUE, &err); | 
 | +    } else { | 
 | +        /* UTF-8 => UTF-16 => encoding */ | 
 | +        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, | 
 | +                       &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | 
 | +                       0, TRUE, &err); | 
 | +    } | 
 | +    *inlen = ucv_in - (const char*) in;  | 
 | +    *outlen = ucv_out - (char *) out; | 
 | +    if (U_SUCCESS(err)) | 
 | +        return 0; | 
 | +    if (err == U_BUFFER_OVERFLOW_ERROR) | 
 | +        return -1; | 
 | +    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) | 
 | +        return -2; | 
 | +    /* if (err == U_TRUNCATED_CHAR_FOUND) */ | 
 | +    return -3; | 
 | +} | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 | + | 
 | +/************************************************************************ | 
 | + *									* | 
 |   *		The real API used by libxml for on-the-fly conversion	* | 
 |   *									* | 
 |   ************************************************************************/ | 
 | @@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, | 
 |  	if (ret == -1) ret = -3; | 
 |      } | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    else if (handler->uconv_in != NULL) { | 
 | +	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | 
 | +	                      &written, in->content, &toconv); | 
 | +	xmlBufferShrink(in, toconv); | 
 | +	out->use += written; | 
 | +	out->content[out->use] = 0; | 
 | +	if (ret == -1) ret = -3; | 
 | +    } | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |  #ifdef DEBUG_ENCODING | 
 |      switch (ret) { | 
 |          case 0: | 
 | @@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, | 
 |              ret = -3; | 
 |      } | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    else if (handler->uconv_in != NULL) { | 
 | +        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | 
 | +                              &written, in->content, &toconv); | 
 | +        xmlBufferShrink(in, toconv); | 
 | +        out->use += written; | 
 | +        out->content[out->use] = 0; | 
 | +        if (ret == -1) | 
 | +            ret = -3; | 
 | +    } | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |      switch (ret) { | 
 |          case 0: | 
 |  #ifdef DEBUG_ENCODING | 
 | @@ -2015,6 +2190,15 @@ retry: | 
 |  	    out->content[out->use] = 0; | 
 |  	} | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +	else if (handler->uconv_out != NULL) { | 
 | +	    ret = xmlUconvWrapper(handler->uconv_out, 0, | 
 | +                              &out->content[out->use], | 
 | + 				              &written, NULL, &toconv); | 
 | +	    out->use += written; | 
 | +	    out->content[out->use] = 0; | 
 | +	} | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |  #ifdef DEBUG_ENCODING | 
 |  	xmlGenericError(xmlGenericErrorContext, | 
 |  		"initialized encoder\n"); | 
 | @@ -2061,6 +2245,26 @@ retry: | 
 |  	} | 
 |      } | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    else if (handler->uconv_out != NULL) { | 
 | +	ret = xmlUconvWrapper(handler->uconv_out, 0, | 
 | +                              &out->content[out->use], | 
 | +	                      &written, in->content, &toconv); | 
 | +	xmlBufferShrink(in, toconv); | 
 | +	out->use += written; | 
 | +	writtentot += written; | 
 | +	out->content[out->use] = 0; | 
 | +	if (ret == -1) { | 
 | +	    if (written > 0) { | 
 | +		/* | 
 | +		 * Can be a limitation of the ICU converter | 
 | +		 */ | 
 | +		goto retry; | 
 | +	    } | 
 | +	    ret = -3; | 
 | +	} | 
 | +    } | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |      else { | 
 |  	xmlEncodingErr(XML_I18N_NO_OUTPUT, | 
 |  		       "xmlCharEncOutFunc: no output function !\n", NULL); | 
 | @@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { | 
 |  	xmlFree(handler); | 
 |      } | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { | 
 | +	if (handler->name != NULL) | 
 | +	    xmlFree(handler->name); | 
 | +	handler->name = NULL; | 
 | +	if (handler->uconv_out != NULL) { | 
 | +	    closeIcuConverter(handler->uconv_out); | 
 | +	    handler->uconv_out = NULL; | 
 | +	} | 
 | +	if (handler->uconv_in != NULL) { | 
 | +	    closeIcuConverter(handler->uconv_in); | 
 | +	    handler->uconv_in = NULL; | 
 | +	} | 
 | +	xmlFree(handler); | 
 | +    } | 
 | +#endif | 
 |  #ifdef DEBUG_ENCODING | 
 |      if (ret) | 
 |          xmlGenericError(xmlGenericErrorContext, | 
 | @@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | 
 |  		    cur += toconv; | 
 |  		} while (ret == -2); | 
 |  #endif | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +	    } else if (handler->uconv_out != NULL) { | 
 | +	        do { | 
 | +		    toconv = in->end - cur; | 
 | +		    written = 32000; | 
 | +		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], | 
 | +	                      &written, cur, &toconv); | 
 | +		    if (ret < 0) { | 
 | +		        if (written > 0) | 
 | +			    ret = -2; | 
 | +			else | 
 | +			    return(-1); | 
 | +		    } | 
 | +		    unused += written; | 
 | +		    cur += toconv; | 
 | +		} while (ret == -2); | 
 |              } else { | 
 |  	        /* could not find a converter */ | 
 |  	        return(-1); | 
 | @@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | 
 |      } | 
 |      return(in->consumed + (in->cur - in->base)); | 
 |  } | 
 | +#endif | 
 |   | 
 | -#ifndef LIBXML_ICONV_ENABLED | 
 | +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | 
 |  #ifdef LIBXML_ISO8859X_ENABLED | 
 |   | 
 |  /** | 
 | diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h | 
 | index c74b25f..b5f8b48 100644 | 
 | --- a/third_party/libxml/include/libxml/encoding.h | 
 | +++ b/third_party/libxml/include/libxml/encoding.h | 
 | @@ -26,6 +26,24 @@ | 
 |   | 
 |  #ifdef LIBXML_ICONV_ENABLED | 
 |  #include <iconv.h> | 
 | +#else  | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +#include <unicode/ucnv.h> | 
 | +#if 0 | 
 | +/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> | 
 | + * to prevent unwanted ICU symbols being exposed to users of libxml2. | 
 | + * One particular case is Qt4 conflicting on UChar32. | 
 | + */ | 
 | +#include <stdint.h> | 
 | +struct UConverter; | 
 | +typedef struct UConverter UConverter; | 
 | +#ifdef _MSC_VER | 
 | +typedef wchar_t UChar; | 
 | +#else | 
 | +typedef uint16_t UChar; | 
 | +#endif | 
 | +#endif | 
 | +#endif | 
 |  #endif | 
 |  #ifdef __cplusplus | 
 |  extern "C" { | 
 | @@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, | 
 |   * Block defining the handlers for non UTF-8 encodings. | 
 |   * If iconv is supported, there are two extra fields. | 
 |   */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +struct _uconv_t { | 
 | +  UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | 
 | +  UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | 
 | +}; | 
 | +typedef struct _uconv_t uconv_t; | 
 | +#endif | 
 |   | 
 |  typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | 
 |  typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | 
 | @@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler { | 
 |      iconv_t                    iconv_in; | 
 |      iconv_t                    iconv_out; | 
 |  #endif /* LIBXML_ICONV_ENABLED */ | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +    uconv_t                    *uconv_in; | 
 | +    uconv_t                    *uconv_out; | 
 | +#endif /* LIBXML_ICU_ENABLED */ | 
 |  }; | 
 |   | 
 |  #ifdef __cplusplus | 
 | diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h | 
 | index dd79c42..3580b63 100644 | 
 | --- a/third_party/libxml/include/libxml/parser.h | 
 | +++ b/third_party/libxml/include/libxml/parser.h | 
 | @@ -1222,6 +1222,7 @@ typedef enum { | 
 |      XML_WITH_DEBUG_MEM = 29, | 
 |      XML_WITH_DEBUG_RUN = 30, | 
 |      XML_WITH_ZLIB = 31, | 
 | +    XML_WITH_ICU = 32, | 
 |      XML_WITH_NONE = 99999 /* just to be sure of allocation size */ | 
 |  } xmlFeature; | 
 |   | 
 | diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in | 
 | index 4739f3a..de310ab 100644 | 
 | --- a/third_party/libxml/include/libxml/xmlversion.h.in | 
 | +++ b/third_party/libxml/include/libxml/xmlversion.h.in | 
 | @@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); | 
 |  #endif | 
 |   | 
 |  /** | 
 | + * LIBXML_ICU_ENABLED: | 
 | + * | 
 | + * Whether ICU support is available | 
 | + */ | 
 | +#if @WITH_ICU@ | 
 | +#define LIBXML_ICU_ENABLED | 
 | +#endif | 
 | + | 
 | +/** | 
 |   * LIBXML_ISO8859X_ENABLED: | 
 |   * | 
 |   * Whether ISO-8859-* support is made available in case iconv is not | 
 | diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c | 
 | index 85e7599..3ba2a06 100644 | 
 | --- a/third_party/libxml/parser.c | 
 | +++ b/third_party/libxml/parser.c | 
 | @@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature) | 
 |  #else | 
 |              return(0); | 
 |  #endif | 
 | +        case XML_WITH_ICU: | 
 | +#ifdef LIBXML_ICU_ENABLED | 
 | +            return(1); | 
 | +#else | 
 | +            return(0); | 
 | +#endif | 
 |          default: | 
 |  	    break; | 
 |       } |