utf8.c - mozsearch

mozilla-central/security/nss/lib/base/utf8.c (file symbol)

Enable keyboard shortcuts

Source code

File a bug in NSS :: Libraries

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*

 * utf8.c

 * This file contains some additional utility routines required for

 * handling UTF8 strings.

*/

#ifndef BASE_H

#include "base.h"

#endif /* BASE_H */

#include "plstr.h"

/*

 * NOTES:

 * There's an "is hex string" function in pki1/atav.c.  If we need

 * it in more places, pull that one out.

*/

/*

 * nssUTF8_CaseIgnoreMatch

 * Returns true if the two UTF8-encoded strings pointed to by the

 * two specified NSSUTF8 pointers differ only in case.

 * The error may be one of the following values:

 *  NSS_ERROR_INVALID_POINTER

 * Return value:

 *  PR_TRUE if the strings match, ignoring case

 *  PR_FALSE if they don't

 *  PR_FALSE upon error

*/

NSS_IMPLEMENT PRBool
nssUTF8_CaseIgnoreMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt)
{
#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    /* Past this point the call cannot fail; report success up front. */
    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    /*
     * XXX fgmr
     *
     * This is, like, so wrong!
     *
     * The two UTF8 strings are handed to PL_strcasecmp as plain C strings.
     * NOTE(review): presumably PL_strcasecmp folds case for ASCII letters
     * only, so multi-byte characters are effectively compared exactly --
     * confirm against the NSPR documentation before relying on this for
     * non-ASCII input.
     */
    if (0 == PL_strcasecmp((const char *)a, (const char *)b)) {
        return PR_TRUE;
    } else {
        return PR_FALSE;
    }
}

/*

 * nssUTF8_PrintableMatch

 * Returns true if the two Printable strings pointed to by the

 * two specified NSSUTF8 pointers match when compared with the

 * rules for Printable String (leading and trailing spaces are

 * disregarded, extents of whitespace match regardless of length,

 * and case is not significant), then PR_TRUE will be returned.

 * Otherwise, PR_FALSE will be returned.  Upon failure, PR_FALSE

 * will be returned.  If the optional statusOpt argument is not

 * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that

 * location.

 * The error may be one of the following values:

 *  NSS_ERROR_INVALID_POINTER

 * Return value:

 *  PR_TRUE if the strings match, ignoring case

 *  PR_FALSE if they don't

 *  PR_FALSE upon error

*/

NSS_IMPLEMENT PRBool
nssUTF8_PrintableMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt)
{
    /* Read-only cursors over the two inputs; nothing is written through them. */
    const PRUint8 *c;
    const PRUint8 *d;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    c = (const PRUint8 *)a;
    d = (const PRUint8 *)b;

    /* Leading spaces are not significant. */
    while (' ' == *c) {
        c++;
    }

    while (' ' == *d) {
        d++;
    }

    while (('\0' != *c) && ('\0' != *d)) {
        PRUint8 e, f;

        e = *c;
        f = *d;

        /* Fold ASCII lower case to upper case before comparing. */
        if (('a' <= e) && (e <= 'z')) {
            e -= ('a' - 'A');
        }

        if (('a' <= f) && (f <= 'z')) {
            f -= ('a' - 'A');
        }

        if (e != f) {
            return PR_FALSE;
        }

        c++;
        d++;

        /*
         * Collapse a run of spaces: skip to the end of the run, then step
         * back one so that exactly one space is compared on the next pass.
         */
        if (' ' == *c) {
            while (' ' == *c) {
                c++;
            }
            c--;
        }

        if (' ' == *d) {
            while (' ' == *d) {
                d++;
            }
            d--;
        }
    }

    /* Trailing spaces are not significant either. */
    while (' ' == *c) {
        c++;
    }

    while (' ' == *d) {
        d++;
    }

    if (*c == *d) {
        /* And both '\0', btw */
        return PR_TRUE;
    } else {
        return PR_FALSE;
    }
}

/*

 * nssUTF8_Duplicate

 * This routine duplicates the UTF8-encoded string pointed to by the

 * specified NSSUTF8 pointer.  If the optional arenaOpt argument is

 * not null, the memory required will be obtained from that arena;

 * otherwise, the memory required will be obtained from the heap.

 * A pointer to the new string will be returned.  In case of error,

 * an error will be placed on the error stack and NULL will be

 * returned.

 * The error may be one of the following values:

 *  NSS_ERROR_INVALID_POINTER

 *  NSS_ERROR_INVALID_ARENA

 *  NSS_ERROR_NO_MEMORY

*/

NSS_IMPLEMENT NSSUTF8 *
nssUTF8_Duplicate(const NSSUTF8 *s, NSSArena *arenaOpt)
{
    NSSUTF8 *rv;
    PRUint32 len;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if ((const NSSUTF8 *)NULL == s) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return (NSSUTF8 *)NULL;
    }

    if ((NSSArena *)NULL != arenaOpt) {
        /* No nss_SetError here; presumably the verifier sets it -- confirm. */
        if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) {
            return (NSSUTF8 *)NULL;
        }
    }
#endif /* NSSDEBUG */

    len = PL_strlen((const char *)s);
#ifdef PEDANTIC
    if ('\0' != ((const char *)s)[len]) {
        /* must have wrapped, e.g., too big for PRUint32 */
        nss_SetError(NSS_ERROR_NO_MEMORY);
        return (NSSUTF8 *)NULL;
    }
#endif     /* PEDANTIC */
    len++; /* zero termination */

    rv = nss_ZAlloc(arenaOpt, len);
    if ((void *)NULL == rv) {
        return (NSSUTF8 *)NULL;
    }

    /* len now includes the terminator, so it is copied along with the text. */
    (void)nsslibc_memcpy(rv, s, len);
    return rv;
}

/*

 * nssUTF8_Size

 * This routine returns the length in bytes (including the terminating

 * null) of the UTF8-encoded string pointed to by the specified

 * NSSUTF8 pointer.  Zero is returned on error.

 * The error may be one of the following values:

 *  NSS_ERROR_INVALID_POINTER

 *  NSS_ERROR_VALUE_TOO_LARGE

 * Return value:

 *  0 on error

 *  nonzero length of the string.

*/

NSS_IMPLEMENT PRUint32
nssUTF8_Size(const NSSUTF8 *s, PRStatus *statusOpt)
{
    PRUint32 sv;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if ((const NSSUTF8 *)NULL == s) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return 0;
    }
#endif /* NSSDEBUG */

    /* Byte count including the terminating null, hence never 0 on success. */
    sv = PL_strlen((const char *)s) + 1;
#ifdef PEDANTIC
    if ('\0' != ((const char *)s)[sv - 1]) {
        /* wrapped */
        nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return 0;
    }
#endif /* PEDANTIC */

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    return sv;
}

/*

 * nssUTF8_Length

 * This routine returns the length in characters (not including the

 * terminating null) of the UTF8-encoded string pointed to by the

 * specified NSSUTF8 pointer.

 * The error may be one of the following values:

 *  NSS_ERROR_INVALID_POINTER

 *  NSS_ERROR_VALUE_TOO_LARGE

 *  NSS_ERROR_INVALID_STRING

 * Return value:

 *  length of the string (which may be zero)

 *  0 on error

*/

NSS_IMPLEMENT PRUint32
nssUTF8_Length(const NSSUTF8 *s, PRStatus *statusOpt)
{
    /*
     * NOTE(review): l is advanced by incr, the number of bytes in each
     * sequence, so the value returned is a byte count of the validated
     * string rather than a count of characters.  Left unchanged here
     * because callers may depend on it -- confirm the intended contract.
     */
    PRUint32 l = 0;
    const PRUint8 *c = (const PRUint8 *)s;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if ((const NSSUTF8 *)NULL == s) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        goto loser;
    }
#endif /* NSSDEBUG */

    /*
     * From RFC 3629:
     *
     * UTF8-octets = *( UTF8-char )
     * UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
     * UTF8-1      = %x00-7F
     * UTF8-2      = %xC2-DF UTF8-tail
     * UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
     *               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
     * UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
     *               %xF4 %x80-8F 2( UTF8-tail )
     * UTF8-tail   = %x80-BF
     */

    while (0 != *c) {
        PRUint32 incr;
        PRUint32 i;

        /*
         * Classify the lead byte.  Reading c[1] is always in bounds here:
         * *c is non-zero, so at worst c[1] is the terminator.
         */
        if (*c < 0x80) {
            incr = 1;
        } else if (*c < 0xC2) {
            /* Stray tail byte, or an overlong 2-byte lead (C0/C1). */
            nss_SetError(NSS_ERROR_INVALID_STRING);
            goto loser;
        } else if (*c < 0xE0) {
            incr = 2;
        } else if (*c == 0xE0) {
            if (c[1] < 0xA0) {
                nss_SetError(NSS_ERROR_INVALID_STRING);
                goto loser;
            }
            incr = 3;
        } else if (*c < 0xF0) {
            if (*c == 0xED && c[1] > 0x9F) {
                nss_SetError(NSS_ERROR_INVALID_STRING);
                goto loser;
            }
            incr = 3;
        } else if (*c == 0xF0) {
            if (c[1] < 0x90) {
                nss_SetError(NSS_ERROR_INVALID_STRING);
                goto loser;
            }
            incr = 4;
        } else if (*c < 0xF4) {
            incr = 4;
        } else if (*c == 0xF4) {
            if (c[1] > 0x8F) {
                nss_SetError(NSS_ERROR_INVALID_STRING);
                goto loser;
            }
            incr = 4;
        } else {
            nss_SetError(NSS_ERROR_INVALID_STRING);
            goto loser;
        }

        l += incr;

#ifdef PEDANTIC
        if (l < incr) {
            /* Wrapped-- too big */
            nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
            goto loser;
        }
#endif /* PEDANTIC */

        /*
         * Every byte after the lead must be a tail byte (10xxxxxx).  The
         * terminator is not a tail byte, so a truncated sequence is
         * rejected at the first missing byte and nothing beyond the
         * terminator is read.  Indexing (rather than comparing against
         * &c[incr]) avoids forming a pointer past the end of the string.
         */
        for (i = 1; i < incr; i++) {
            if ((c[i] & 0xC0) != 0x80) {
                nss_SetError(NSS_ERROR_INVALID_STRING);
                goto loser;
            }
        }

        c += incr;
    }

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    return l;

loser:
    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_FAILURE;
    }

    return 0;
}

/*

 * nssUTF8_Create

 * This routine creates a UTF8 string from a string in some other

 * format.  Some types of string may include embedded null characters,

 * so for them the length parameter must be used.  For string types

 * that are null-terminated, the length parameter is optional; if it

 * is zero, it will be ignored.  If the optional arena argument is

 * non-null, the memory used for the new string will be obtained from

 * that arena, otherwise it will be obtained from the heap.  This

 * routine may return NULL upon error, in which case it will have

 * placed an error on the error stack.

 * The error may be one of the following:

 *  NSS_ERROR_INVALID_POINTER

 *  NSS_ERROR_NO_MEMORY

 *  NSS_ERROR_UNSUPPORTED_TYPE

 * Return value:

 *  NULL upon error

 *  A non-null pointer to a new UTF8 string otherwise

*/

extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */

NSS_IMPLEMENT NSSUTF8 *
nssUTF8_Create(NSSArena *arenaOpt, nssStringType type, const void *inputString,
               PRUint32 size /* in bytes, not characters */
)
{
    NSSUTF8 *rv = NULL;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if ((NSSArena *)NULL != arenaOpt) {
        if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) {
            return (NSSUTF8 *)NULL;
        }
    }

    if ((const void *)NULL == inputString) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return (NSSUTF8 *)NULL;
    }
#endif /* NSSDEBUG */

    /*
     * Only PrintableString and UTF8String are actually converted.  Every
     * other type records an error and leaves rv NULL.
     */
    switch (type) {
        case nssStringType_DirectoryString:
            /* This is a composite type requiring BER */
            nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
            break;
        case nssStringType_TeletexString:
            /*
             * draft-ietf-pkix-ipki-part1-11 says in part:
             *
             * In addition, many legacy implementations support names encoded
             * in the ISO 8859-1 character set (Latin1String) but tag them as
             * TeletexString.  The Latin1String includes characters used in
             * Western European countries which are not part of the
             * TeletexString character set.  Implementations that process
             * TeletexString SHOULD be prepared to handle the entire ISO
             * 8859-1 character set.[ISO 8859-1].
             */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_PrintableString:
            /*
             * PrintableString consists of A-Za-z0-9 ,()+,-./:=?
             * This is a subset of ASCII, which is a subset of UTF8.
             * So we can just duplicate the string over, exactly as for
             * an input that is already UTF8.
             */
            /* fall through */
        case nssStringType_UTF8String:
            if (0 == size) {
                /* No explicit length: treat the input as null-terminated. */
                rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
            } else {
                PRUint32 allocSize = size + 1; /* room for the terminator */

                if (allocSize < size) {
                    /*
                     * size + 1 wrapped around to 0; allocating that and
                     * then copying size bytes would overrun the buffer.
                     */
                    nss_SetError(NSS_ERROR_NO_MEMORY);
                    return (NSSUTF8 *)NULL;
                }

                /*
                 * The terminator is never written explicitly; this relies
                 * on nss_ZAlloc returning zero-filled memory (as its name
                 * suggests -- confirm).
                 */
                rv = nss_ZAlloc(arenaOpt, allocSize);
                if ((NSSUTF8 *)NULL == rv) {
                    return (NSSUTF8 *)NULL;
                }

                (void)nsslibc_memcpy(rv, inputString, size);
            }
            break;
        case nssStringType_UniversalString:
            /* 4-byte unicode */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_BMPString:
            /* Base Multilingual Plane of Unicode */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_PHGString:
            /*
             * PHGString is an IA5String (with case-insensitive comparisons).
             * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
             * currency symbol.
             */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_GeneralString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        default:
            nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
            break;
    }

    return rv;
}

/*
 * nssUTF8_GetEncoding
 *
 * Produces an NSSItem holding the specified UTF8 string encoded as the
 * requested string type.  Only nssStringType_UTF8String is implemented:
 * the string is duplicated (from arenaOpt if non-null) and the item's
 * data/size are set to the copy and its size in bytes including the
 * terminating null.  If rvOpt is non-null it is filled in and returned;
 * otherwise a new NSSItem is allocated.  On failure NULL is returned and
 * rvOpt, if supplied, is left untouched.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_INTERNAL_ERROR    (string type not implemented)
 *  NSS_ERROR_UNSUPPORTED_TYPE
 *  plus whatever nssUTF8_Duplicate / nssUTF8_Size / nss_ZNEW report
 *
 * Return value:
 *  NULL upon error
 *  A pointer to the filled-in NSSItem otherwise
 */
NSS_IMPLEMENT NSSItem *
nssUTF8_GetEncoding(NSSArena *arenaOpt, NSSItem *rvOpt, nssStringType type,
                    NSSUTF8 *string)
{
    NSSItem *rv = (NSSItem *)NULL;
    PRStatus status = PR_SUCCESS;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if ((NSSArena *)NULL != arenaOpt) {
        if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) {
            return (NSSItem *)NULL;
        }
    }

    if ((NSSUTF8 *)NULL == string) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return (NSSItem *)NULL;
    }
#endif /* NSSDEBUG */

    switch (type) {
        case nssStringType_DirectoryString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_TeletexString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_PrintableString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_UniversalString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_BMPString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_UTF8String: {
            PRUint32 dupSize;
            NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt);
            if ((NSSUTF8 *)NULL == dup) {
                return (NSSItem *)NULL;
            }

            /*
             * Measure the copy before touching any NSSItem, so that a
             * failure here neither leaks dup nor leaves a caller-supplied
             * rvOpt half filled in.
             */
            dupSize = nssUTF8_Size(dup, &status);
            if ((0 == dupSize) && (PR_SUCCESS != status)) {
                (void)nss_ZFreeIf(dup);
                return (NSSItem *)NULL;
            }

            if ((NSSItem *)NULL == rvOpt) {
                rv = nss_ZNEW(arenaOpt, NSSItem);
                if ((NSSItem *)NULL == rv) {
                    (void)nss_ZFreeIf(dup);
                    return (NSSItem *)NULL;
                }
            } else {
                rv = rvOpt;
            }

            /* Ownership of the copy passes to the item. */
            rv->data = dup;
            rv->size = dupSize;
        } break;
        case nssStringType_PHGString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        default:
            nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
            break;
    }

    return rv;
}

/*

 * nssUTF8_CopyIntoFixedBuffer

 * This will copy a UTF8 string into a fixed-length buffer, making

 * sure that all characters are valid.  Any remaining space will

 * be padded with the specified ASCII character, typically either

 * null or space.

 * Blah, blah, blah.

*/

NSS_IMPLEMENT PRStatus
nssUTF8_CopyIntoFixedBuffer(NSSUTF8 *string, char *buffer, PRUint32 bufferSize,
                            char pad)
{
    PRUint32 stringSize = 0;

#ifdef NSSDEBUG
    /*
     * Argument validation is compiled in for NSSDEBUG builds only.
     * These paths must return PR_FAILURE: this function returns a
     * PRStatus, and PR_FALSE (a PRBool) has the same numeric value as
     * PR_SUCCESS, so returning it would report the error as success.
     */
    if ((char *)NULL == buffer) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return PR_FAILURE;
    }

    if (0 == bufferSize) {
        nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
        return PR_FAILURE;
    }

    /* The pad byte must be plain ASCII so it cannot corrupt the UTF8. */
    if ((pad & 0x80) != 0x00) {
        nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
        return PR_FAILURE;
    }
#endif /* NSSDEBUG */

    /* A NULL string is treated as empty: the buffer is filled with pad. */
    if ((NSSUTF8 *)NULL == string) {
        string = (NSSUTF8 *)"";
    }

    stringSize = nssUTF8_Size(string, (PRStatus *)NULL);
    if (0 == stringSize) {
        /*
         * nssUTF8_Size returns 0 only on error (a valid size includes the
         * terminator and so is at least 1).  Bail out rather than let the
         * decrement below wrap around to a huge length.
         */
        return PR_FAILURE;
    }

    stringSize--; /* don't count the trailing null */
    if (stringSize > bufferSize) {
        PRUint32 bs = bufferSize;
        (void)nsslibc_memcpy(buffer, string, bufferSize);

        /*
         * The copy was cut at bufferSize bytes.  It ended on a character
         * boundary if the last byte is ASCII, or if a lead byte announcing
         * an N-byte sequence sits exactly N bytes from the end.
         */
        if ((((buffer[bs - 1] & 0x80) == 0x00)) ||
            ((bs > 1) && ((buffer[bs - 2] & 0xE0) == 0xC0)) ||
            ((bs > 2) && ((buffer[bs - 3] & 0xF0) == 0xE0)) ||
            ((bs > 3) && ((buffer[bs - 4] & 0xF8) == 0xF0)) ||
            ((bs > 4) && ((buffer[bs - 5] & 0xFC) == 0xF8)) ||
            ((bs > 5) && ((buffer[bs - 6] & 0xFE) == 0xFC))) {
            /* It fit exactly */
            return PR_SUCCESS;
        }

        /*
         * Too long.  We have to trim the last character: overwrite bytes
         * with pad from the end backwards, stopping once the first
         * non-tail byte (the partial character's lead) has been replaced.
         */
        for (/*bs*/; bs != 0; bs--) {
            int isTail = ((buffer[bs - 1] & 0xC0) == 0x80);

            buffer[bs - 1] = pad;
            if (!isTail) {
                break;
            }
        }
    } else {
        /* Whole string fits: pad the entire buffer, then lay the text over it. */
        (void)nsslibc_memset(buffer, pad, bufferSize);
        (void)nsslibc_memcpy(buffer, string, stringSize);
    }

    return PR_SUCCESS;
}

/*
 * nssUTF8_Equal
 *
 * Compares the two UTF8-encoded strings pointed to by the two specified
 * NSSUTF8 pointers for exact equality.  Their sizes in bytes (as reported
 * by nssUTF8_Size, i.e. including the terminating null) are compared
 * first; if they agree, the comparison of that many bytes is delegated to
 * nsslibc_memequal.  If the optional statusOpt argument is not NULL, it is
 * set by whichever of those calls ran last (or to PR_FAILURE by the
 * debug-only pointer check).
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  plus whatever nssUTF8_Size / nsslibc_memequal report
 *
 * Return value:
 *  PR_TRUE if the strings are identical
 *  PR_FALSE if they aren't
 *  PR_FALSE upon error
 */

NSS_IMPLEMENT PRBool
nssUTF8_Equal(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt)
{
    PRUint32 la, lb;

#ifdef NSSDEBUG
    /* Argument validation is compiled in for NSSDEBUG builds only. */
    if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    /* A size of 0 means nssUTF8_Size failed; it has already set statusOpt. */
    la = nssUTF8_Size(a, statusOpt);
    if (0 == la) {
        return PR_FALSE;
    }

    lb = nssUTF8_Size(b, statusOpt);
    if (0 == lb) {
        return PR_FALSE;
    }

    /* Different sizes can never match; no need to look at the bytes. */
    if (la != lb) {
        return PR_FALSE;
    }

    return nsslibc_memequal(a, b, la, statusOpt);
}