Normalize inputs to String.prototype.localeCompare (#97)
NFC-normalize the inputs, otherwise strings like "Å" and "A\u030A" (latin A with combining diacritical mark) don't compare equal.
This commit is contained in:
parent
a721bda7b5
commit
b56a82d19f
2 changed files with 74 additions and 36 deletions
108
quickjs.c
108
quickjs.c
|
@ -1151,6 +1151,11 @@ static const JSClassExoticMethods js_proxy_exotic_methods;
|
|||
static const JSClassExoticMethods js_module_ns_exotic_methods;
|
||||
static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT;
|
||||
|
||||
static int compare_u32(uint32_t a, uint32_t b)
|
||||
{
|
||||
return -(a < b) + (b < a); // -1, 0 or 1
|
||||
}
|
||||
|
||||
static JSValue js_int32(int32_t v)
|
||||
{
|
||||
return JS_MKVAL(JS_TAG_INT, v);
|
||||
|
@ -3930,14 +3935,8 @@ static int js_string_compare(JSContext *ctx,
|
|||
int res, len;
|
||||
len = min_int(p1->len, p2->len);
|
||||
res = js_string_memcmp(p1, p2, len);
|
||||
if (res == 0) {
|
||||
if (p1->len == p2->len)
|
||||
res = 0;
|
||||
else if (p1->len < p2->len)
|
||||
res = -1;
|
||||
else
|
||||
res = 1;
|
||||
}
|
||||
if (res == 0)
|
||||
res = compare_u32(p1->len, p2->len);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -39138,24 +39137,80 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos)
|
|||
return !lre_is_cased(c1);
|
||||
}
|
||||
|
||||
static int to_utf32_buf(JSContext *ctx, JSString *p, uint32_t **pbuf)
|
||||
{
|
||||
uint32_t *b;
|
||||
int i, j, n;
|
||||
|
||||
j = -1;
|
||||
n = p->len;
|
||||
b = js_malloc(ctx, max_int(1, n) * sizeof(*b));
|
||||
if (b)
|
||||
for (i = j = 0; i < n;)
|
||||
b[j++] = string_getc(p, &i);
|
||||
*pbuf = b;
|
||||
return j;
|
||||
}
|
||||
|
||||
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
|
||||
int argc, JSValueConst *argv)
|
||||
{
|
||||
JSValue a, b;
|
||||
int cmp;
|
||||
int i, n, an, bn, cmp;
|
||||
uint32_t *as, *bs, *ts;
|
||||
JSValue a, b, ret;
|
||||
|
||||
ret = JS_EXCEPTION;
|
||||
as = NULL;
|
||||
bs = NULL;
|
||||
|
||||
a = JS_ToStringCheckObject(ctx, this_val);
|
||||
if (JS_IsException(a))
|
||||
return JS_EXCEPTION;
|
||||
|
||||
b = JS_ToString(ctx, argv[0]);
|
||||
if (JS_IsException(b)) {
|
||||
JS_FreeValue(ctx, a);
|
||||
return JS_EXCEPTION;
|
||||
}
|
||||
cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b));
|
||||
if (JS_IsException(b))
|
||||
goto exception;
|
||||
|
||||
an = to_utf32_buf(ctx, JS_VALUE_GET_STRING(a), &as);
|
||||
if (an == -1)
|
||||
goto exception;
|
||||
|
||||
bn = to_utf32_buf(ctx, JS_VALUE_GET_STRING(b), &bs);
|
||||
if (bn == -1)
|
||||
goto exception;
|
||||
|
||||
// TODO(bnoordhuis) skip normalization when input is latin1
|
||||
an = unicode_normalize(&ts, as, an, UNICODE_NFC, ctx,
|
||||
(DynBufReallocFunc *)js_realloc);
|
||||
if (an == -1)
|
||||
goto exception;
|
||||
js_free(ctx, as);
|
||||
as = ts;
|
||||
|
||||
// TODO(bnoordhuis) skip normalization when input is latin1
|
||||
bn = unicode_normalize(&ts, bs, bn, UNICODE_NFC, ctx,
|
||||
(DynBufReallocFunc *)js_realloc);
|
||||
if (bn == -1)
|
||||
goto exception;
|
||||
js_free(ctx, bs);
|
||||
bs = ts;
|
||||
|
||||
n = min_int(an, bn);
|
||||
for (i = 0; i < n; i++)
|
||||
if (as[i] != bs[i])
|
||||
break;
|
||||
if (i < n)
|
||||
cmp = compare_u32(as[i], bs[i]);
|
||||
else
|
||||
cmp = compare_u32(an, bn);
|
||||
ret = js_int32(cmp);
|
||||
|
||||
exception:
|
||||
JS_FreeValue(ctx, a);
|
||||
JS_FreeValue(ctx, b);
|
||||
return JS_NewInt32(ctx, cmp);
|
||||
js_free(ctx, as);
|
||||
js_free(ctx, bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
|
||||
|
@ -39200,29 +39255,14 @@ static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
|
|||
static int JS_ToUTF32String(JSContext *ctx, uint32_t **pbuf, JSValueConst val1)
|
||||
{
|
||||
JSValue val;
|
||||
JSString *p;
|
||||
uint32_t *buf;
|
||||
int i, j, len;
|
||||
int len;
|
||||
|
||||
val = JS_ToString(ctx, val1);
|
||||
if (JS_IsException(val))
|
||||
return -1;
|
||||
p = JS_VALUE_GET_STRING(val);
|
||||
len = p->len;
|
||||
/* UTF32 buffer length is len minus the number of correct surrogates pairs */
|
||||
buf = js_malloc(ctx, sizeof(buf[0]) * max_int(len, 1));
|
||||
if (!buf) {
|
||||
JS_FreeValue(ctx, val);
|
||||
goto fail;
|
||||
}
|
||||
for(i = j = 0; i < len;)
|
||||
buf[j++] = string_getc(p, &i);
|
||||
len = to_utf32_buf(ctx, JS_VALUE_GET_STRING(val), pbuf);
|
||||
JS_FreeValue(ctx, val);
|
||||
*pbuf = buf;
|
||||
return j;
|
||||
fail:
|
||||
*pbuf = NULL;
|
||||
return -1;
|
||||
return len;
|
||||
}
|
||||
|
||||
static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)
|
||||
|
|
|
@ -37,8 +37,6 @@ test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-r
|
|||
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
|
||||
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
||||
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
||||
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
|
||||
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
|
||||
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: TypeError: out-of-bound numeric index (Testing with Float64Array.)
|
||||
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: strict mode: TypeError: out-of-bound numeric index (Testing with Float64Array.)
|
||||
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
|
||||
|
|
Loading…
Reference in a new issue