Normalize inputs to String.prototype.localeCompare (#97)
NFC-normalize both inputs before comparing them; otherwise canonically equivalent strings such as "Å" and "A\u030A" (Latin capital letter A followed by U+030A, the combining ring above) don't compare equal.
This commit is contained in:
parent
a721bda7b5
commit
b56a82d19f
2 changed files with 74 additions and 36 deletions
108
quickjs.c
108
quickjs.c
|
@ -1151,6 +1151,11 @@ static const JSClassExoticMethods js_proxy_exotic_methods;
|
||||||
static const JSClassExoticMethods js_module_ns_exotic_methods;
|
static const JSClassExoticMethods js_module_ns_exotic_methods;
|
||||||
static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT;
|
static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT;
|
||||||
|
|
||||||
|
/* Three-way comparison of two unsigned 32-bit values.
   Returns -1 when a < b, 1 when a > b and 0 when they are equal. */
static int compare_u32(uint32_t a, uint32_t b)
{
    if (a < b)
        return -1;
    return a > b ? 1 : 0;
}
|
||||||
|
|
||||||
static JSValue js_int32(int32_t v)
|
static JSValue js_int32(int32_t v)
|
||||||
{
|
{
|
||||||
return JS_MKVAL(JS_TAG_INT, v);
|
return JS_MKVAL(JS_TAG_INT, v);
|
||||||
|
@ -3930,14 +3935,8 @@ static int js_string_compare(JSContext *ctx,
|
||||||
int res, len;
|
int res, len;
|
||||||
len = min_int(p1->len, p2->len);
|
len = min_int(p1->len, p2->len);
|
||||||
res = js_string_memcmp(p1, p2, len);
|
res = js_string_memcmp(p1, p2, len);
|
||||||
if (res == 0) {
|
if (res == 0)
|
||||||
if (p1->len == p2->len)
|
res = compare_u32(p1->len, p2->len);
|
||||||
res = 0;
|
|
||||||
else if (p1->len < p2->len)
|
|
||||||
res = -1;
|
|
||||||
else
|
|
||||||
res = 1;
|
|
||||||
}
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39138,24 +39137,80 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos)
|
||||||
return !lre_is_cased(c1);
|
return !lre_is_cased(c1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Expand the contents of string 'p' into a freshly allocated array of
   32-bit code points.

   On success '*pbuf' receives the buffer and the number of elements
   written is returned. That count can be smaller than p->len because
   string_getc() may consume more than one string unit per call (a
   well-formed surrogate pair yields a single element).

   If the allocation fails, '*pbuf' is set to NULL and -1 is returned.

   At least one element is always requested, so an empty string still
   produces a non-NULL buffer. The caller is expected to release the
   buffer with js_free(). */
static int to_utf32_buf(JSContext *ctx, JSString *p, uint32_t **pbuf)
{
    uint32_t *out;
    int src_len, src_pos, count;

    src_len = p->len;
    out = js_malloc(ctx, max_int(1, src_len) * sizeof(*out));
    *pbuf = out;
    if (!out)
        return -1;

    src_pos = 0;
    count = 0;
    /* string_getc() advances src_pos itself */
    while (src_pos < src_len)
        out[count++] = string_getc(p, &src_pos);
    return count;
}
|
||||||
|
|
||||||
/* String.prototype.localeCompare(that)

   Converts the receiver and argv[0] to strings, expands both to arrays
   of 32-bit code points, NFC-normalizes each array and then compares
   them element by element. When one sequence is a prefix of the other,
   the shorter one sorts first.

   Returns an int32 JSValue holding -1, 0 or 1, or JS_EXCEPTION if a
   string conversion, an allocation or the normalization fails. */
static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
                                       int argc, JSValueConst *argv)
{
    JSValue a, b;
    JSValue ret = JS_EXCEPTION;
    uint32_t *as = NULL, *bs = NULL, *norm;
    int an, bn, common, pos, cmp;

    a = JS_ToStringCheckObject(ctx, this_val);
    if (JS_IsException(a))
        return JS_EXCEPTION;

    b = JS_ToString(ctx, argv[0]);
    if (JS_IsException(b))
        goto done;

    an = to_utf32_buf(ctx, JS_VALUE_GET_STRING(a), &as);
    if (an == -1)
        goto done;

    bn = to_utf32_buf(ctx, JS_VALUE_GET_STRING(b), &bs);
    if (bn == -1)
        goto done;

    /* TODO(bnoordhuis) skip normalization when input is latin1 */
    an = unicode_normalize(&norm, as, an, UNICODE_NFC, ctx,
                           (DynBufReallocFunc *)js_realloc);
    if (an == -1)
        goto done;
    /* swap the raw buffer for its normalized form */
    js_free(ctx, as);
    as = norm;

    bn = unicode_normalize(&norm, bs, bn, UNICODE_NFC, ctx,
                           (DynBufReallocFunc *)js_realloc);
    if (bn == -1)
        goto done;
    js_free(ctx, bs);
    bs = norm;

    /* locate the first position where the two sequences differ */
    common = min_int(an, bn);
    pos = 0;
    while (pos < common && as[pos] == bs[pos])
        pos++;

    /* no difference within the common prefix: order by length */
    cmp = (pos < common) ? compare_u32(as[pos], bs[pos])
                         : compare_u32(an, bn);
    ret = js_int32(cmp);

 done:
    /* shared cleanup for the success and failure paths */
    JS_FreeValue(ctx, a);
    JS_FreeValue(ctx, b);
    js_free(ctx, as);
    js_free(ctx, bs);
    return ret;
}
|
||||||
|
|
||||||
static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
|
static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
|
||||||
|
@ -39200,29 +39255,14 @@ static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
|
||||||
/* Convert 'val1' to a string and expand it into a newly allocated
   array of 32-bit code points via to_utf32_buf().

   Returns the element count reported by to_utf32_buf(), or -1 on
   failure. Note that '*pbuf' is not written at all when the string
   conversion itself raises an exception; it is only assigned once
   to_utf32_buf() has been called. */
static int JS_ToUTF32String(JSContext *ctx, uint32_t **pbuf, JSValueConst val1)
{
    JSValue str;
    int count;

    count = -1;
    str = JS_ToString(ctx, val1);
    if (!JS_IsException(str)) {
        count = to_utf32_buf(ctx, JS_VALUE_GET_STRING(str), pbuf);
        /* the code points have been copied out; drop the temporary string */
        JS_FreeValue(ctx, str);
    }
    return count;
}
|
||||||
|
|
||||||
static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)
|
static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)
|
||||||
|
|
|
@ -37,8 +37,6 @@ test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-r
|
||||||
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
|
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
|
||||||
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
||||||
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
||||||
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
|
|
||||||
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
|
|
||||||
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: TypeError: out-of-bound numeric index (Testing with Float64Array.)
|
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: TypeError: out-of-bound numeric index (Testing with Float64Array.)
|
||||||
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: strict mode: TypeError: out-of-bound numeric index (Testing with Float64Array.)
|
test262/test/built-ins/TypedArray/prototype/set/array-arg-targetbuffer-detached-on-get-src-value-no-throw.js:30: strict mode: TypeError: out-of-bound numeric index (Testing with Float64Array.)
|
||||||
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
|
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
|
||||||
|
|
Loading…
Reference in a new issue