190 lines
4.2 KiB
C
190 lines
4.2 KiB
C
|
|
/*
|
|
* Copyright (C) Igor Sysoev
|
|
* Copyright (C) NGINX, Inc.
|
|
*/
|
|
|
|
#include <nxt_main.h>
|
|
|
|
|
|
/*
 * First byte value from which the exhaustive byte sequence test in
 * nxt_utf8_unit_test() starts iterating; the loop runs from this value
 * up to 0xff.
 */
#define NXT_UTF8_START_TEST 0xc2
|
|
//#define NXT_UTF8_START_TEST 0
|
|
|
|
|
|
/*
 * Byte sequences for which nxt_utf8_unit_test() expects nxt_utf8_decode()
 * to return 0xffffffff.
 *
 * The table is a flat list of 5-byte records.  The first byte of a record
 * is the number of bytes made available to the decoder, the next four bytes
 * are the sequence itself.  Bytes beyond the stated length are copied into
 * the test buffer but lie past the end pointer given to the decoder.
 *
 * The table is only read, so it is declared const.
 */
static const u_char  invalid[] = {

    /* Invalid first byte less than 0xc2. */
    1, 0x80, 0x00, 0x00, 0x00,
    1, 0xc0, 0x00, 0x00, 0x00,
    2, 0xc0, 0x00, 0x00, 0x00,
    3, 0xc0, 0x00, 0x00, 0x00,
    4, 0xc0, 0x00, 0x00, 0x00,

    /* Invalid 0x110000 value. */
    4, 0xf4, 0x90, 0x80, 0x80,

    /* Incomplete length. */
    2, 0xe0, 0xaf, 0xb5, 0x00,

    /* Overlong values. */
    2, 0xc0, 0x80, 0x00, 0x00,
    2, 0xc1, 0xb3, 0x00, 0x00,
    3, 0xe0, 0x80, 0x80, 0x00,
    3, 0xe0, 0x81, 0xb3, 0x00,
    3, 0xe0, 0x90, 0x9a, 0x00,
    4, 0xf0, 0x80, 0x8a, 0x80,
    4, 0xf0, 0x80, 0x81, 0xb3,
    4, 0xf0, 0x80, 0xaf, 0xb5,
};
|
|
|
|
|
|
static nxt_int_t
|
|
nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len)
|
|
{
|
|
u_char *p, utf8[4];
|
|
size_t size;
|
|
uint32_t u, d;
|
|
nxt_uint_t i;
|
|
const u_char *pp;
|
|
|
|
pp = overlong;
|
|
|
|
d = nxt_utf8_decode(&pp, overlong + len);
|
|
|
|
len = pp - overlong;
|
|
|
|
if (d != 0xffffffff) {
|
|
p = nxt_utf8_encode(utf8, d);
|
|
|
|
size = (p != NULL) ? p - utf8 : 0;
|
|
|
|
if (len != size || nxt_memcmp(overlong, utf8, size) != 0) {
|
|
|
|
u = 0;
|
|
for (i = 0; i < len; i++) {
|
|
u = (u << 8) + overlong[i];
|
|
}
|
|
|
|
nxt_log_alert(thr->log,
|
|
"nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz",
|
|
u, len, d, size);
|
|
|
|
return NXT_ERROR;
|
|
}
|
|
}
|
|
|
|
return NXT_OK;
|
|
}
|
|
|
|
|
|
nxt_int_t
|
|
nxt_utf8_unit_test(nxt_thread_t *thr)
|
|
{
|
|
u_char *p, utf8[4];
|
|
size_t len;
|
|
int32_t n;
|
|
uint32_t u, d;
|
|
nxt_uint_t i, k, l, m;
|
|
const u_char *pp;
|
|
|
|
nxt_thread_time_update(thr);
|
|
|
|
nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test started");
|
|
|
|
/* Test valid UTF-8. */
|
|
|
|
for (u = 0; u < 0x110000; u++) {
|
|
|
|
p = nxt_utf8_encode(utf8, u);
|
|
|
|
if (p == NULL) {
|
|
nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u);
|
|
return NXT_ERROR;
|
|
}
|
|
|
|
pp = utf8;
|
|
|
|
d = nxt_utf8_decode(&pp, p);
|
|
|
|
if (u != d) {
|
|
nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD",
|
|
u, d);
|
|
return NXT_ERROR;
|
|
}
|
|
}
|
|
|
|
/* Test some invalid UTF-8. */
|
|
|
|
for (i = 0; i < sizeof(invalid); i += 5) {
|
|
|
|
len = invalid[i];
|
|
utf8[0] = invalid[i + 1];
|
|
utf8[1] = invalid[i + 2];
|
|
utf8[2] = invalid[i + 3];
|
|
utf8[3] = invalid[i + 4];
|
|
|
|
pp = utf8;
|
|
|
|
d = nxt_utf8_decode(&pp, utf8 + len);
|
|
|
|
if (d != 0xffffffff) {
|
|
|
|
u = 0;
|
|
for (i = 0; i < len; i++) {
|
|
u = (u << 8) + utf8[i];
|
|
}
|
|
|
|
nxt_log_alert(thr->log,
|
|
"nxt_utf8_decode(%05uxD, %uz) failed: %05uxD",
|
|
u, len, d);
|
|
return NXT_ERROR;
|
|
}
|
|
}
|
|
|
|
/* Test all overlong UTF-8. */
|
|
|
|
for (i = NXT_UTF8_START_TEST; i < 256; i++) {
|
|
utf8[0] = i;
|
|
|
|
if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) {
|
|
return NXT_ERROR;
|
|
}
|
|
|
|
for (k = 0; k < 256; k++) {
|
|
utf8[1] = k;
|
|
|
|
if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) {
|
|
return NXT_ERROR;
|
|
}
|
|
|
|
for (l = 0; l < 256; l++) {
|
|
utf8[2] = l;
|
|
|
|
if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) {
|
|
return NXT_ERROR;
|
|
}
|
|
|
|
for (m = 0; m < 256; m++) {
|
|
utf8[3] = m;
|
|
|
|
if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) {
|
|
return NXT_ERROR;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ",
|
|
(u_char *) "abc абв αβγ",
|
|
sizeof("ABC АБВ ΑΒΓ") - 1,
|
|
sizeof("abc абв αβγ") - 1);
|
|
|
|
if (n != 0) {
|
|
nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed");
|
|
return NXT_ERROR;
|
|
}
|
|
|
|
nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 unit test passed");
|
|
return NXT_OK;
|
|
}
|