Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
openldap
OpenLDAP
Commits
2d51ad52
Commit
2d51ad52
authored
Jan 15, 2002
by
Howard Chu
Browse files
Added LDAP_UTF8_CHARLEN2() to validate shortest possible encoding
of UTF8 string. (Returns charlen if valid, 0 if not.)
parent
13af7fb0
Changes
4
Hide whitespace changes
Inline
Side-by-side
include/ldap_pvt_uc.h
View file @
2d51ad52
...
...
@@ -47,6 +47,12 @@ LDAP_F (ber_len_t) ldap_utf8_chars( const char * );
LDAP_F (int) ldap_utf8_offset( const char *p );

/* returns the length (in bytes) indicated by the UTF-8 character */
LDAP_F (int) ldap_utf8_charlen( const char *p );

/* returns the length (in bytes) indicated by the UTF-8 character;
 * also checks that the shortest possible encoding was used
 */
LDAP_F (int) ldap_utf8_charlen2( const char *p );

/* copies a UTF-8 character and returns the number of bytes copied */
LDAP_F (int) ldap_utf8_copy( char *dst, const char *src );
...
...
@@ -76,10 +82,20 @@ LDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);
/* Optimizations */

/* Length table read by LDAP_UTF8_CHARLEN for non-ASCII lead octets;
 * indexed by (lead octet ^ 0x80).
 */
LDAP_V (const char) ldap_utf8_lentab[128];

/* Mask of bits required in the second octet, read by LDAP_UTF8_CHARLEN2;
 * indexed by (lead octet & 0x1f).
 */
LDAP_V (const char) ldap_utf8_mintab[32];
/* Non-zero when the octet at p has its high bit clear (single-octet character).
 * The casts keep the const qualifier so the macros can be applied to
 * 'const char *' arguments without discarding it.
 */
#define LDAP_UTF8_ISASCII(p) ( !(*(const unsigned char *)(p) & 0x80 ) )

/* Length in bytes indicated by the lead octet at p: 1 for an ASCII octet,
 * otherwise the ldap_utf8_lentab entry at (lead octet ^ 0x80).
 */
#define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] )

/* This is like CHARLEN but additionally validates to make sure
 * the char used the shortest possible encoding.
 * 'l' must be a modifiable lvalue; it is used to temporarily hold
 * the result of CHARLEN.
 * Evaluates to that length, or to 0 when the length is 3 or more and the
 * second octet has none of the bits set in the ldap_utf8_mintab entry
 * selected by (lead octet & 0x1f).
 * Both 'p' and 'l' are evaluated more than once, so neither may have
 * side effects.
 */
#define LDAP_UTF8_CHARLEN2(p, l) ( ( ( (l) = LDAP_UTF8_CHARLEN( p )) < 3 || \
	( ldap_utf8_mintab[*(const unsigned char *)(p) & 0x1f] & (p)[1] ) ) ? \
	(l) : 0 )

/* 1 for an ASCII octet, otherwise whatever ldap_utf8_offset() returns for p. */
#define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_offset((p)) )
...
...
libraries/libldap/getdn.c
View file @
2d51ad52
...
...
@@ -1894,7 +1894,7 @@ strval2strlen( struct berval *val, unsigned flags, ber_len_t *len )
continue
;
}
cl
=
LDAP_UTF8_CHARLEN
(
p
);
cl
=
LDAP_UTF8_CHARLEN
2
(
p
,
cl
);
if
(
cl
==
0
)
{
/* illegal utf-8 char! */
return
(
-
1
);
...
...
@@ -1903,7 +1903,7 @@ strval2strlen( struct berval *val, unsigned flags, ber_len_t *len )
ber_len_t
cnt
;
for
(
cnt
=
1
;
cnt
<
cl
;
cnt
++
)
{
if
(
(
p
[
cnt
]
&
0x
8
0
)
=
=
0x
0
0
)
{
if
(
(
p
[
cnt
]
&
0x
c
0
)
!
=
0x
8
0
)
{
return
(
-
1
);
}
}
...
...
libraries/libldap/utf-8-conv.c
View file @
2d51ad52
...
...
@@ -85,7 +85,7 @@ ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char )
utf8char
=
""
;
/* Get UTF-8 sequence length from 1st byte */
utflen
=
LDAP_UTF8_CHARLEN
(
utf8char
);
utflen
=
LDAP_UTF8_CHARLEN
2
(
utf8char
,
utflen
);
if
(
utflen
==
0
||
utflen
>
LDAP_MAX_UTF8_LEN
)
return
-
1
;
/* Invalid input */
...
...
@@ -130,7 +130,7 @@ ldap_x_utf8s_to_wcs ( wchar_t *wcstr, const char *utf8str, size_t count )
while
(
*
utf8str
&&
(
wcstr
==
NULL
||
wclen
<
count
)
)
{
/* Get UTF-8 sequence length from 1st byte */
utflen
=
LDAP_UTF8_CHARLEN
(
utf8str
);
utflen
=
LDAP_UTF8_CHARLEN
2
(
utf8str
,
utflen
);
if
(
utflen
==
0
||
utflen
>
LDAP_MAX_UTF8_LEN
)
return
-
1
;
/* Invalid input */
...
...
libraries/libldap/utf-8.c
View file @
2d51ad52
...
...
@@ -91,6 +91,29 @@ int ldap_utf8_charlen( const char * p )
return
ldap_utf8_lentab
[
*
(
unsigned
char
*
)
p
^
0x80
];
}
/*
* Make sure the UTF-8 char used the shortest possible encoding
* returns charlen if valid, 0 if not.
*/
/* mask of required bits in second octet, indexed by (lead octet & 0x1f).
 *
 * The element type is plain char, whose signedness is implementation-defined;
 * every initializer is cast explicitly so the 0x80 entries do not rely on an
 * implicit out-of-range constant conversion. The explicit size matches the
 * 32 initializers below.
 */
const char ldap_utf8_mintab[32] = {
	/* 0x00 - 0x07 */
	(char)0x20, (char)0x80, (char)0x80, (char)0x80,
	(char)0x80, (char)0x80, (char)0x80, (char)0x80,
	/* 0x08 - 0x0f */
	(char)0x80, (char)0x80, (char)0x80, (char)0x80,
	(char)0x80, (char)0x80, (char)0x80, (char)0x80,
	/* 0x10 - 0x17 */
	(char)0x30, (char)0x80, (char)0x80, (char)0x80,
	(char)0x80, (char)0x80, (char)0x80, (char)0x80,
	/* 0x18 - 0x1f */
	(char)0x38, (char)0x80, (char)0x80, (char)0x80,
	(char)0x3c, (char)0x80, (char)0x00, (char)0x00
};
/*
 * Returns the length produced by LDAP_UTF8_CHARLEN for the character at p.
 * When that length is greater than 2, the second octet must share at least
 * one bit with the ldap_utf8_mintab entry selected by the low five bits of
 * the first octet; otherwise 0 is returned.
 */
int
ldap_utf8_charlen2( const char * p )
{
	const int len = LDAP_UTF8_CHARLEN( p );

	/* lengths of 2 or less are passed through without the second-octet check */
	if ( len <= 2 ) {
		return len;
	}

	return ( ldap_utf8_mintab[*p & 0x1f] & p[1] ) ? len : 0;
}
/* conv UTF-8 to UCS-4, useful for comparisons */
ldap_ucs4_t
ldap_x_utf8_to_ucs4
(
const
char
*
p
)
{
...
...
@@ -100,7 +123,7 @@ ldap_ucs4_t ldap_x_utf8_to_ucs4( const char * p )
static
unsigned
char
mask
[]
=
{
0
,
0x7f
,
0x1f
,
0x0f
,
0x07
,
0x03
,
0x01
};
len
=
LDAP_UTF8_CHARLEN
(
p
);
len
=
LDAP_UTF8_CHARLEN
2
(
p
,
len
);
if
(
len
==
0
)
return
LDAP_UCS4_INVALID
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment