Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
openldap
OpenLDAP
Commits
c476878f
Commit
c476878f
authored
Feb 27, 2002
by
Stig Venaas
Browse files
Using new UTF8bv* all over, getting rid of UTF8normalize() and
UTF8normcmp().
parent
94983da9
Changes
3
Hide whitespace changes
Inline
Side-by-side
include/ldap_pvt_uc.h
View file @
c476878f
...
...
@@ -143,20 +143,11 @@ LDAP_LUNICODE_F(void) ucstr2upper(
/*
 * Flag bits for the `unsigned` argument of the UTF8* routines below.
 * NOTE(review): the exact meaning of LDAP_UTF8_ARG2NFC is not visible
 * here -- presumably "the second argument is already in NFC"; confirm
 * against the UTF8bvnormcmp implementation.
 */
#define LDAP_UTF8_ARG2NFC 0x4U
/* Passed by slapd's approxIndexer as the flags of UTF8bvnormalize(). */
#define LDAP_UTF8_APPROX 0x8U
/*
 * UTF8normalize( bv, casefold )
 *
 * Returns a newly malloc()ed, NUL-terminated, normalized copy of the
 * UTF-8 string described by bv (bv_val / bv_len).  A non-zero second
 * argument additionally upper-cases the text.  Returns NULL when bv is
 * NULL, when the input is not well-formed UTF-8, or when memory cannot
 * be allocated.
 */
LDAP_LUNICODE_F
(
char
*
)
UTF8normalize
(
struct
berval
*
,
unsigned
);
/*
 * UTF8bvnormalize( bv, newbv, flags )
 *
 * berval-based counterpart of UTF8normalize(): the result is handed to
 * ber_str2bv() together with the second argument and that call's return
 * value is returned.
 * NOTE(review): callers test newbv->bv_val for NULL to detect bad UTF-8;
 * confirm against the full implementation.
 */
LDAP_LUNICODE_F
(
struct
berval
*
)
UTF8bvnormalize
(
struct
berval
*
,
struct
berval
*
,
unsigned
);
/*
 * UTF8normcmp( s1, s2, casefold )
 *
 * Compares two NUL-terminated UTF-8 strings after canonical
 * normalization, ignoring case when the third argument is non-zero.
 * Neither pointer may be NULL.  Returns 0 when equal, a negative value
 * when s1 sorts first and a positive value when s2 sorts first.
 */
LDAP_LUNICODE_F
(
int
)
UTF8normcmp
(
const
char
*
,
const
char
*
,
unsigned
);
LDAP_LUNICODE_F
(
int
)
UTF8bvnormcmp
(
struct
berval
*
,
struct
berval
*
,
...
...
libraries/liblunicode/ucstr.c
View file @
c476878f
...
...
@@ -92,156 +92,6 @@ void ucstr2upper(
}
}
/*
 * UTF8normalize - build a normalized copy of a UTF-8 berval.
 *
 * bv        string to normalize (bv_val / bv_len); may be NULL.
 * casefold  non-zero to upper-case the text while normalizing.
 *
 * Pure-ASCII input is copied (or upper-cased) directly.  Otherwise the
 * string is processed in alternating runs: every run of non-ASCII
 * characters, together with the single ASCII character immediately in
 * front of it (it may be the base of a combining sequence), is decoded
 * to UCS-4, passed through uccanondecomp() and uccanoncomp(), and
 * re-encoded as UTF-8; all other ASCII characters are copied straight
 * to the output.
 *
 * Returns a NUL-terminated string obtained from malloc() that the
 * caller must release, or NULL if bv is NULL, the input is not
 * well-formed UTF-8, or memory cannot be allocated.
 */
char * UTF8normalize(
	struct berval *bv,
	unsigned casefold )
{
	int i, j, len, clen, outpos, ucsoutlen, outsize;
	char *out, *outtmp, *s;
	unsigned long *ucs, *p, *ucsout;

	/* payload bits of the lead byte, indexed by sequence length */
	static unsigned char mask[] = {
		0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

	if ( bv == NULL ) {
		return NULL;
	}

	s = bv->bv_val;
	len = bv->bv_len;

	/* See if the string is pure ASCII so we can shortcut */
	for ( i = 0; i < len; i++ ) {
		if ( s[i] & 0x80 ) {
			/* non-ASCII */
			break;
		}
	}

	/* It's pure ASCII or zero-len */
	if ( i == len ) {
		out = malloc( len + 1 );
		if ( out == NULL ) {
			return NULL;
		}
		if ( i && !casefold ) {
			strncpy( out, bv->bv_val, len );
		} else {
			for ( j = 0; j < i; j++ ) {
				out[j] = TOUPPER( s[j] );
			}
		}
		out[len] = '\0';
		return out;
	}

	outsize = len + 7;
	out = malloc( outsize );
	if ( out == NULL ) {
		return NULL;
	}

	/* FIXME: Should first check to see if string is already in
	 * proper normalized form.
	 */

	outpos = 0;

	/* finish off everything up to character before first non-ascii */
	if ( LDAP_UTF8_ISASCII( s ) ) {
		for ( i = 1; ( i < len ) && LDAP_UTF8_ISASCII( s + i ); i++ ) {
			out[outpos++] = casefold ? TOUPPER( s[i - 1] ) : s[i - 1];
		}
		if ( i == len ) {
			out[outpos++] = casefold ? TOUPPER( s[len - 1] ) : s[len - 1];
			out[outpos] = '\0';
			return out;
		}
	} else {
		i = 0;
	}

	/* one UCS-4 slot per input byte is always enough */
	p = ucs = malloc( len * sizeof( *ucs ) );
	if ( ucs == NULL ) {
		free( out );
		return NULL;
	}

	/* convert character before first non-ascii to ucs-4 */
	if ( i > 0 ) {
		*p = casefold ? TOUPPER( s[i - 1] ) : s[i - 1];
		p++;
	}

	/* s[i] is now first non-ascii character */
	for ( ;; ) {
		/* s[i] is non-ascii */
		/* convert everything up to next ascii to ucs-4 */
		while ( i < len ) {
			clen = LDAP_UTF8_CHARLEN2( s + i, clen );
			if ( clen == 0 ) {
				goto fail;
			}
			if ( clen == 1 ) {
				/* ascii */
				break;
			}
			*p = s[i] & mask[clen];
			i++;
			for ( j = 1; j < clen; j++ ) {
				/* reject a sequence cut short by the end of the
				 * input instead of reading past bv_len */
				if ( i >= len || ( s[i] & 0xc0 ) != 0x80 ) {
					goto fail;
				}
				*p <<= 6;
				*p |= s[i] & 0x3f;
				i++;
			}
			if ( casefold ) {
				*p = uctoupper( *p );
			}
			p++;
		}

		/* normalize ucs of length p - ucs */
		uccanondecomp( ucs, p - ucs, &ucsout, &ucsoutlen );
		ucsoutlen = uccanoncomp( ucsout, ucsoutlen );

		/* convert ucs to utf-8 and store in out */
		for ( j = 0; j < ucsoutlen; j++ ) {
			/* allocate more space if not enough room for
			   6 bytes and terminator */
			if ( outsize - outpos < 7 ) {
				outsize = ucsoutlen - j + outpos + 6;
				/* keep the old pointer so it can still be
				 * released when realloc() fails */
				outtmp = realloc( out, outsize );
				if ( outtmp == NULL ) {
					free( ucsout );
					goto fail;
				}
				out = outtmp;
			}
			outpos += ldap_x_ucs4_to_utf8( ucsout[j], &out[outpos] );
		}

		/* the decomposition buffer is allocated anew for every run */
		free( ucsout );

		if ( i == len ) {
			break;
		}

		/* s[i] is ascii */

		/* The ASCII run below stores at most len - i bytes and the
		 * terminator follows; normalization may already have used up
		 * the initial head-room, so grow the buffer first. */
		if ( outsize - outpos < len - i + 1 ) {
			outsize = outpos + ( len - i ) + 7;
			outtmp = realloc( out, outsize );
			if ( outtmp == NULL ) {
				goto fail;
			}
			out = outtmp;
		}

		/* finish off everything up to char before next non-ascii */
		for ( i++; ( i < len ) && LDAP_UTF8_ISASCII( s + i ); i++ ) {
			out[outpos++] = casefold ? TOUPPER( s[i - 1] ) : s[i - 1];
		}
		if ( i == len ) {
			out[outpos++] = casefold ? TOUPPER( s[len - 1] ) : s[len - 1];
			break;
		}

		/* convert character before next non-ascii to ucs-4 */
		*ucs = casefold ? TOUPPER( s[i - 1] ) : s[i - 1];
		p = ucs + 1;
	}

	free( ucs );
	out[outpos] = '\0';
	return out;

fail:
	free( ucs );
	free( out );
	return NULL;
}
struct
berval
*
UTF8bvnormalize
(
struct
berval
*
bv
,
struct
berval
*
newbv
,
...
...
@@ -412,102 +262,6 @@ struct berval * UTF8bvnormalize(
return
ber_str2bv
(
out
,
outpos
,
0
,
newbv
);
}
/*
 * UTF8normcmp - compare two NUL-terminated UTF-8 strings after
 * canonical normalization, optionally ignoring case.
 *
 * s1, s2    strings to compare; neither may be NULL.
 * casefold  non-zero for a case-insensitive comparison.
 *
 * Returns 0 when the strings are equal, a negative value when s1 sorts
 * before s2 and a positive value when it sorts after.  If s1 is not
 * valid UTF-8 the result is -1, if s2 is not valid UTF-8 it is 1, and
 * if the scratch buffer cannot be allocated the strings are ordered by
 * byte length only.
 *
 * Slow, should be optimized.
 */
int UTF8normcmp(
	const char *s1,
	const char *s2,
	unsigned casefold )
{
	int i, l1, l2, len, ulen, res;
	unsigned long *ucs, *ucsout1, *ucsout2;

	l1 = strlen( s1 );
	l2 = strlen( s2 );

	if ( ( l1 == 0 ) || ( l2 == 0 ) ) {
		if ( l1 == l2 ) {
			return 0;
		}
		/* The empty string sorts first.  Decide by length rather
		 * than by the first byte: a lead byte >= 0x80 is negative
		 * where plain char is signed and would invert the result. */
		return l1 > l2 ? 1 : -1;
	}

	/* See if we can get away with a straight ASCII compare */
	len = ( l1 < l2 ) ? l1 : l2;
	for ( i = 0; i < len; i++ ) {
		/* Is either char non-ASCII? */
		if ( ( s1[i] & 0x80 ) || ( s2[i] & 0x80 ) ) {
			break;
		}
		if ( casefold ) {
			char c1 = TOUPPER( s1[i] );
			char c2 = TOUPPER( s2[i] );
			res = c1 - c2;
		} else {
			res = s1[i] - s2[i];
		}
		if ( res ) {
			return res;
		}
	}
	/* Strings were ASCII, equal up to minlen */
	if ( i == len ) {
		return l1 - l2;
	}

	/* FIXME: Should first check to see if strings are already in
	 * proper normalized form.
	 */

	/* one scratch buffer, large enough for the longer string */
	ucs = malloc( ( l1 > l2 ? l1 : l2 ) * sizeof( *ucs ) );
	if ( ucs == NULL ) {
		return l1 > l2 ? 1 : -1; /* what to do??? */
	}

	/*
	 * XXYYZ: we convert to ucs4 even though -llunicode
	 * expects ucs2 in an unsigned long
	 */

	/* convert and normalize 1st string */
	for ( i = 0, ulen = 0; i < l1; i += len, ulen++ ) {
		ucs[ulen] = ldap_x_utf8_to_ucs4( s1 + i );
		if ( ucs[ulen] == LDAP_UCS4_INVALID ) {
			free( ucs );
			return -1; /* what to do??? */
		}
		len = LDAP_UTF8_CHARLEN( s1 + i );
	}
	/* from here on l1 counts normalized UCS-4 characters, not bytes */
	uccanondecomp( ucs, ulen, &ucsout1, &l1 );
	l1 = uccanoncomp( ucsout1, l1 );

	/* convert and normalize 2nd string */
	for ( i = 0, ulen = 0; i < l2; i += len, ulen++ ) {
		ucs[ulen] = ldap_x_utf8_to_ucs4( s2 + i );
		if ( ucs[ulen] == LDAP_UCS4_INVALID ) {
			free( ucsout1 );
			free( ucs );
			return 1; /* what to do??? */
		}
		len = LDAP_UTF8_CHARLEN( s2 + i );
	}
	/* likewise l2 becomes a character count */
	uccanondecomp( ucs, ulen, &ucsout2, &l2 );
	l2 = uccanoncomp( ucsout2, l2 );

	free( ucs );

	/* compare the common prefix of the two normalized strings */
	len = ( l1 < l2 ) ? l1 : l2;
	if ( casefold ) {
		res = ucstrncasecmp( ucsout1, ucsout2, len );
	} else {
		res = ucstrncmp( ucsout1, ucsout2, len );
	}
	free( ucsout1 );
	free( ucsout2 );

	if ( res != 0 ) {
		return res;
	}
	/* equal prefixes: the shorter string sorts first */
	if ( l1 == l2 ) {
		return 0;
	}
	return l1 > l2 ? 1 : -1;
}
/* compare UTF8-strings, optionally ignore casing */
/* slow, should be optimized */
int
UTF8bvnormcmp
(
...
...
servers/slapd/schema_init.c
View file @
c476878f
...
...
@@ -776,16 +776,17 @@ approxIndexer(
{
char
*
c
;
int
i
,
j
,
len
,
wordcount
,
keycount
=
0
;
struct
berval
*
val
,
*
newkeys
;
struct
berval
*
newkeys
;
BerVarray
keys
=
NULL
;
for
(
j
=
0
;
values
[
j
].
bv_val
!=
NULL
;
j
++
)
{
struct
berval
val
=
{
0
,
NULL
};
/* Yes, this is necessary */
val
=
UTF8bvnormalize
(
&
values
[
j
],
NULL
,
LDAP_UTF8_APPROX
);
assert
(
val
!=
NULL
&&
val
->
bv_val
!=
NULL
);
UTF8bvnormalize
(
&
values
[
j
],
&
val
,
LDAP_UTF8_APPROX
);
assert
(
val
.
bv_val
!=
NULL
);
/* Isolate how many words there are. There will be a key for each */
for
(
wordcount
=
0
,
c
=
val
->
bv_val
;
*
c
;
c
++
)
{
for
(
wordcount
=
0
,
c
=
val
.
bv_val
;
*
c
;
c
++
)
{
len
=
strcspn
(
c
,
SLAPD_APPROX_DELIMITER
);
if
(
len
>=
SLAPD_APPROX_WORDLEN
)
wordcount
++
;
c
+=
len
;
...
...
@@ -801,7 +802,7 @@ approxIndexer(
keys
=
newkeys
;
/* Get a phonetic copy of each word */
for
(
c
=
val
->
bv_val
,
i
=
0
;
i
<
wordcount
;
c
+=
len
+
1
)
{
for
(
c
=
val
.
bv_val
,
i
=
0
;
i
<
wordcount
;
c
+=
len
+
1
)
{
len
=
strlen
(
c
);
if
(
len
<
SLAPD_APPROX_WORDLEN
)
continue
;
ber_str2bv
(
phonetic
(
c
),
0
,
0
,
&
keys
[
keycount
]
);
...
...
@@ -809,7 +810,7 @@ approxIndexer(
i
++
;
}
ber_
bv
free
(
val
);
ber_
mem
free
(
val
.
bv_val
);
}
keys
[
keycount
].
bv_val
=
NULL
;
*
keysp
=
keys
;
...
...
@@ -997,9 +998,7 @@ caseExactMatch(
struct
berval
*
value
,
void
*
assertedValue
)
{
*
matchp
=
UTF8normcmp
(
value
->
bv_val
,
((
struct
berval
*
)
assertedValue
)
->
bv_val
,
LDAP_UTF8_NOCASEFOLD
);
*
matchp
=
UTF8bvnormcmp
(
value
,
(
struct
berval
*
)
assertedValue
,
LDAP_UTF8_NOCASEFOLD
);
return
LDAP_SUCCESS
;
}
...
...
@@ -1195,8 +1194,7 @@ static int caseExactIgnoreIndexer(
for
(
i
=
0
;
values
[
i
].
bv_val
!=
NULL
;
i
++
)
{
struct
berval
value
;
ber_str2bv
(
UTF8normalize
(
&
values
[
i
],
casefold
),
0
,
0
,
&
value
);
UTF8bvnormalize
(
&
values
[
i
],
&
value
,
casefold
);
HASH_Init
(
&
HASHcontext
);
if
(
prefix
!=
NULL
&&
prefix
->
bv_len
>
0
)
{
...
...
@@ -1236,8 +1234,9 @@ static int caseExactIgnoreFilter(
BerVarray
keys
;
HASH_CONTEXT
HASHcontext
;
unsigned
char
HASHdigest
[
HASH_BYTES
];
struct
berval
value
;
struct
berval
value
=
{
0
,
NULL
}
;
struct
berval
digest
;
digest
.
bv_val
=
HASHdigest
;
digest
.
bv_len
=
sizeof
(
HASHdigest
);
...
...
@@ -1247,8 +1246,7 @@ static int caseExactIgnoreFilter(
casefold
=
strcmp
(
mr
->
smr_oid
,
caseExactMatchOID
)
?
LDAP_UTF8_CASEFOLD
:
LDAP_UTF8_NOCASEFOLD
;
ber_str2bv
(
UTF8normalize
(
((
struct
berval
*
)
assertValue
),
casefold
),
0
,
0
,
&
value
);
UTF8bvnormalize
(
(
struct
berval
*
)
assertValue
,
&
value
,
casefold
);
/* This usually happens if filter contains bad UTF8 */
if
(
value
.
bv_val
==
NULL
)
{
keys
=
ch_malloc
(
sizeof
(
struct
berval
)
);
...
...
@@ -1316,8 +1314,7 @@ static int caseExactIgnoreSubstringsIndexer(
nvalues
=
ch_malloc
(
sizeof
(
struct
berval
)
*
(
i
+
1
)
);
for
(
i
=
0
;
values
[
i
].
bv_val
!=
NULL
;
i
++
)
{
ber_str2bv
(
UTF8normalize
(
&
values
[
i
],
casefold
),
0
,
0
,
&
nvalues
[
i
]
);
UTF8bvnormalize
(
&
values
[
i
],
&
nvalues
[
i
],
casefold
);
}
nvalues
[
i
].
bv_val
=
NULL
;
values
=
nvalues
;
...
...
@@ -1647,9 +1644,7 @@ caseIgnoreMatch(
struct
berval
*
value
,
void
*
assertedValue
)
{
*
matchp
=
UTF8normcmp
(
value
->
bv_val
,
((
struct
berval
*
)
assertedValue
)
->
bv_val
,
LDAP_UTF8_CASEFOLD
);
*
matchp
=
UTF8bvnormcmp
(
value
,
(
struct
berval
*
)
assertedValue
,
LDAP_UTF8_CASEFOLD
);
return
LDAP_SUCCESS
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment