Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
openldap
OpenLDAP
Commits
94983da9
Commit
94983da9
authored
Feb 26, 2002
by
Stig Venaas
Browse files
Added code for approximate matching in UTF8bvnormalize() and changed
approxMatch etc. in schema_init.c to use it
parent
5d347b61
Changes
3
Hide whitespace changes
Inline
Side-by-side
include/ldap_pvt_uc.h
View file @
94983da9
...
...
@@ -141,6 +141,7 @@ LDAP_LUNICODE_F(void) ucstr2upper(
#define LDAP_UTF8_CASEFOLD 0x1U
#define LDAP_UTF8_ARG1NFC 0x2U
#define LDAP_UTF8_ARG2NFC 0x4U
#define LDAP_UTF8_APPROX 0x8U
LDAP_LUNICODE_F
(
char
*
)
UTF8normalize
(
struct
berval
*
,
...
...
libraries/liblunicode/ucstr.c
View file @
94983da9
...
...
@@ -245,12 +245,14 @@ char * UTF8normalize(
struct
berval
*
UTF8bvnormalize
(
struct
berval
*
bv
,
struct
berval
*
newbv
,
unsigned
casefold
)
unsigned
flags
)
{
int
i
,
j
,
len
,
clen
,
outpos
,
ucsoutlen
,
outsize
,
last
;
char
*
out
,
*
s
;
unsigned
long
*
ucs
,
*
p
,
*
ucsout
;
unsigned
casefold
=
flags
&
LDAP_UTF8_CASEFOLD
;
unsigned
approx
=
flags
&
LDAP_UTF8_APPROX
;
static
unsigned
char
mask
[]
=
{
0
,
0x7f
,
0x1f
,
0x0f
,
0x07
,
0x03
,
0x01
};
...
...
@@ -361,20 +363,28 @@ struct berval * UTF8bvnormalize(
}
/* normalize ucs of length p - ucs */
uccanondecomp
(
ucs
,
p
-
ucs
,
&
ucsout
,
&
ucsoutlen
);
ucsoutlen
=
uccanoncomp
(
ucsout
,
ucsoutlen
);
/* convert ucs to utf-8 and store in out */
for
(
j
=
0
;
j
<
ucsoutlen
;
j
++
)
{
/* allocate more space if not enough room for
6 bytes and terminator */
if
(
outsize
-
outpos
<
7
)
{
outsize
=
ucsoutlen
-
j
+
outpos
+
6
;
out
=
(
char
*
)
realloc
(
out
,
outsize
);
if
(
out
==
NULL
)
{
free
(
ucs
);
return
NULL
;
if
(
approx
)
{
for
(
j
=
0
;
j
<
ucsoutlen
;
j
++
)
{
if
(
ucsout
[
j
]
<
0x80
)
{
out
[
outpos
++
]
=
ucsout
[
j
];
}
}
outpos
+=
ldap_x_ucs4_to_utf8
(
ucsout
[
j
],
&
out
[
outpos
]
);
}
else
{
ucsoutlen
=
uccanoncomp
(
ucsout
,
ucsoutlen
);
/* convert ucs to utf-8 and store in out */
for
(
j
=
0
;
j
<
ucsoutlen
;
j
++
)
{
/* allocate more space if not enough room for
6 bytes and terminator */
if
(
outsize
-
outpos
<
7
)
{
outsize
=
ucsoutlen
-
j
+
outpos
+
6
;
out
=
(
char
*
)
realloc
(
out
,
outsize
);
if
(
out
==
NULL
)
{
free
(
ucs
);
return
NULL
;
}
}
outpos
+=
ldap_x_ucs4_to_utf8
(
ucsout
[
j
],
&
out
[
outpos
]
);
}
}
if
(
i
==
len
)
{
...
...
servers/slapd/schema_init.c
View file @
94983da9
...
...
@@ -646,30 +646,6 @@ err:
return
NULL
;
}
/*
 * Strip characters with the 8th bit set.
 *
 * Removes, in place, every byte of the NUL-terminated string `in` whose
 * high bit (0x80) is set, closing the gaps so the remaining bytes stay in
 * their original order.  The result is never longer than the input and is
 * always NUL-terminated.
 *
 * Returns `in` itself (now holding the stripped string), or NULL when
 * `in` is NULL.
 */
static char *strip8bitChars(
	char *in )
{
	char *src, *dst;

	if( in == NULL ) {
		return NULL;
	}

	/*
	 * Single-pass compaction: `src` reads every byte, `dst` trails behind
	 * and receives only the bytes that are kept.  This avoids re-scanning
	 * and re-moving the tail of the string for each run of 8-bit bytes,
	 * and needs no overlapping block copy.
	 */
	for( src = dst = in; *src != '\0'; src++ ) {
		if( !( *src & 0x80 ) ) {
			*dst++ = *src;
		}
	}
	*dst = '\0';

	return in;
}
#ifndef SLAPD_APPROX_OLDSINGLESTRING
#if defined(SLAPD_APPROX_INITIALS)
...
...
@@ -689,31 +665,27 @@ approxMatch(
struct
berval
*
value
,
void
*
assertedValue
)
{
char
*
val
,
*
nval
,
*
assertv
,
**
values
,
**
words
,
*
c
;
struct
berval
*
nval
,
*
assertv
;
char
*
val
,
**
values
,
**
words
,
*
c
;
int
i
,
count
,
len
,
nextchunk
=
0
,
nextavail
=
0
;
size_t
avlen
;
/* Yes, this is necessary */
nval
=
UTF8normalize
(
value
,
LDAP_UTF8_
NOCASEFOLD
);
nval
=
UTF8
bv
normalize
(
value
,
NULL
,
LDAP_UTF8_
APPROX
);
if
(
nval
==
NULL
)
{
*
matchp
=
1
;
return
LDAP_SUCCESS
;
}
strip8bitChars
(
nval
);
/* Yes, this is necessary */
assertv
=
UTF8normalize
(
((
struct
berval
*
)
assertedValue
),
LDAP_UTF8_NOCASEFOLD
);
assertv
=
UTF8bvnormalize
(
((
struct
berval
*
)
assertedValue
),
NULL
,
LDAP_UTF8_APPROX
);
if
(
assertv
==
NULL
)
{
ch_
free
(
nval
);
ber_bv
free
(
nval
);
*
matchp
=
1
;
return
LDAP_SUCCESS
;
}
strip8bitChars
(
assertv
);
avlen
=
strlen
(
assertv
);
/* Isolate how many words there are */
for
(
c
=
nval
,
count
=
1
;
*
c
;
c
++
)
{
for
(
c
=
nval
->
bv_val
,
count
=
1
;
*
c
;
c
++
)
{
c
=
strpbrk
(
c
,
SLAPD_APPROX_DELIMITER
);
if
(
c
==
NULL
)
break
;
*
c
=
'\0'
;
...
...
@@ -723,7 +695,7 @@ approxMatch(
/* Get a phonetic copy of each word */
words
=
(
char
**
)
ch_malloc
(
count
*
sizeof
(
char
*
)
);
values
=
(
char
**
)
ch_malloc
(
count
*
sizeof
(
char
*
)
);
for
(
c
=
nval
,
i
=
0
;
i
<
count
;
i
++
,
c
+=
strlen
(
c
)
+
1
)
{
for
(
c
=
nval
->
bv_val
,
i
=
0
;
i
<
count
;
i
++
,
c
+=
strlen
(
c
)
+
1
)
{
words
[
i
]
=
c
;
values
[
i
]
=
phonetic
(
c
);
}
...
...
@@ -731,8 +703,8 @@ approxMatch(
/* Work through the asserted value's words, to see if at least some
of the words are there, in the same order. */
len
=
0
;
while
(
(
size
_t
)
nextchunk
<
a
v
len
)
{
len
=
strcspn
(
assertv
+
nextchunk
,
SLAPD_APPROX_DELIMITER
);
while
(
(
ber_len
_t
)
nextchunk
<
a
ssertv
->
bv_
len
)
{
len
=
strcspn
(
assertv
->
bv_val
+
nextchunk
,
SLAPD_APPROX_DELIMITER
);
if
(
len
==
0
)
{
nextchunk
++
;
continue
;
...
...
@@ -741,7 +713,7 @@ approxMatch(
else
if
(
len
==
1
)
{
/* Single letter words need to at least match one word's initial */
for
(
i
=
nextavail
;
i
<
count
;
i
++
)
if
(
!
strncasecmp
(
assertv
+
nextchunk
,
words
[
i
],
1
))
{
if
(
!
strncasecmp
(
assertv
->
bv_val
+
nextchunk
,
words
[
i
],
1
))
{
nextavail
=
i
+
1
;
break
;
}
...
...
@@ -749,8 +721,8 @@ approxMatch(
#endif
else
{
/* Isolate the next word in the asserted value and phonetic it */
assertv
[
nextchunk
+
len
]
=
'\0'
;
val
=
phonetic
(
assertv
+
nextchunk
);
assertv
->
bv_val
[
nextchunk
+
len
]
=
'\0'
;
val
=
phonetic
(
assertv
->
bv_val
+
nextchunk
);
/* See if this phonetic chunk is in the remaining words of *value */
for
(
i
=
nextavail
;
i
<
count
;
i
++
){
...
...
@@ -781,13 +753,13 @@ approxMatch(
}
/* Cleanup allocs */
free
(
assertv
);
ber_bv
free
(
assertv
);
for
(
i
=
0
;
i
<
count
;
i
++
)
{
ch_free
(
values
[
i
]
);
}
ch_free
(
values
);
ch_free
(
words
);
ch_
free
(
nval
);
ber_bv
free
(
nval
);
return
LDAP_SUCCESS
;
}
...
...
@@ -802,18 +774,18 @@ approxIndexer(
BerVarray
values
,
BerVarray
*
keysp
)
{
char
*
val
,
*
c
;
char
*
c
;
int
i
,
j
,
len
,
wordcount
,
keycount
=
0
;
struct
berval
*
newkeys
;
struct
berval
*
val
,
*
newkeys
;
BerVarray
keys
=
NULL
;
for
(
j
=
0
;
values
[
j
].
bv_val
!=
NULL
;
j
++
)
{
/* Yes, this is necessary */
val
=
UTF8normalize
(
&
values
[
j
],
LDAP_UTF8_
NOCASEFOLD
);
strip8bitChars
(
val
);
val
=
UTF8
bv
normalize
(
&
values
[
j
],
NULL
,
LDAP_UTF8_
APPROX
);
assert
(
val
!=
NULL
&&
val
->
bv_val
!=
NULL
);
/* Isolate how many words there are. There will be a key for each */
for
(
wordcount
=
0
,
c
=
val
;
*
c
;
c
++
)
{
for
(
wordcount
=
0
,
c
=
val
->
bv_
val
;
*
c
;
c
++
)
{
len
=
strcspn
(
c
,
SLAPD_APPROX_DELIMITER
);
if
(
len
>=
SLAPD_APPROX_WORDLEN
)
wordcount
++
;
c
+=
len
;
...
...
@@ -829,7 +801,7 @@ approxIndexer(
keys
=
newkeys
;
/* Get a phonetic copy of each word */
for
(
c
=
val
,
i
=
0
;
i
<
wordcount
;
c
+=
len
+
1
)
{
for
(
c
=
val
->
bv_val
,
i
=
0
;
i
<
wordcount
;
c
+=
len
+
1
)
{
len
=
strlen
(
c
);
if
(
len
<
SLAPD_APPROX_WORDLEN
)
continue
;
ber_str2bv
(
phonetic
(
c
),
0
,
0
,
&
keys
[
keycount
]
);
...
...
@@ -837,7 +809,7 @@ approxIndexer(
i
++
;
}
free
(
val
);
ber_bv
free
(
val
);
}
keys
[
keycount
].
bv_val
=
NULL
;
*
keysp
=
keys
;
...
...
@@ -855,23 +827,23 @@ approxFilter(
void
*
assertValue
,
BerVarray
*
keysp
)
{
char
*
val
,
*
c
;
char
*
c
;
int
i
,
count
,
len
;
struct
berval
*
val
;
BerVarray
keys
;
/* Yes, this is necessary */
val
=
UTF8normalize
(
((
struct
berval
*
)
assertValue
),
LDAP_UTF8_NOCASEFOLD
);
if
(
val
==
NULL
)
{
val
=
UTF8bvnormalize
(
((
struct
berval
*
)
assertValue
),
NULL
,
LDAP_UTF8_APPROX
);
if
(
val
==
NULL
||
val
->
bv_val
==
NULL
)
{
keys
=
(
struct
berval
*
)
ch_malloc
(
sizeof
(
struct
berval
)
);
keys
[
0
].
bv_val
=
NULL
;
*
keysp
=
keys
;
ber_bvfree
(
val
);
return
LDAP_SUCCESS
;
}
strip8bitChars
(
val
);
/* Isolate how many words there are. There will be a key for each */
for
(
count
=
0
,
c
=
val
;
*
c
;
c
++
)
{
for
(
count
=
0
,
c
=
val
->
bv_
val
;
*
c
;
c
++
)
{
len
=
strcspn
(
c
,
SLAPD_APPROX_DELIMITER
);
if
(
len
>=
SLAPD_APPROX_WORDLEN
)
count
++
;
c
+=
len
;
...
...
@@ -883,14 +855,14 @@ approxFilter(
keys
=
(
struct
berval
*
)
ch_malloc
(
(
count
+
1
)
*
sizeof
(
struct
berval
)
);
/* Get a phonetic copy of each word */
for
(
c
=
val
,
i
=
0
;
i
<
count
;
c
+=
len
+
1
)
{
for
(
c
=
val
->
bv_val
,
i
=
0
;
i
<
count
;
c
+=
len
+
1
)
{
len
=
strlen
(
c
);
if
(
len
<
SLAPD_APPROX_WORDLEN
)
continue
;
ber_str2bv
(
phonetic
(
c
),
0
,
0
,
&
keys
[
i
]
);
i
++
;
}
free
(
val
);
ber_bv
free
(
val
);
keys
[
count
].
bv_val
=
NULL
;
*
keysp
=
keys
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment