Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
openldap
OpenLDAP
Commits
8c61bd27
Commit
8c61bd27
authored
Jan 19, 2001
by
Stig Venaas
Browse files
Added UTF8normalize() that does Unicode canonical normalization and
optionally case folding
parent
111dd4cc
Changes
2
Hide whitespace changes
Inline
Side-by-side
include/ldap_pvt_uc.h
View file @
8c61bd27
...
...
@@ -126,6 +126,10 @@ LDAP_LUNICODE_F(void) ucstr2upper(
ldap_unicode_t
*
,
ber_len_t
);
LDAP_LUNICODE_F
(
char
*
)
UTF8normalize
(
const
char
*
,
char
casefold
);
LDAP_END_DECL
#endif
...
...
libraries/liblunicode/ucstr.c
View file @
8c61bd27
...
...
@@ -75,4 +75,134 @@ void ucstr2upper(
}
}
char
*
UTF8normalize
(
const
char
*
s
,
char
casefold
)
{
int
i
,
j
,
len
,
clen
,
outpos
,
ucsoutlen
,
outsize
,
last
;
char
*
out
;
unsigned
long
*
ucs
,
*
p
,
*
ucsout
;
static
unsigned
char
mask
[]
=
{
0
,
0x7f
,
0x1f
,
0x0f
,
0x07
,
0x03
,
0x01
};
if
(
s
==
NULL
)
{
return
NULL
;
}
len
=
strlen
(
s
);
if
(
len
==
0
)
{
out
=
(
char
*
)
malloc
(
1
);
*
out
=
'\0'
;
return
out
;
}
outsize
=
len
+
7
;
out
=
(
char
*
)
malloc
(
outsize
);
if
(
out
==
NULL
)
{
return
NULL
;
}
outpos
=
0
;
/* finish off everything up to character before first non-ascii */
if
(
LDAP_UTF8_ISASCII
(
s
)
)
{
for
(
i
=
1
;
(
i
<
len
)
&&
LDAP_UTF8_ISASCII
(
s
+
i
);
i
++
)
{
out
[
outpos
++
]
=
casefold
?
TOUPPER
(
s
[
i
-
1
]
)
:
s
[
i
-
1
];
}
if
(
i
==
len
)
{
out
[
outpos
++
]
=
casefold
?
TOUPPER
(
s
[
len
-
1
]
)
:
s
[
len
-
1
];
out
[
outpos
]
=
'\0'
;
return
out
;
}
}
else
{
i
=
0
;
}
p
=
ucs
=
(
long
*
)
malloc
(
len
*
sizeof
(
*
ucs
)
);
if
(
ucs
==
NULL
)
{
free
(
out
);
return
NULL
;
}
/* convert character before first non-ascii to ucs-4 */
if
(
i
>
0
)
{
*
p
=
casefold
?
TOUPPER
(
s
[
i
-
1
]
)
:
s
[
i
-
1
];
p
++
;
}
/* s[i] is now first non-ascii character */
for
(;;)
{
/* s[i] is non-ascii */
/* convert everything up to next ascii to ucs-4 */
while
(
i
<
len
)
{
clen
=
LDAP_UTF8_CHARLEN
(
s
+
i
);
if
(
clen
==
0
)
{
free
(
ucs
);
free
(
out
);
return
NULL
;
}
if
(
clen
==
1
)
{
/* ascii */
break
;
}
*
p
=
s
[
i
]
&
mask
[
clen
];
i
++
;
for
(
j
=
1
;
j
<
clen
;
j
++
)
{
if
(
(
s
[
i
]
&
0xc0
)
!=
0x80
)
{
free
(
ucs
);
free
(
out
);
return
NULL
;
}
*
p
<<=
6
;
*
p
|=
s
[
i
]
&
0x3f
;
i
++
;
}
if
(
casefold
)
{
*
p
=
uctoupper
(
*
p
);
}
p
++
;
}
/* normalize ucs of length p - ucs */
uccanondecomp
(
ucs
,
p
-
ucs
,
&
ucsout
,
&
ucsoutlen
);
ucsoutlen
=
uccanoncomp
(
ucsout
,
ucsoutlen
);
/* convert ucs to utf-8 and store in out */
for
(
j
=
0
;
j
<
ucsoutlen
;
j
++
)
{
/* allocate more space if not enough room for
6 bytes and terminator */
if
(
outsize
-
outpos
<
7
)
{
outsize
=
ucsoutlen
-
j
+
outpos
+
6
;
out
=
(
char
*
)
realloc
(
out
,
outsize
);
if
(
out
==
NULL
)
{
free
(
ucs
);
return
NULL
;
}
}
outpos
+=
ldap_ucs4_to_utf8
(
ucsout
[
j
],
&
out
[
outpos
]
);
}
if
(
i
==
len
)
{
break
;
}
last
=
i
;
/* s[i] is ascii */
/* finish off everything up to char before next non-ascii */
for
(
i
++
;
(
i
<
len
)
&&
LDAP_UTF8_ISASCII
(
s
+
i
);
i
++
)
{
out
[
outpos
++
]
=
casefold
?
TOUPPER
(
s
[
i
-
1
]
)
:
s
[
i
-
1
];
}
if
(
i
==
len
)
{
out
[
outpos
++
]
=
casefold
?
TOUPPER
(
s
[
len
-
1
]
)
:
s
[
len
-
1
];
break
;
}
/* convert character before next non-ascii to ucs-4 */
*
ucs
=
casefold
?
TOUPPER
(
s
[
i
-
1
]
)
:
s
[
i
-
1
];
p
=
ucs
+
1
;
}
free
(
ucs
);
out
[
outpos
]
=
'\0'
;
return
out
;
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment