ldap_pvt_uc.h 4.97 KB
Newer Older
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1
2
/* $OpenLDAP$ */
/*
 * Copyright 1998-2003 The OpenLDAP Foundation, Redwood City, California, USA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.  A copy of this license is available at
 * http://www.OpenLDAP.org/license.html or in file LICENSE in the
 * top-level directory of the distribution.
 */
/*
 * ldap_pvt_uc.h - Header for Unicode functions.
 * These are meant to be used by the OpenLDAP distribution only.
 * These should be named ldap_pvt_....()
 */

#ifndef _LDAP_PVT_UC_H
#define _LDAP_PVT_UC_H 1

#include <lber.h>				/* get ber_slen_t */

#include "../libraries/liblunicode/ucdata/ucdata.h"

LDAP_BEGIN_DECL

27
/*
 * UTF-8 (in utf-8.c)
 */

/* UCS-2 code unit (a signed short, as declared here). */
typedef short ldap_ucs2_t;

/* UCDATA uses UCS-2 passed in an unsigned long */
typedef unsigned long ldap_unicode_t;

/*
 * Aliases that convert between a UTF-8 sequence and one ldap_unicode_t.
 * Both forward their arguments unchanged to the UCS-4 routines; those
 * names are resolved where the macro is used, so the includer must have
 * the routines declared.
 *
 *   ldap_utf8_to_unicode( p )      - p points at the UTF-8 input
 *   ldap_unicode_to_utf8( c, buf ) - c is the character, buf the output
 *
 * Fixed: ldap_unicode_to_utf8 formerly expanded to ldap_ucs4_to_ucs4, a
 * typo for the encoder that mirrors ldap_utf8_to_ucs4.
 * NOTE(review): confirm the encoder's exact name against the header that
 * declares the UCS-4 routines.
 */
#define ldap_utf8_to_unicode( p ) ldap_utf8_to_ucs4((p))
#define ldap_unicode_to_utf8( c, buf ) ldap_ucs4_to_utf8((c),(buf))

38
39
40
41
42
/*
 * Convert a string with csize octets per character to UTF-8.
 *
 *   ucs   - berval holding the fixed-width input string
 *   csize - number of octets per input character
 *   utf8s - berval for the UTF-8 result (presumably filled in by the
 *           callee; allocation and ownership are not visible here --
 *           TODO confirm)
 *
 * NOTE(review): the meaning of the int return value is not visible in
 * this header; confirm against the implementation.
 */
LDAP_F( int ) ldap_ucs_to_utf8s LDAP_P((
	struct berval *ucs, int csize, struct berval *utf8s ));


Kurt Zeilenga's avatar
Kurt Zeilenga committed
43
44
45
46
47
48
49
50
/*
 * Length queries on UTF-8 data.  Each takes a pointer to the data.
 */
/* returns the number of bytes in the UTF-8 string */
LDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
/* returns the number of UTF-8 characters in the string */
LDAP_F (ber_len_t) ldap_utf8_chars( const char * );
/* returns the length (in bytes) of the UTF-8 character;
 * LDAP_UTF8_OFFSET() calls this only when the first byte is not ASCII */
LDAP_F (int) ldap_utf8_offset( const char * );
/* returns the length (in bytes) indicated by the UTF-8 character;
 * LDAP_UTF8_CHARLEN() is the table-driven macro with a similar purpose */
LDAP_F (int) ldap_utf8_charlen( const char * );
51
52
53
54
55
56

/* returns the length (in bytes) indicated by the UTF-8 character
 * also checks that shortest possible encoding was used
 *
 * NOTE(review): presumably the function form of LDAP_UTF8_CHARLEN2(),
 * which yields 0 when the shortest-encoding check fails -- confirm that
 * this function reports failure the same way.
 */
LDAP_F (int) ldap_utf8_charlen2( const char * );

Kurt Zeilenga's avatar
Kurt Zeilenga committed
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/* copies a UTF-8 character and returns the number of bytes copied;
 * the first argument is the destination and the second the source
 * (the order in which LDAP_UTF8_COPY(d,s) passes them) */
LDAP_F (int) ldap_utf8_copy( char *, const char *);

/* returns a pointer to the next UTF-8 character in the string;
 * note the result is a plain char * although the argument is const */
LDAP_F (char*) ldap_utf8_next( const char * );
/* returns a pointer to the previous UTF-8 character in the string;
 * NOTE(review): no start-of-buffer argument is taken, so the caller
 * presumably must ensure a previous character exists -- confirm */
LDAP_F (char*) ldap_utf8_prev( const char * );

/* primitive ctype routines -- not aware of non-ascii characters
 *
 * Each takes a pointer to the character to classify and returns an int
 * (presumably non-zero when the character belongs to the class, as with
 * the <ctype.h> functions -- TODO confirm).
 */
LDAP_F (int) ldap_utf8_isascii( const char * );
LDAP_F (int) ldap_utf8_isalpha( const char * );
LDAP_F (int) ldap_utf8_isalnum( const char * );
LDAP_F (int) ldap_utf8_isdigit( const char * );
LDAP_F (int) ldap_utf8_isxdigit( const char * );
LDAP_F (int) ldap_utf8_isspace( const char * );

/*
 * UTF-8 counterparts of the <string.h> scanning routines.  Sets,
 * separators and the character to find are all passed as strings
 * (const char *), not as integer character codes.
 */
/* span characters not in set, return bytes spanned */
LDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set);
/* span characters in set, return bytes spanned */
LDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set);
/* return first occurrence of character in string;
 * chr points at the UTF-8 character to look for */
LDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr);
/* return first character of set in string */
LDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set);
/* reentrant tokenizer; state is kept in *last rather than in a static
 * (presumably used like strtok_r -- TODO confirm calling convention) */
LDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);

/* Optimizations */
85
LDAP_V (const char) ldap_utf8_lentab[128];
86
LDAP_V (const char) ldap_utf8_mintab[32];
87

88
/* Yields 1 when the byte at p has its high bit clear (a single-byte
 * ASCII character) and 0 otherwise.  p is evaluated exactly once. */
#define LDAP_UTF8_ISASCII(p) ( ( *(const unsigned char *)(p) & 0x80 ) == 0 )
Kurt Zeilenga's avatar
Kurt Zeilenga committed
89
/*
 * Length in bytes of the UTF-8 character starting at p, judged from its
 * first byte only: 1 for ASCII, otherwise the ldap_utf8_lentab entry for
 * that byte with the high bit cleared.
 *
 * p may be evaluated more than once, so it must not have side effects.
 * (A stray line that had been wedged into the continuation, cutting the
 * macro in two, has been removed.)
 */
#define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] )
91
92
93
94
95
96

/* This is like CHARLEN but additionally validates to make sure
 * the char used the shortest possible encoding.
 * 'l' is used to temporarily hold the result of CHARLEN.
 *
 * Evaluates to the length stored in l, or to 0 when a character of
 * three or more bytes fails the check.  Lengths below 3 are passed
 * through unchecked.  The check ANDs the ldap_utf8_mintab entry for the
 * low five bits of the first byte with the second byte, (p)[1].
 *
 * l must be a modifiable lvalue.  p is evaluated several times and is
 * subscripted, so it must be a side-effect-free pointer to a character
 * type.  (Stray lines that had been wedged into the continuation have
 * been removed, and l is now parenthesized.)
 */
#define LDAP_UTF8_CHARLEN2(p, l) ( ( ( (l) = LDAP_UTF8_CHARLEN( p )) < 3 || \
	( ldap_utf8_mintab[*(const unsigned char *)(p) & 0x1f] & (p)[1] ) ) ? \
	(l) : 0 )

Kurt Zeilenga's avatar
Kurt Zeilenga committed
100
101
102
/* Byte length of the character at p: ldap_utf8_offset() is consulted
 * only when the first byte is not ASCII; ASCII yields 1 without a call.
 * p may be evaluated twice. */
#define LDAP_UTF8_OFFSET(p) \
	( !LDAP_UTF8_ISASCII(p) ? ldap_utf8_offset((p)) : 1 )

Kurt Zeilenga's avatar
Kurt Zeilenga committed
103
/*
 * Copy one UTF-8 character from s to d and yield the number of bytes
 * copied.  An ASCII character is stored with a single byte assignment
 * (result 1); anything else is delegated to ldap_utf8_copy().
 *
 * s is evaluated twice, so it must not have side effects.
 * (Stray lines that had been wedged between the two halves of this
 * definition, leaving the macro without its second line, have been
 * removed.)
 */
#define LDAP_UTF8_COPY(d,s) ( LDAP_UTF8_ISASCII(s) \
	? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s)) )

/* Pointer (as char *) to the character after the one at p.  An ASCII
 * character is stepped over by adding 1; otherwise ldap_utf8_next()
 * does the work.  p may be evaluated twice. */
#define LDAP_UTF8_NEXT(p) \
	( !LDAP_UTF8_ISASCII(p) ? ldap_utf8_next((p)) : (char *)(p)+1 )

/* Advance the pointer variable p to the next character, in place. */
#define LDAP_UTF8_INCR(p) ( (p) = LDAP_UTF8_NEXT(p) )

/* Backward counterparts of LDAP_UTF8_NEXT / LDAP_UTF8_INCR, provided
 * for symmetry.  There is no ASCII shortcut: both go straight to
 * ldap_utf8_prev(). */
#define LDAP_UTF8_PREV(p)	( ldap_utf8_prev((p)) )
#define LDAP_UTF8_DECR(p)	( (p) = LDAP_UTF8_PREV((p)) )


/* these probably should be renamed */

/*
 * Routines on strings of ldap_unicode_t with an explicit ber_len_t
 * length.
 * NOTE(review): the per-function notes below are inferred from names and
 * signatures only -- confirm against the liblunicode implementation.
 */

/* presumably compares up to the given number of characters, strncmp-style */
LDAP_LUNICODE_F(int) ucstrncmp(
	const ldap_unicode_t *,
	const ldap_unicode_t *,
	ber_len_t );

/* presumably as ucstrncmp, but ignoring case */
LDAP_LUNICODE_F(int) ucstrncasecmp(
	const ldap_unicode_t *,
	const ldap_unicode_t *,
	ber_len_t );

/* presumably finds a character within the first ber_len_t characters;
 * the result is non-const although the input string is const */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrnchr(
	const ldap_unicode_t *,
	ber_len_t,
	ldap_unicode_t );

/* presumably as ucstrnchr, but ignoring case */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrncasechr(
	const ldap_unicode_t *,
	ber_len_t,
	ldap_unicode_t );

/* takes a non-const string and a length and returns nothing, so it
 * presumably upper-cases the string in place */
LDAP_LUNICODE_F(void) ucstr2upper(
	ldap_unicode_t *,
	ber_len_t );

Kurt Zeilenga's avatar
Kurt Zeilenga committed
141
/*
 * Option flags.  The non-zero values are distinct single bits, so they
 * can be OR-ed together; LDAP_UTF8_NOCASEFOLD is zero, i.e. no bit set.
 * NOTE(review): presumably these are the values for the `unsigned`
 * argument of UTF8bvnormalize() / UTF8bvnormcmp(); the exact effect of
 * each bit is not visible in this header -- confirm in liblunicode.
 * (Stray numeric lines that had been left between the definitions have
 * been removed.)
 */
#define LDAP_UTF8_NOCASEFOLD	0x0U
#define LDAP_UTF8_CASEFOLD	0x1U
#define LDAP_UTF8_ARG1NFC	0x2U
#define LDAP_UTF8_ARG2NFC	0x4U
#define LDAP_UTF8_APPROX	0x8U
146

147
148
149
/*
 * Takes two bervals, an unsigned flags word and a memory context, and
 * returns a berval pointer.
 * NOTE(review): presumably the first berval is the UTF-8 input and the
 * second receives the normalized form, with the flags drawn from the
 * LDAP_UTF8_* constants and memctx selecting the allocator -- none of
 * this is visible in the header; confirm against the implementation.
 * (Stray lines that had been wedged into the parameter list have been
 * removed.)
 */
LDAP_LUNICODE_F(struct berval *) UTF8bvnormalize(
	struct berval *,
	struct berval *,
	unsigned,
	void *memctx );
152

153
154
155
/*
 * Takes two bervals, an unsigned flags word and a memory context, and
 * returns an int.
 * NOTE(review): presumably compares the two values after normalization
 * and returns a strcmp-style ordering, with the flags drawn from the
 * LDAP_UTF8_* constants -- not visible in the header; confirm against
 * the implementation.
 * (Stray lines that had been wedged into the parameter list have been
 * removed.)
 */
LDAP_LUNICODE_F(int) UTF8bvnormcmp(
	struct berval *,
	struct berval *,
	unsigned,
	void *memctx );
158

Kurt Zeilenga's avatar
Kurt Zeilenga committed
159
160
161
162
LDAP_END_DECL

#endif