ldap_pvt_uc.h 4.17 KB
Newer Older
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1
2
/* $OpenLDAP$ */
/*
3
 * Copyright 1998-2001 The OpenLDAP Foundation, Redwood City, California, USA
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
5
 * All rights reserved.
 *
6
7
8
9
10
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.  A copy of this license is available at
 * http://www.OpenLDAP.org/license.html or in file LICENSE in the
 * top-level directory of the distribution.
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
12
13
14
 */
/*
 * ldap_pvt_uc.h - Header for Unicode functions.
 * These are meant to be used by the OpenLDAP distribution only.
15
 * These should be named ldap_pvt_....()
Kurt Zeilenga's avatar
Kurt Zeilenga committed
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 */

#ifndef _LDAP_PVT_UC_H
#define _LDAP_PVT_UC_H 1

#include <ldap_cdefs.h>
#include <lber.h>				/* get ber_slen_t */

#ifdef _MSC_VER
#include "../libraries/liblunicode/ucdata/ucdata.h"
#else
#include "../libraries/liblunicode/ucdata.h"
#endif

LDAP_BEGIN_DECL

32
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
 * UTF-8 (in utf-8.c)
 */

/*
 * Presumably holds one UCS-2 code unit (going by the name).
 * NOTE(review): plain `short` is signed, so where short is 16 bits any
 * value >= 0x8000 reads back negative -- confirm this is intended.
 */
typedef short ldap_ucs2_t;

/*
 * UCDATA uses UCS-2 passed in an unsigned long.
 *
 * The two macros below are thin aliases that forward to the UCS-4
 * converters:
 *   ldap_utf8_to_unicode(p)      -> ldap_utf8_to_ucs4(p)
 *   ldap_unicode_to_utf8(c, buf) -> ldap_ucs4_to_utf8(c, buf)
 *
 * The second alias previously expanded to ldap_ucs4_to_ucs4, a name that
 * does not pair with the converter used by the first alias.
 * NOTE(review): neither converter is declared in this header; presumably
 * they come from ldap_pvt.h -- confirm the exact name before merging.
 */
typedef unsigned long ldap_unicode_t;
#define ldap_utf8_to_unicode( p ) ldap_utf8_to_ucs4((p))
#define ldap_unicode_to_utf8( c, buf ) ldap_ucs4_to_utf8((c),(buf))

/*
 * Length and copy primitives.  Every routine takes a pointer to UTF-8
 * text; none of the prototypes takes an explicit length argument.
 */

/* returns the number of bytes in the UTF-8 string */
LDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
/* returns the number of UTF-8 characters in the string */
LDAP_F (ber_len_t) ldap_utf8_chars( const char * );
/*
 * returns the length (in bytes) of the UTF-8 character;
 * LDAP_UTF8_OFFSET() falls back to this routine
 */
LDAP_F (int) ldap_utf8_offset( const char * );
/* returns the length (in bytes) indicated by the UTF-8 character */
LDAP_F (int) ldap_utf8_charlen( const char * );
/*
 * copies one UTF-8 character and returns the number of bytes copied;
 * LDAP_UTF8_COPY(d,s) passes the destination first and the source second
 */
LDAP_F (int) ldap_utf8_copy( char *, const char *);

/*
 * Cursor movement.  Both routines accept a const pointer but return a
 * non-const char *, so the caller's constness is not carried through.
 */

/* returns a pointer to the next UTF-8 character in the string */
LDAP_F (char*) ldap_utf8_next( const char * );
/* returns a pointer to the previous UTF-8 character in the string */
LDAP_F (char*) ldap_utf8_prev( const char * );

/*
 * Primitive ctype routines -- not aware of non-ASCII characters.
 * Each takes a pointer to the character to classify (not the character
 * value itself) and returns an int.
 */
LDAP_F (int) ldap_utf8_isascii( const char * );
LDAP_F (int) ldap_utf8_isalpha( const char * );
LDAP_F (int) ldap_utf8_isalnum( const char * );
LDAP_F (int) ldap_utf8_isdigit( const char * );
LDAP_F (int) ldap_utf8_isxdigit( const char * );
LDAP_F (int) ldap_utf8_isspace( const char * );

/* spans characters not in set; returns the number of bytes spanned */
LDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set);
/* spans characters in set; returns the number of bytes spanned */
LDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set);
/*
 * returns the first occurrence of a character in the string;
 * the character is passed by pointer (chr), not by value
 */
LDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr);
/* returns the first character of set found in the string */
LDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set);
/*
 * reentrant tokenizer
 * NOTE(review): `last` presumably carries the scan position between
 * calls, as with strtok_r -- confirm against the implementation.
 */
LDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);

/* Optimizations */
79
80
81
/*
 * 128-entry table read by LDAP_UTF8_CHARLEN(), which indexes it with the
 * byte value XOR 0x80.
 * NOTE(review): presumably defined in utf-8.c -- confirm.
 */
LDAP_V (const char) ldap_utf8_lentab[128];

/*
 * Evaluates to 1 when the byte at p is a 7-bit ASCII character (high bit
 * clear) and to 0 otherwise.  p is evaluated exactly once.
 *
 * The test must mask with 0x80: XOR-ing with 0x80 yields nonzero for
 * every byte except 0x80 itself, which made the lead and continuation
 * bytes of multi-byte sequences look like ASCII.
 */
#define LDAP_UTF8_ISASCII(p) ( !(*(const unsigned char *)(p) & 0x80 ) )
Kurt Zeilenga's avatar
Kurt Zeilenga committed
82
/*
 * Length in bytes indicated by the character at p: 1 when
 * LDAP_UTF8_ISASCII(p) is nonzero, otherwise the ldap_utf8_lentab entry
 * selected by the byte value XOR 0x80.
 * p is evaluated more than once -- do not pass an expression with side
 * effects.
 *
 * A stray line that had crept in after the backslash continuation split
 * the macro in two; the definition is contiguous again.
 */
#define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] )
Kurt Zeilenga's avatar
Kurt Zeilenga committed
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*
 * Yields 1 without a function call when LDAP_UTF8_ISASCII(p) is nonzero;
 * otherwise defers to ldap_utf8_offset(p).
 * p is evaluated more than once -- do not pass an expression with side
 * effects.
 */
#define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_offset((p)) )

/*
 * Copies one character from s to d.  When LDAP_UTF8_ISASCII(s) is nonzero
 * a single byte is stored through d and the result is 1; otherwise the
 * result is whatever ldap_utf8_copy(d, s) returns.
 * s is evaluated more than once -- do not pass an expression with side
 * effects.
 */
#define LDAP_UTF8_COPY(d,s) (	LDAP_UTF8_ISASCII(s) \
	? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s)) )

/*
 * Pointer to the character following p: p + 1 (cast to char *) when
 * LDAP_UTF8_ISASCII(p) is nonzero, otherwise ldap_utf8_next(p).
 * p is evaluated more than once -- do not pass an expression with side
 * effects.
 */
#define LDAP_UTF8_NEXT(p) (	LDAP_UTF8_ISASCII(p) \
	? (char *)(p)+1 : ldap_utf8_next((p)) )

/*
 * Advances p in place by assigning it LDAP_UTF8_NEXT(p).  p must be a
 * modifiable lvalue that can hold a char *, and it is evaluated several
 * times.
 */
#define LDAP_UTF8_INCR(p) ((p) = LDAP_UTF8_NEXT(p))

/*
 * For symmetry with LDAP_UTF8_NEXT / LDAP_UTF8_INCR.  There is no ASCII
 * shortcut here: PREV always calls ldap_utf8_prev(), and DECR assigns that
 * result back to p (so p must be a modifiable lvalue and is evaluated
 * twice).
 */
#define LDAP_UTF8_PREV(p) (ldap_utf8_prev((p)))
#define LDAP_UTF8_DECR(p) ((p)=LDAP_UTF8_PREV((p)))


/*
 * String helpers operating on arrays of ldap_unicode_t.
 * These probably should be renamed.
 * NOTE(review): the parameters are unnamed here; the ber_len_t argument is
 * presumably an element count (not bytes) -- confirm against the
 * liblunicode implementation.
 */

/* compares two ldap_unicode_t strings, bounded by the ber_len_t argument */
LDAP_LUNICODE_F(int) ucstrncmp(
	const ldap_unicode_t *,
	const ldap_unicode_t *,
	ber_len_t );

/* as ucstrncmp, presumably ignoring case -- confirm */
LDAP_LUNICODE_F(int) ucstrncasecmp(
	const ldap_unicode_t *,
	const ldap_unicode_t *,
	ber_len_t );

/*
 * searches a bounded ldap_unicode_t string for a character; returns a
 * non-const pointer although the string argument is const
 */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrnchr(
	const ldap_unicode_t *,
	ber_len_t,
	ldap_unicode_t );

/* as ucstrnchr, presumably ignoring case -- confirm */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrncasechr(
	const ldap_unicode_t *,
	ber_len_t,
	ldap_unicode_t );

/*
 * takes a non-const string and returns void, so presumably upper-cases
 * it in place -- confirm
 */
LDAP_LUNICODE_F(void) ucstr2upper(
	ldap_unicode_t *,
	ber_len_t );

125
126
127
/*
 * Case-folding switch values (1 = fold, 0 = do not fold).
 * NOTE(review): presumably passed as the trailing `char` argument of
 * UTF8normalize() and UTF8normcmp() -- confirm against liblunicode.
 */
#define UTF8_CASEFOLD 1
#define UTF8_NOCASEFOLD 0

128
/*
 * UTF8normalize - takes a berval and a char flag and returns a char *.
 *
 * NOTE(review): the flag is presumably UTF8_CASEFOLD or UTF8_NOCASEFOLD,
 * and ownership of the returned string is not visible from this header --
 * confirm both against the liblunicode implementation.
 *
 * Stray line-number residue that had landed inside the parameter list has
 * been removed so the declaration parses again; the signature itself is
 * unchanged.
 */
LDAP_LUNICODE_F(char *) UTF8normalize(
	struct berval *,
	char );

/*
 * UTF8normcmp - takes two strings and a char flag and returns an int.
 * NOTE(review): presumably compares the normalized forms, with the flag
 * being UTF8_CASEFOLD or UTF8_NOCASEFOLD; the sign convention of the
 * result is not visible here -- confirm against liblunicode.
 */
LDAP_LUNICODE_F(int) UTF8normcmp(
	const char *,
	const char *,
	char );
136

Kurt Zeilenga's avatar
Kurt Zeilenga committed
137
138
139
140
LDAP_END_DECL

#endif