ldap_pvt_uc.h 4.17 KB
Newer Older
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1
2
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
3
 * Copyright 1998-2002 The OpenLDAP Foundation, Redwood City, California, USA
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
5
 * All rights reserved.
 *
6
7
8
9
10
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.  A copy of this license is available at
 * http://www.OpenLDAP.org/license.html or in file LICENSE in the
 * top-level directory of the distribution.
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
12
13
14
 */
/*
 * ldap_pvt_uc.h - Header for Unicode functions.
 * These are meant to be used by the OpenLDAP distribution only.
15
 * These should be named ldap_pvt_....()
Kurt Zeilenga's avatar
Kurt Zeilenga committed
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
 */

#ifndef _LDAP_PVT_UC_H
#define _LDAP_PVT_UC_H 1

#include <lber.h>				/* get ber_slen_t */

#ifdef _MSC_VER
#include "../libraries/liblunicode/ucdata/ucdata.h"
#else
#include "../libraries/liblunicode/ucdata.h"
#endif

LDAP_BEGIN_DECL

31
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
 * UTF-8 (in utf-8.c)
 */

/*
 * NOTE(review): this is a *signed* short; presumably meant to hold one
 * UCS-2 code unit -- confirm before relying on values above 0x7FFF.
 */
typedef short ldap_ucs2_t;

/* UCDATA uses UCS-2 passed in an unsigned long */
typedef unsigned long ldap_unicode_t;

/*
 * Convert between a UTF-8 sequence and a single ldap_unicode_t value by
 * forwarding to the UCS-4 conversion routines:
 *   ldap_utf8_to_unicode(p)      -> ldap_utf8_to_ucs4(p)
 *   ldap_unicode_to_utf8(c, buf) -> ldap_ucs4_to_utf8(c, buf)
 * The second mapping previously named ldap_ucs4_to_ucs4, which is not a
 * UCS-4 to UTF-8 converter.
 */
#define ldap_utf8_to_unicode( p ) ldap_utf8_to_ucs4((p))
#define ldap_unicode_to_utf8( c, buf ) ldap_ucs4_to_utf8((c),(buf))

/* returns the number of bytes in the UTF-8 string */
LDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
/* returns the number of UTF-8 characters in the string */
LDAP_F (ber_len_t) ldap_utf8_chars( const char * );
/* returns the length (in bytes) of the UTF-8 character */
LDAP_F (int) ldap_utf8_offset( const char * );
/* returns the length (in bytes) indicated by the UTF-8 character */
LDAP_F (int) ldap_utf8_charlen( const char * );
/* copies a UTF-8 character, returning the number of bytes copied */
LDAP_F (int) ldap_utf8_copy( char *, const char *);

/* returns a pointer to the next UTF-8 character in the string */
LDAP_F (char*) ldap_utf8_next( const char * );
/* returns a pointer to the previous UTF-8 character in the string */
LDAP_F (char*) ldap_utf8_prev( const char * );

/* primitive ctype routines -- not aware of non-ASCII characters */
LDAP_F (int) ldap_utf8_isascii( const char * );
LDAP_F (int) ldap_utf8_isalpha( const char * );
LDAP_F (int) ldap_utf8_isalnum( const char * );
LDAP_F (int) ldap_utf8_isdigit( const char * );
LDAP_F (int) ldap_utf8_isxdigit( const char * );
LDAP_F (int) ldap_utf8_isspace( const char * );

/* span characters not in set, return bytes spanned */
LDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set);
/* span characters in set, return bytes spanned */
LDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set);
/* return first occurrence of character in string */
LDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr);
/* return first character of set in string */
LDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set);
/* reentrant tokenizer */
LDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);

/* Optimizations */
78
79
80
/*
 * Lookup table consulted by LDAP_UTF8_CHARLEN for bytes that are not
 * ASCII; it is indexed by the byte value XOR 0x80, hence 128 entries.
 */
LDAP_V (const char) ldap_utf8_lentab[128];

/*
 * Yields 1 when the byte at p has its high bit clear (a 7-bit ASCII
 * character, including NUL) and 0 otherwise.
 * The former test, `byte ^ 0x80`, was nonzero for every byte except 0x80,
 * so multi-byte lead bytes such as 0xC3 were reported as ASCII.
 */
#define LDAP_UTF8_ISASCII(p) ( !(*(const unsigned char *)(p) & 0x80 ) )

/*
 * Length in bytes indicated by the character starting at p: 1 for ASCII,
 * otherwise the ldap_utf8_lentab entry selected by the byte with its high
 * bit flipped off (index 0..127).  Evaluates p more than once.
 */
#define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] )

/*
 * Same shortcut for ldap_utf8_offset(): ASCII yields 1 without a function
 * call, anything else is delegated.  Evaluates p more than once.
 */
#define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \
	? 1 : ldap_utf8_offset((p)) )

Kurt Zeilenga's avatar
Kurt Zeilenga committed
86
/*
 * Copy one UTF-8 character from s to d and yield the number of bytes
 * copied.  A single ASCII byte is stored inline (result 1); anything else
 * is handed to ldap_utf8_copy().  Both arguments may be evaluated more
 * than once, so they must be free of side effects.
 */
#define LDAP_UTF8_COPY(d,s) ( LDAP_UTF8_ISASCII(s) \
	? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s)) )

/*
 * Pointer to the character following the one at p.  An ASCII byte is
 * stepped over inline; any other byte is delegated to ldap_utf8_next().
 * The result is a (char *) in both arms.  p is evaluated more than once.
 */
#define LDAP_UTF8_NEXT(p) \
	( LDAP_UTF8_ISASCII(p) ? (char *)(p)+1 : ldap_utf8_next((p)) )

/* Advance the pointer variable p in place by one character. */
#define LDAP_UTF8_INCR(p) ( (p) = LDAP_UTF8_NEXT(p) )

/*
 * Backward counterparts of LDAP_UTF8_NEXT / LDAP_UTF8_INCR, provided for
 * symmetry.  There is no inline shortcut here: both always go through
 * ldap_utf8_prev().
 */
#define LDAP_UTF8_PREV(p) ( ldap_utf8_prev((p)) )
#define LDAP_UTF8_DECR(p) ( (p) = LDAP_UTF8_PREV((p)) )


/*
 * String helpers operating on arrays of ldap_unicode_t.
 * These probably should be renamed.
 * NOTE(review): the semantics below are inferred from the names and
 * signatures only (strncmp/strchr-style, with a ber_len_t length) --
 * confirm against the liblunicode implementation.
 */

/* presumably compares up to the given number of elements */
LDAP_LUNICODE_F(int) ucstrncmp(
	const ldap_unicode_t *,
	const ldap_unicode_t *,
	ber_len_t );

/* presumably as ucstrncmp, ignoring case */
LDAP_LUNICODE_F(int) ucstrncasecmp(
	const ldap_unicode_t *,
	const ldap_unicode_t *,
	ber_len_t );

/* presumably searches the first ber_len_t elements for the given value */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrnchr(
	const ldap_unicode_t *,
	ber_len_t,
	ldap_unicode_t );

/* presumably as ucstrnchr, ignoring case */
LDAP_LUNICODE_F(ldap_unicode_t *) ucstrncasechr(
	const ldap_unicode_t *,
	ber_len_t,
	ldap_unicode_t );

/* takes a non-const buffer and a length; presumably upper-cases in place */
LDAP_LUNICODE_F(void) ucstr2upper(
	ldap_unicode_t *,
	ber_len_t );

Kurt Zeilenga's avatar
Kurt Zeilenga committed
124
125
/*
 * Case-folding flag values (unsigned constants).
 * NOTE(review): presumably these are the values accepted by the
 * `unsigned` parameter of UTF8normalize() and UTF8normcmp() -- confirm.
 */
#define LDAP_UTF8_CASEFOLD		0x1U
#define LDAP_UTF8_NOCASEFOLD	0x0U
126

127
/*
 * Takes a struct berval pointer and an unsigned flags word, and returns
 * a char *.
 * NOTE(review): the flags are presumably LDAP_UTF8_CASEFOLD or
 * LDAP_UTF8_NOCASEFOLD, and ownership of the returned string is not
 * visible from this header -- confirm both against liblunicode.
 */
LDAP_LUNICODE_F(char *) UTF8normalize(
	struct berval *,
	unsigned );
130
131
132
133

/*
 * Takes two const char * strings and an unsigned flags word, and returns
 * an int.
 * NOTE(review): presumably a strcmp-style ordering of the normalized
 * forms, with the flags being LDAP_UTF8_CASEFOLD or LDAP_UTF8_NOCASEFOLD
 * -- confirm against liblunicode.
 */
LDAP_LUNICODE_F(int) UTF8normcmp(
	const char *,
	const char *,
	unsigned );
135

Kurt Zeilenga's avatar
Kurt Zeilenga committed
136
137
138
139
LDAP_END_DECL

#endif