/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
 * Copyright 1998-2003 The OpenLDAP Foundation, All Rights Reserved.
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
20
#include "lber_pvt.h"
21

22
23
#include "ldap_utf8.h"

24
25
26
27
28
29
#include "lutil_hash.h"
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
30

Kurt Zeilenga's avatar
Kurt Zeilenga committed
31
#ifdef SLAP_NVALUES
32
33
34
/* TO BE DELETED */
#define SLAP_MR_DN_FOLD (0)

35
36
37
#define SLAP_MR_ASSOCIATED(mr, with) \
	((mr) == (with) || (mr)->smr_associated == (with))

38
39
40
41
42
43
44
#define xUTF8StringNormalize NULL
#define xIA5StringNormalize NULL
#define xtelephoneNumberNormalize NULL
#define xgeneralizedTimeNormalize NULL
#define xintegerNormalize NULL
#define xnumericStringNormalize NULL
#define xnameUIDNormalize NULL
45
#define xdnNormalize NULL
46

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/* (new) normalization routines */
#define caseExactIA5Normalize						IA5StringNormalize
#define caseIgnoreIA5Normalize						IA5StringNormalize
#define caseExactNormalize							UTF8StringNormalize
#define caseIgnoreNormalize							UTF8StringNormalize

#define integerNormalize							NULL
#define integerFirstComponentNormalize				NULL
#define numericStringNormalize						NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL
#define generalizedTimeNormalize					NULL
#define bitStringNormalize							NULL
#define telephoneNumberNormalize					NULL

62
#define distinguishedNameNormalize	dnNormalize
63
64
65
66
#define distinguishedNameMatch  	dnMatch
#define distinguishedNameIndexer	octetStringIndexer
#define distinguishedNameFilter		octetStringFilter

67
#define uniqueMemberMatch			dnMatch
68
69
70
71
72

#define objectIdentifierMatch	octetStringMatch
#define objectIdentifierIndexer	octetStringIndexer
#define objectIdentifierFilter	octetStringFilter

73
74
#define OpenLDAPaciMatch						NULL

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#define bitStringMatch			octetStringMatch
#define bitStringIndexer		octetStringIndexer
#define bitStringFilter			octetStringFilter

#define integerMatch NULL
#define integerOrderingMatch NULL
#define integerIndexer NULL
#define integerFilter NULL

#define generalizedTimeMatch	NULL
#define generalizedTimeOrderingMatch	NULL

#define caseIgnoreMatch		octetStringMatch
#define caseIgnoreOrderingMatch		octetStringOrderingMatch
#define caseIgnoreIndexer	octetStringIndexer
#define caseIgnoreFilter	octetStringFilter

92
#define caseIgnoreSubstringsMatch		octetStringSubstringsMatch
93
94
95
96
97
98
99
100
#define caseIgnoreSubstringsIndexer		NULL
#define caseIgnoreSubstringsFilter		NULL

#define caseExactMatch		octetStringMatch
#define caseExactOrderingMatch		octetStringOrderingMatch
#define caseExactIndexer	octetStringIndexer
#define caseExactFilter		octetStringFilter

101
#define caseExactSubstringsMatch		octetStringSubstringsMatch
102
103
104
105
106
107
108
#define caseExactSubstringsIndexer		NULL
#define caseExactSubstringsFilter		NULL

#define caseExactIA5Match		octetStringMatch
#define caseExactIA5Indexer		octetStringIndexer
#define caseExactIA5Filter		octetStringFilter

109
#define caseExactIA5SubstringsMatch			octetStringSubstringsMatch
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#define caseExactIA5SubstringsIndexer		NULL
#define caseExactIA5SubstringsFilter		NULL

#define caseIgnoreIA5Match		octetStringMatch
#define caseIgnoreIA5Indexer	octetStringIndexer
#define caseIgnoreIA5Filter		octetStringFilter

#define caseIgnoreIA5SubstringsMatch		caseExactIA5SubstringsMatch
#define caseIgnoreIA5SubstringsIndexer		caseExactIA5SubstringsIndexer
#define caseIgnoreIA5SubstringsFilter		caseExactIA5SubstringsFilter

#define numericStringMatch		octetStringMatch
#define numericStringIndexer	octetStringIndexer
#define numericStringFilter		octetStringFilter

#define numericStringSubstringsMatch		caseExactIA5SubstringsMatch
#define numericStringSubstringsIndexer		caseExactIA5SubstringsIndexer
#define numericStringSubstringsFilter		caseExactIA5SubstringsFilter

#define telephoneNumberMatch		octetStringMatch
#define telephoneNumberIndexer		octetStringIndexer
#define telephoneNumberFilter		octetStringFilter

#define telephoneNumberSubstringsMatch		caseExactIA5SubstringsMatch
#define telephoneNumberSubstringsIndexer	caseExactIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseExactIA5SubstringsFilter

Kurt Zeilenga's avatar
Kurt Zeilenga committed
137
138
#endif

139
/* validation routines */
140
#define berValidate						blobValidate
141

142
/* approx matching rules */
143
144
145
146
#ifdef SLAP_NVALUES
#define directoryStringApproxMatchOID	NULL
#define IA5StringApproxMatchOID			NULL
#else
147
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
148
149
150
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
151
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
152
#define IA5StringApproxMatch			approxMatch
153
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
154
#define IA5StringApproxFilter			approxFilter
155
#endif
156

157
158
#ifndef SLAP_NVALUES

159
160
#define xdnNormalize dnNormalize

161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/* (new) normalization routines */
#define caseExactNormalize							NULL
#define caseExactIA5Normalize						NULL
#define caseIgnoreNormalize							NULL
#define caseIgnoreIA5Normalize						NULL
#define distinguishedNameNormalize					NULL
#define integerNormalize							NULL
#define integerFirstComponentNormalize				NULL
#define numericStringNormalize						NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL
#define generalizedTimeNormalize					NULL
#define uniqueMemberNormalize						NULL
#define bitStringNormalize							NULL
#define telephoneNumberNormalize					NULL


178
179
180
181
/* matching routines */
#define bitStringMatch					octetStringMatch
#define bitStringIndexer				octetStringIndexer
#define bitStringFilter					octetStringFilter
182

183
184
185
186
187
#define numericStringMatch				caseIgnoreIA5Match
#define numericStringIndexer			NULL
#define numericStringFilter				NULL
#define numericStringSubstringsIndexer	NULL
#define numericStringSubstringsFilter	NULL
188

189
190
191
192
#define objectIdentifierMatch			octetStringMatch
#define objectIdentifierIndexer			caseIgnoreIA5Indexer
#define objectIdentifierFilter			caseIgnoreIA5Filter

193
194
#define octetStringSubstringsMatch		NULL
#define OpenLDAPaciMatch				NULL
195

196
197
198
199
200
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match

#define uniqueMemberMatch				dnMatch
#define numericStringSubstringsMatch    NULL
201

202
203
204
205
206
207
208
209
210
211
212
213
#define caseExactIndexer				caseExactIgnoreIndexer
#define caseExactFilter					caseExactIgnoreFilter
#define caseExactOrderingMatch			caseExactMatch
#define caseExactSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseExactSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseExactSubstringsFilter		caseExactIgnoreSubstringsFilter
#define caseIgnoreIndexer				caseExactIgnoreIndexer
#define caseIgnoreFilter				caseExactIgnoreFilter
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseIgnoreSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseIgnoreSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseIgnoreSubstringsFilter		caseExactIgnoreSubstringsFilter
214

215
216
217
218
219
220
221
222
223
224
225
#define integerOrderingMatch			integerMatch
#define integerFirstComponentMatch		integerMatch

#define distinguishedNameMatch			dnMatch
#define distinguishedNameIndexer		caseExactIgnoreIndexer
#define distinguishedNameFilter			caseExactIgnoreFilter

#define telephoneNumberMatch			caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define telephoneNumberIndexer				caseIgnoreIA5Indexer
#define telephoneNumberFilter				caseIgnoreIA5Filter
226
227
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter
228
#endif
229

230

231
/*
 * Find the first byte of bv that equals either the TOLOWER() or the
 * TOUPPER() form of c.
 *
 * On a hit the byte's offset is stored through len and a pointer to the
 * byte inside bv->bv_val is returned.  NULL is returned, and *len is left
 * untouched, when c is NUL or when no byte of bv matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	const char lc = TOLOWER( c );
	const char uc = TOUPPER( c );
	ber_len_t pos = 0;

	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		const char cur = bv->bv_val[pos];

		if( cur == uc || cur == lc ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
248

249
250
251
static int
octetStringMatch(
	int *matchp,
252
	slap_mask_t flags,
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

270
271
272
273
274
275
276
277
278
279
280
/*
 * Ordering match on raw octets.
 *
 * The two values are compared with memcmp() over the length of the
 * shorter one; when that common prefix is identical, the difference of
 * the two lengths decides.  The result is stored in *matchp.
 * assertedValue is interpreted as a struct berval pointer.
 * flags, syntax and mr are not consulted.  Always returns LDAP_SUCCESS.
 */
static int
octetStringOrderingMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *asserted = (struct berval *) assertedValue;
	ber_len_t v_len  = value->bv_len;
	ber_len_t av_len = asserted->bv_len;
	ber_len_t shared = av_len;
	int order;

	if( v_len < av_len ) {
		shared = v_len;
	}

	order = memcmp( value->bv_val, asserted->bv_val, shared );
	if( order == 0 ) {
		/* common prefix is equal: the shorter value sorts first */
		order = v_len - av_len;
	}

	*matchp = order;
	return LDAP_SUCCESS;
}

292
/* Index generation function */
293
int octetStringIndexer(
294
295
	slap_mask_t use,
	slap_mask_t flags,
296
297
298
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
299
300
	BerVarray values,
	BerVarray *keysp )
301
302
303
{
	int i;
	size_t slen, mlen;
304
	BerVarray keys;
305
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
306
	unsigned char	HASHdigest[HASH_BYTES];
307
	struct berval digest;
308
309
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
310

311
	for( i=0; values[i].bv_val != NULL; i++ ) {
312
313
314
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
315
316
317
	/* we should have at least one value at this point */
	assert( i > 0 );

318
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
319

320
321
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
322

323
	for( i=0; values[i].bv_val != NULL; i++ ) {
324
		HASH_Init( &HASHcontext );
325
		if( prefix != NULL && prefix->bv_len > 0 ) {
326
			HASH_Update( &HASHcontext,
327
328
				prefix->bv_val, prefix->bv_len );
		}
329
		HASH_Update( &HASHcontext,
330
			syntax->ssyn_oid, slen );
331
		HASH_Update( &HASHcontext,
332
			mr->smr_oid, mlen );
333
		HASH_Update( &HASHcontext,
334
			values[i].bv_val, values[i].bv_len );
335
		HASH_Final( HASHdigest, &HASHcontext );
336

337
		ber_dupbv( &keys[i], &digest );
338
339
	}

340
	keys[i].bv_val = NULL;
341
	keys[i].bv_len = 0;
342
343
344
345
346
347
348

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
349
int octetStringFilter(
350
351
	slap_mask_t use,
	slap_mask_t flags,
352
353
354
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
355
	void * assertedValue,
356
	BerVarray *keysp )
357
358
{
	size_t slen, mlen;
359
	BerVarray keys;
360
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
361
	unsigned char	HASHdigest[HASH_BYTES];
362
	struct berval *value = (struct berval *) assertedValue;
363
	struct berval digest;
364
365
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
366

367
368
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
369

370
	keys = ch_malloc( sizeof( struct berval ) * 2 );
371

372
	HASH_Init( &HASHcontext );
373
	if( prefix != NULL && prefix->bv_len > 0 ) {
374
		HASH_Update( &HASHcontext,
375
376
			prefix->bv_val, prefix->bv_len );
	}
377
	HASH_Update( &HASHcontext,
378
		syntax->ssyn_oid, slen );
379
	HASH_Update( &HASHcontext,
380
		mr->smr_oid, mlen );
381
	HASH_Update( &HASHcontext,
382
		value->bv_val, value->bv_len );
383
	HASH_Final( HASHdigest, &HASHcontext );
384

385
386
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
387
	keys[1].bv_len = 0;
388
389
390
391
392

	*keysp = keys;

	return LDAP_SUCCESS;
}
393

394
395
396
397
398
/*
 * Validator that rejects everything: neither argument is inspected and
 * the result is always LDAP_INVALID_SYNTAX.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value allowed */
	const int verdict = LDAP_INVALID_SYNTAX;

	return verdict;
}

403
static int
404
blobValidate(
405
406
407
408
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
409
	return LDAP_SUCCESS;
410
411
}

412
413
414
415
416
417
418
419
420
421
422
423
424
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict (opening quote, only '0'/'1' digits,
 * closing quote followed by an upper-case 'B') so that no normalization
 * is needed before simplistic matching.  syntax is not consulted.
 * Returns LDAP_SUCCESS for a well-formed value, LDAP_INVALID_SYNTAX
 * otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t pos;

	/* the shortest legal value is the empty bit string: ''B */
	if( in->bv_len < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	if( in->bv_val[0] != '\'' ) {
		return LDAP_INVALID_SYNTAX;
	}
	if( in->bv_val[in->bv_len-2] != '\'' ) {
		return LDAP_INVALID_SYNTAX;
	}
	if( in->bv_val[in->bv_len-1] != 'B' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the two quotes must be a binary digit */
	for( pos = 1; pos + 2 < in->bv_len; pos++ ) {
		const char digit = in->bv_val[pos];

		if( digit != '0' && digit != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
/*
 * Validate a Name And Optional UID value: a DN, optionally followed by
 * "#" and a bit string such as '0101'B.
 *
 * An empty value is accepted.  Otherwise the value is copied; when the
 * copy ends in 'B the trailing "#'<binary digits>'B" part is checked and
 * cut off, and what remains is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX for a malformed UID part,
 * LDAP_OTHER when the working copy cannot be allocated, or whatever
 * dnValidate() reports for the DN part.  syntax is not consulted.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length test keeps the bv_len-2 index from wrapping around
	 * for a one byte value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/* The shortest possible UID part is #''B.  Anything shorter
		 * cannot be valid, and scanning it would index before the
		 * start of the buffer.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits; i never drops below 1 */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

489
490
491
492
493
494
495
496
497
#ifdef SLAP_NVALUES
static int
uniqueMemberNormalize(
	slap_mask_t usage,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
#else
498
static int
499
xnameUIDNormalize(
500
501
502
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
503
#endif
504
505
506
507
508
509
{
	struct berval out;
	int rc;

	ber_dupbv( &out, val );
	if( out.bv_len != 0 ) {
510
		struct berval uid = { 0, NULL };
511
512
513
514
515

		if( out.bv_val[out.bv_len-1] == 'B'
			&& out.bv_val[out.bv_len-2] == '\'' )
		{
			/* assume presence of optional UID */
516
			uid.bv_val = strrchr( out.bv_val, '#' );
517

518
			if( uid.bv_val == NULL ) {
519
520
521
522
				free( out.bv_val );
				return LDAP_INVALID_SYNTAX;
			}

523
524
			uid.bv_len = out.bv_len - (uid.bv_val - out.bv_val);
			out.bv_len -= uid.bv_len--;
525
526

			/* temporarily trim the UID */
527
			*(uid.bv_val++) = '\0';
528
529
530
531
532
533
534
535
536
		}

		rc = dnNormalize2( NULL, &out, normalized );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

537
		if( uid.bv_len ) {
538
			normalized->bv_val = ch_realloc( normalized->bv_val,
539
				normalized->bv_len + uid.bv_len + sizeof("#") );
540
541
542
543
544
545

			/* insert the separator */
			normalized->bv_val[normalized->bv_len++] = '#';

			/* append the UID */
			AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
546
547
				uid.bv_val, uid.bv_len );
			normalized->bv_len += uid.bv_len;
548
549
550
551
552
553
554
555
556
557
558

			/* terminate */
			normalized->bv_val[normalized->bv_len] = '\0';
		}

		free( out.bv_val );
	}

	return LDAP_SUCCESS;
}

559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Validate a Boolean value: only the exact upper-case strings "TRUE"
 * and "FALSE" are accepted.  The validation is very unforgiving so that
 * no normalization is required before simplistic matching.
 * syntax is not consulted.
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
590
	slap_mask_t flags,
591
592
593
594
595
596
597
598
599
600
601
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

602
603
604
605
606
/*-------------------------------------------------------------------
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Summary:
607
  StringSyntax		X.500	LDAP	Matching/Comments
608
609
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
610
  PrintableString	subset	subset	i/e + ignore insignificant spaces
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

626
  In LDAPv3, a directory string is a UTF-8 encoded UCS string.
627
  A directory string cannot be zero length.
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which
  would be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces
  and could be empty.  However, in X.500, all attribute values of
  numeric string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::=
	    NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same
  constraint (but it's unclear how such an assertion could ever
  be true). In LDAP, there is one syntax (numericString) not two
  (numericString with constraint, numericString without constraint).
  This should be treated as numericString with non-empty constraint.
  Note that while someone may have no ISDN number, there are no ISDN
  numbers which are zero length.

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters
  and can be empty.  In X.500, semantics much like NumericString (see
  serialNumber for a like example) excepting uses insignificant space
  handling instead of ignore all spaces.  

IA5String
  Basically same as PrintableString.  There are no examples in X.500,
  but same logic applies.  So we require them to be non-empty as
  well.

-------------------------------------------------------------------*/

674
675
676
677
678
679
680
681
682
/*
 * Validate a UTF-8 string.
 *
 * An empty value is rejected only when syntax is the directoryString
 * syntax.  The value is then walked one character at a time: the length
 * announced by the lead byte (LDAP_UTF8_CHARLEN2) must fit in the bytes
 * that remain, every continuation byte must look like 10xxxxxx, and the
 * length must agree with LDAP_UTF8_OFFSET().
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( in->bv_len == 0 && syntax == slap_schema.si_syn_directoryString ) {
		/* directory strings cannot be empty */
		return LDAP_INVALID_SYNTAX;
	}

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/* A character that claims more bytes than are left is
		 * truncated.  Reject it here: the continuation checks below
		 * would otherwise read past the value, and the unsigned
		 * count would wrap around on count-=len.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	if( count != 0 ) {
		return LDAP_INVALID_SYNTAX;
	}

	return LDAP_SUCCESS;
}

733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
#ifdef SLAP_NVALUES
/*
 * Normalize a UTF-8 string for matching.
 *
 * The value is run through UTF8bvnormalize() - without case folding
 * when mr is, or is associated with, caseExactMatch, with case folding
 * otherwise, and with LDAP_UTF8_APPROX added when use requests
 * SLAP_MR_EQUALITY_APPROX - and insignificant spaces are then removed
 * in place: leading and trailing spaces are dropped and runs of spaces
 * collapse to one.  A string consisting only of spaces becomes a single
 * space.
 *
 * A value whose bv_val is NULL yields an empty result.
 * Returns LDAP_SUCCESS, or LDAP_OTHER when UTF8bvnormalize() fails.
 * syntax is not consulted.
 */
static int
UTF8StringNormalize(
	slap_mask_t use,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
{
	struct berval tmp, nvalue;
	int flags;
	ber_len_t i;
	int wasspace;

	if( val->bv_val == NULL ) {
		/* assume we're dealing with a syntax (e.g., UTF8String)
		 * which allows empty strings
		 */
		normalized->bv_len = 0;
		normalized->bv_val = NULL;
		return LDAP_SUCCESS;
	}

	flags = SLAP_MR_ASSOCIATED(mr, slap_schema.si_mr_caseExactMatch )
		? LDAP_UTF8_NOCASEFOLD : LDAP_UTF8_CASEFOLD;

	/* The mask must be applied before the comparison: == binds
	 * tighter than &, so without the inner parentheses this tested
	 * ( use & 1 ) instead of the approx bits.
	 */
	flags |= ( ( use & SLAP_MR_EQUALITY_APPROX ) == SLAP_MR_EQUALITY_APPROX )
		? LDAP_UTF8_APPROX : 0;

	val = UTF8bvnormalize( val, &tmp, flags );
	if( val == NULL ) {
		return LDAP_OTHER;
	}

	/* collapse spaces (in place) */
	nvalue.bv_len = 0;
	nvalue.bv_val = tmp.bv_val;

	wasspace=1; /* trim leading spaces */
	for( i=0; i<tmp.bv_len; i++) {
		if ( ASCII_SPACE( tmp.bv_val[i] )) {
			if( wasspace++ == 0 ) {
				/* trim repeated spaces */
				nvalue.bv_val[nvalue.bv_len++] = tmp.bv_val[i];
			}
		} else {
			wasspace = 0;
			nvalue.bv_val[nvalue.bv_len++] = tmp.bv_val[i];
		}
	}

	if( nvalue.bv_len ) {
		if( wasspace ) {
			/* last character was a space, trim it */
			--nvalue.bv_len;
		}
		nvalue.bv_val[nvalue.bv_len] = '\0';

	} else {
		/* string of all spaces is treated as one space */
		/* NOTE(review): assumes the buffer returned by
		 * UTF8bvnormalize() holds at least two bytes here -
		 * confirm for a zero-length input */
		nvalue.bv_val[0] = ' ';
		nvalue.bv_val[1] = '\0';
		nvalue.bv_len = 1;
	}

	*normalized = nvalue;
	return LDAP_SUCCESS;
}
#else
800

801
static int
802
xUTF8StringNormalize(
803
804
	Syntax *syntax,
	struct berval *val,
805
	struct berval *normalized )
806
{
807
	char *p, *q, *s, *e;
808
	int len = 0;
809

Kurt Zeilenga's avatar
Kurt Zeilenga committed
810
811
812
	/* validator should have refused an empty string */
	assert( val->bv_len );

813
	p = val->bv_val;
814

815
	/* Ignore initial whitespace */
816
	/* All space is ASCII. All ASCII is 1 byte */
817
	for ( ; p < val->bv_val + val->bv_len && ASCII_SPACE( p[ 0 ] ); p++ );
818

819
	normalized->bv_len = val->bv_len - (p - val->bv_val);
Kurt Zeilenga's avatar
Kurt Zeilenga committed
820
821
822
823
824
825

	if( !normalized->bv_len ) {
		ber_mem2bv( " ", 1, 1, normalized );
		return LDAP_SUCCESS;
	}

826
827
	ber_mem2bv( p, normalized->bv_len, 1, normalized );
	e = normalized->bv_val + normalized->bv_len;
828
829
830
831

	assert( normalized->bv_val );

	p = q = normalized->bv_val;
832
	s = NULL;
833

834
	while ( p < e ) {
835
836
837
838
839
		q += len;
		if ( ASCII_SPACE( *p ) ) {
			s = q - len;
			len = 1;
			*q = *p++;
840

841
			/* Ignore the extra whitespace */
842
843
			while ( ASCII_SPACE( *p ) ) {
				p++;
844
			}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
845
		} else {
846
847
848
			len = LDAP_UTF8_COPY(q,p);
			s=NULL;
			p+=len;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
849
		}
850
851
	}

852
	assert( normalized->bv_val <= p );
853
	assert( q+len <= p );
854

855
	/* cannot start with a space */
856
	assert( !ASCII_SPACE( normalized->bv_val[0] ) );
857
858
859
860
861
862
863
864

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */

	if ( s != NULL ) {
Howard Chu's avatar
Howard Chu committed
865
		len = q - s;
866
		q = s;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
867
	}
868

869
	/* cannot end with a space */
870
871
872
	assert( !ASCII_SPACE( *q ) );

	q += len;
873
874
875
876

	/* null terminate */
	*q = '\0';

877
	normalized->bv_len = q - normalized->bv_val;
878

879
	return LDAP_SUCCESS;
880
881
}

882
/* Returns Unicode canonically normalized copy of a substring assertion
883
 * Skipping attribute description */
884
static SubstringsAssertion *
885
UTF8SubstringsAssertionNormalize(
886
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
887
	unsigned casefold )
888
889
890
891
{
	SubstringsAssertion *nsa;
	int i;

Julius Enarusai's avatar
   
Julius Enarusai committed
892
	nsa = (SubstringsAssertion *)SLAP_CALLOC( 1, sizeof(SubstringsAssertion) );
893
894
895
896
	if( nsa == NULL ) {
		return NULL;
	}

897
	if( sa->sa_initial.bv_val != NULL ) {
898
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
899
		if( nsa->sa_initial.bv_val == NULL ) {
900
901
902
903
904
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
905
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
906
907
			/* empty */
		}
908
		nsa->sa_any = (struct berval *)
Julius Enarusai's avatar
   
Julius Enarusai committed
909
910
911
912
			SLAP_MALLOC( (i + 1) * sizeof(struct berval) );
		if( nsa->sa_any == NULL ) {
				goto err;
		}
913

914
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
915
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
916
				casefold );
917
			if( nsa->sa_any[i].bv_val == NULL ) {
918
919
920
				goto err;
			}
		}
921
		nsa->sa_any[i].bv_val = NULL;
922
923
	}

924
	if( sa->sa_final.bv_val != NULL ) {
925
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
926
		if( nsa->sa_final.bv_val == NULL ) {
927
928
929
930
931
932
933
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
934
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
935
	if ( nsa->sa_any ) ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
936
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
937
938
939
940
	ch_free( nsa );
	return NULL;
}

941
#ifndef SLAPD_APPROX_OLDSINGLESTRING
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Approximate ("sounds like") match.
 *
 * Both the stored value and the asserted value are normalized with
 * LDAP_UTF8_APPROX and split into words at SLAPD_APPROX_DELIMITER.
 * Each word of the asserted value is turned into its phonetic() code
 * and looked up among the codes of the value's words, searching only
 * after the position of the previous hit so that word order is kept.
 * With SLAPD_APPROX_INITIALS defined, a one letter word only has to
 * match the first letter of a word, ignoring case.
 *
 * *matchp is 0 when every word of the asserted value was found this
 * way, and 1 otherwise - including when a word is missing, when the
 * asserted value holds no word at all, or when normalization fails.
 * flags, syntax and mr are not consulted.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *normval, *normassert;
	char *key, **phon, **word, *cur;
	int i, nwords, len, offset=0, nextword=0;

	/* Yes, this is necessary */
	normval = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( normval == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	normassert = UTF8bvnormalize( ((struct berval *)assertedValue),
		NULL, LDAP_UTF8_APPROX );
	if( normassert == NULL ) {
		ber_bvfree( normval );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Isolate how many words there are: every delimiter is overwritten
	 * with a NUL so that the words become separate C strings */
	nwords = 1;
	cur = normval->bv_val;
	while( *cur ) {
		cur = strpbrk( cur, SLAPD_APPROX_DELIMITER );
		if( cur == NULL ) break;
		*cur = '\0';
		nwords++;
		cur++;
	}

	/* Get a phonetic copy of each word */
	word = (char **)ch_malloc( nwords * sizeof(char *) );
	phon = (char **)ch_malloc( nwords * sizeof(char *) );
	cur = normval->bv_val;
	for( i = 0; i < nwords; i++ ) {
		word[i] = cur;
		phon[i] = phonetic( cur );
		cur += strlen( cur ) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while( (ber_len_t) offset < normassert->bv_len ) {
		len = strcspn( normassert->bv_val + offset, SLAPD_APPROX_DELIMITER);
		if( len == 0 ) {
			/* sitting on a delimiter: step over it */
			offset++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=nextword; i<nwords; i++ )
				if( !strncasecmp( normassert->bv_val + offset, word[i], 1 )) {
					nextword=i+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			normassert->bv_val[offset+len] = '\0';
			key = phonetic( normassert->bv_val + offset );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=nextword; i<nwords; i++ ){
				if( !strcmp( key, phon[i] ) ){
					nextword = i+1;
					break;
				}
			}
			ch_free( key );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= nwords ) {
			nextword=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += len+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( nextword > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( normassert );
	for( i=0; i<nwords; i++ ) {
		ch_free( phon[i] );
	}
	ch_free( phon );
	ch_free( word );
	ber_bvfree( normval );

	return LDAP_SUCCESS;
}

1060
static int 
1061
1062
1063
1064
1065
1066
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1067
1068
	BerVarray values,
	BerVarray *keysp )
1069
{
1070
	char *c;
1071
	int i,j, len, wordcount, keycount=0;
1072
	struct berval *newkeys;
1073
	BerVarray keys=NULL;
1074

1075
	for( j=0; values[j].bv_val != NULL; j++ ) {
1076
		struct berval val = { 0, NULL };
1077
		/* Yes, this is necessary */
1078
1079
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
1080

1081
		/* Isolate how many words there are. There will be a key for each */
1082
		for( wordcount = 0, c = val.bv_val; *c; c++) {
1083
1084
1085
1086
1087
1088
1089
1090
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
1091
1092
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1093
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
1094
1095
1096
1097
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
1098
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
1099
1100
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
1101
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
1102
1103
1104
1105
			keycount++;
			i++;
		}

1106
		ber_memfree( val.bv_val );
1107
	}
1108
	keys[keycount].bv_val = NULL;
1109
1110
1111
1112
1113
	*keysp = keys;

	return LDAP_SUCCESS;
}

1114
static int 
1115
1116
1117
1118
1119
1120
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1121
	void * assertedValue,
1122
	BerVarray *keysp )
1123
{
1124
	char *c;
1125
	int i, count, len;
1126
	struct berval *val;
1127
	BerVarray keys;
1128

1129
	/* Yes, this is necessary */
1130
	val = UTF8bvnormalize( ((struct berval *)assertedValue),
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1131
		NULL, LDAP_UTF8_APPROX );
1132
	if( val == NULL || val->bv_val == NULL ) {
1133
1134
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
1135
		*keysp = keys;
1136
		ber_bvfree( val );
1137
1138
1139
		return LDAP_SUCCESS;
	}

1140
	/* Isolate how many words there are. There will be a key for each */
1141
	for( count = 0,c = val->bv_val; *c; c++) {
1142
1143
1144
1145
1146
1147
1148
1149
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
1150
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
1151
1152

	/* Get a phonetic copy of each word */
1153
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
1154
1155
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
1156
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
1157
1158
1159
		i++;
	}

1160
	ber_bvfree( val );
1161

1162
	keys[count].bv_val = NULL;
1163
1164
1165
1166
1167
1168