schema_init.c 120 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2003 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
8
9
10
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
20
#include "lber_pvt.h"
21

22
23
#include "ldap_utf8.h"

24
25
26
27
28
29
#include "lutil_hash.h"
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
30

Kurt Zeilenga's avatar
Kurt Zeilenga committed
31
32
#define SLAP_NVALUES 1

Kurt Zeilenga's avatar
Kurt Zeilenga committed
33
#ifdef SLAP_NVALUES
34
35
36
/* TO BE DELETED */
#define SLAP_MR_DN_FOLD (0)

37
38
39
#define SLAP_MR_ASSOCIATED(mr, with) \
	((mr) == (with) || (mr)->smr_associated == (with))

40
41
42
43
44
45
46
#define xUTF8StringNormalize NULL
#define xIA5StringNormalize NULL
#define xtelephoneNumberNormalize NULL
#define xgeneralizedTimeNormalize NULL
#define xintegerNormalize NULL
#define xnumericStringNormalize NULL
#define xnameUIDNormalize NULL
47
#define xdnNormalize NULL
48

49
50
51
52
53
54
55
56
57
58
/* (new) normalization routines */
#define caseExactIA5Normalize						IA5StringNormalize
#define caseIgnoreIA5Normalize						IA5StringNormalize
#define caseExactNormalize							UTF8StringNormalize
#define caseIgnoreNormalize							UTF8StringNormalize

#define integerFirstComponentNormalize				NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL

59
#define distinguishedNameNormalize	dnNormalize
60
61
62
63
#define distinguishedNameMatch  	dnMatch
#define distinguishedNameIndexer	octetStringIndexer
#define distinguishedNameFilter		octetStringFilter

64
65
#define integerOrderingMatch			integerMatch
#define integerFirstComponentMatch		NULL
66
#define integerIndexer				octetStringIndexer
67
#define integerFilter				octetStringFilter
68
69
70
71

#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match

72
#define uniqueMemberMatch			dnMatch /* FIXME! */
73
74
75
76
77

#define objectIdentifierMatch	octetStringMatch
#define objectIdentifierIndexer	octetStringIndexer
#define objectIdentifierFilter	octetStringFilter

78
79
#define OpenLDAPaciMatch						NULL

80
81
82
83
84
85
86
87
88
#define bitStringMatch			octetStringMatch
#define bitStringIndexer		octetStringIndexer
#define bitStringFilter			octetStringFilter

#define caseIgnoreMatch		octetStringMatch
#define caseIgnoreOrderingMatch		octetStringOrderingMatch
#define caseIgnoreIndexer	octetStringIndexer
#define caseIgnoreFilter	octetStringFilter

89
#define caseIgnoreSubstringsMatch		octetStringSubstringsMatch
90
91
#define caseIgnoreSubstringsIndexer		octetStringSubstringsIndexer
#define caseIgnoreSubstringsFilter		octetStringSubstringsFilter
92
93
94
95
96
97

#define caseExactMatch		octetStringMatch
#define caseExactOrderingMatch		octetStringOrderingMatch
#define caseExactIndexer	octetStringIndexer
#define caseExactFilter		octetStringFilter

98
#define caseExactSubstringsMatch		octetStringSubstringsMatch
99
100
#define caseExactSubstringsIndexer		octetStringSubstringsIndexer
#define caseExactSubstringsFilter		octetStringSubstringsFilter
101
102
103
104
105

#define caseExactIA5Match		octetStringMatch
#define caseExactIA5Indexer		octetStringIndexer
#define caseExactIA5Filter		octetStringFilter

106
#define caseExactIA5SubstringsMatch			octetStringSubstringsMatch
107
108
#define caseExactIA5SubstringsIndexer		octetStringSubstringsIndexer
#define caseExactIA5SubstringsFilter		octetStringSubstringsFilter
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133

#define caseIgnoreIA5Match		octetStringMatch
#define caseIgnoreIA5Indexer	octetStringIndexer
#define caseIgnoreIA5Filter		octetStringFilter

#define caseIgnoreIA5SubstringsMatch		caseExactIA5SubstringsMatch
#define caseIgnoreIA5SubstringsIndexer		caseExactIA5SubstringsIndexer
#define caseIgnoreIA5SubstringsFilter		caseExactIA5SubstringsFilter

#define numericStringMatch		octetStringMatch
#define numericStringIndexer	octetStringIndexer
#define numericStringFilter		octetStringFilter

#define numericStringSubstringsMatch		caseExactIA5SubstringsMatch
#define numericStringSubstringsIndexer		caseExactIA5SubstringsIndexer
#define numericStringSubstringsFilter		caseExactIA5SubstringsFilter

#define telephoneNumberMatch		octetStringMatch
#define telephoneNumberIndexer		octetStringIndexer
#define telephoneNumberFilter		octetStringFilter

#define telephoneNumberSubstringsMatch		caseExactIA5SubstringsMatch
#define telephoneNumberSubstringsIndexer	caseExactIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseExactIA5SubstringsFilter

Kurt Zeilenga's avatar
Kurt Zeilenga committed
134
135
#define booleanIndexer					octetStringIndexer
#define booleanFilter					octetStringFilter
Kurt Zeilenga's avatar
Kurt Zeilenga committed
136
137
#endif

138
/* validation routines */
139
#define berValidate						blobValidate
140

141
/* approx matching rules */
142
143
144
145
#ifdef SLAP_NVALUES
#define directoryStringApproxMatchOID	NULL
#define IA5StringApproxMatchOID			NULL
#else
146
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
147
148
149
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
150
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
151
#define IA5StringApproxMatch			approxMatch
152
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
153
#define IA5StringApproxFilter			approxFilter
154
#endif
155

156
157
#ifndef SLAP_NVALUES

158
159
#define xdnNormalize dnNormalize

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/* (new) normalization routines */
#define caseExactNormalize							NULL
#define caseExactIA5Normalize						NULL
#define caseIgnoreNormalize							NULL
#define caseIgnoreIA5Normalize						NULL
#define distinguishedNameNormalize					NULL
#define integerNormalize							NULL
#define integerFirstComponentNormalize				NULL
#define numericStringNormalize						NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL
#define generalizedTimeNormalize					NULL
#define uniqueMemberNormalize						NULL
#define telephoneNumberNormalize					NULL


176
177
178
179
/* matching routines */
#define bitStringMatch					octetStringMatch
#define bitStringIndexer				octetStringIndexer
#define bitStringFilter					octetStringFilter
180

181
182
183
184
185
#define numericStringMatch				caseIgnoreIA5Match
#define numericStringIndexer			NULL
#define numericStringFilter				NULL
#define numericStringSubstringsIndexer	NULL
#define numericStringSubstringsFilter	NULL
186

187
188
189
190
#define objectIdentifierMatch			octetStringMatch
#define objectIdentifierIndexer			caseIgnoreIA5Indexer
#define objectIdentifierFilter			caseIgnoreIA5Filter

191
192
#define octetStringSubstringsMatch		NULL
#define OpenLDAPaciMatch				NULL
193

194
195
196
197
198
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match

#define uniqueMemberMatch				dnMatch
#define numericStringSubstringsMatch    NULL
199

200
201
202
203
204
205
206
207
208
209
210
211
#define caseExactIndexer				caseExactIgnoreIndexer
#define caseExactFilter					caseExactIgnoreFilter
#define caseExactOrderingMatch			caseExactMatch
#define caseExactSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseExactSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseExactSubstringsFilter		caseExactIgnoreSubstringsFilter
#define caseIgnoreIndexer				caseExactIgnoreIndexer
#define caseIgnoreFilter				caseExactIgnoreFilter
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseIgnoreSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseIgnoreSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseIgnoreSubstringsFilter		caseExactIgnoreSubstringsFilter
212

213
214
215
216
217
218
219
220
221
222
223
#define integerOrderingMatch			integerMatch
#define integerFirstComponentMatch		integerMatch

#define distinguishedNameMatch			dnMatch
#define distinguishedNameIndexer		caseExactIgnoreIndexer
#define distinguishedNameFilter			caseExactIgnoreFilter

#define telephoneNumberMatch			caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define telephoneNumberIndexer				caseIgnoreIA5Indexer
#define telephoneNumberFilter				caseIgnoreIA5Filter
224
225
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter
Kurt Zeilenga's avatar
Kurt Zeilenga committed
226
227
228

#define booleanIndexer					octetStringIndexer
#define booleanFilter					octetStringFilter
229
#endif
230

231

232
/*
 * Find the first byte of bv that equals either TOLOWER(c) or TOUPPER(c).
 *
 * On a hit, the byte's offset from the start of bv is stored in *len and
 * a pointer to the byte is returned.  NULL is returned (and *len is left
 * untouched) when c is NUL or no byte of bv matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	const char lc = TOLOWER( c );
	const char uc = TOUPPER( c );
	ber_len_t pos = 0;

	/* a NUL never matches */
	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		const char ch = bv->bv_val[pos];

		if( ch == uc || ch == lc ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
249

250
251
252
static int
octetStringMatch(
	int *matchp,
253
	slap_mask_t flags,
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

271
272
273
274
275
276
277
278
279
280
281
/*
 * Ordering match on raw octets.
 *
 * The common leading bytes are compared with memcmp(); when they are
 * identical the shorter value sorts first.
 *
 * matchp        receives <0, 0 or >0 as value sorts before, equal to or
 *               after the asserted value
 * flags, syntax, mr   unused here
 * value         stored value
 * assertedValue asserted value, passed as a (struct berval *)
 *
 * Always returns LDAP_SUCCESS.
 */
static int
octetStringOrderingMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *asserted = (struct berval *) assertedValue;
	ber_len_t v_len  = value->bv_len;
	ber_len_t av_len = asserted->bv_len;

	int match = memcmp( value->bv_val, asserted->bv_val,
		(v_len < av_len ? v_len : av_len) );

	if( match == 0 && v_len != av_len ) {
		/* Derive the sign from a direct comparison: narrowing the
		 * unsigned difference v_len - av_len to int can lose the sign
		 * (or collapse to 0) when ber_len_t is wider than int.
		 */
		match = v_len < av_len ? -1 : 1;
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

293
/* Index generation function */
294
int octetStringIndexer(
295
296
	slap_mask_t use,
	slap_mask_t flags,
297
298
299
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
300
301
	BerVarray values,
	BerVarray *keysp )
302
303
304
{
	int i;
	size_t slen, mlen;
305
	BerVarray keys;
306
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
307
	unsigned char	HASHdigest[HASH_BYTES];
308
	struct berval digest;
309
310
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
311

312
	for( i=0; values[i].bv_val != NULL; i++ ) {
313
314
315
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
316
317
318
	/* we should have at least one value at this point */
	assert( i > 0 );

319
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
320

321
322
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
323

324
	for( i=0; values[i].bv_val != NULL; i++ ) {
325
		HASH_Init( &HASHcontext );
326
		if( prefix != NULL && prefix->bv_len > 0 ) {
327
			HASH_Update( &HASHcontext,
328
329
				prefix->bv_val, prefix->bv_len );
		}
330
		HASH_Update( &HASHcontext,
331
			syntax->ssyn_oid, slen );
332
		HASH_Update( &HASHcontext,
333
			mr->smr_oid, mlen );
334
		HASH_Update( &HASHcontext,
335
			values[i].bv_val, values[i].bv_len );
336
		HASH_Final( HASHdigest, &HASHcontext );
337

338
		ber_dupbv( &keys[i], &digest );
339
340
	}

341
	keys[i].bv_val = NULL;
342
	keys[i].bv_len = 0;
343
344
345
346
347
348
349

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
350
int octetStringFilter(
351
352
	slap_mask_t use,
	slap_mask_t flags,
353
354
355
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
356
	void * assertedValue,
357
	BerVarray *keysp )
358
359
{
	size_t slen, mlen;
360
	BerVarray keys;
361
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
362
	unsigned char	HASHdigest[HASH_BYTES];
363
	struct berval *value = (struct berval *) assertedValue;
364
	struct berval digest;
365
366
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
367

368
369
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
370

371
	keys = ch_malloc( sizeof( struct berval ) * 2 );
372

373
	HASH_Init( &HASHcontext );
374
	if( prefix != NULL && prefix->bv_len > 0 ) {
375
		HASH_Update( &HASHcontext,
376
377
			prefix->bv_val, prefix->bv_len );
	}
378
	HASH_Update( &HASHcontext,
379
		syntax->ssyn_oid, slen );
380
	HASH_Update( &HASHcontext,
381
		mr->smr_oid, mlen );
382
	HASH_Update( &HASHcontext,
383
		value->bv_val, value->bv_len );
384
	HASH_Final( HASHdigest, &HASHcontext );
385

386
387
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
388
	keys[1].bv_len = 0;
389
390
391
392
393

	*keysp = keys;

	return LDAP_SUCCESS;
}
394

395
396
397
398
399
/*
 * Validator that rejects every value.
 * Both arguments are ignored; the result is always LDAP_INVALID_SYNTAX.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value allowed */
	const int verdict = LDAP_INVALID_SYNTAX;

	return verdict;
}

404
static int
405
blobValidate(
406
407
408
409
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
410
	return LDAP_SUCCESS;
411
412
}

413
414
415
416
417
418
419
420
421
422
423
424
425
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict (opening quote, only '0'/'1' digits,
 * closing quote, trailing 'B') so that no normalization is needed before
 * simplistic matching.  syntax is unused.
 *
 * Returns LDAP_SUCCESS for a well-formed value, LDAP_INVALID_SYNTAX
 * otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *s = in->bv_val;
	const ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* the shortest legal value is ''B */
	if( n < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* framing: leading quote, closing quote, trailing B */
	if( !( s[0] == '\'' && s[n-2] == '\'' && s[n-1] == 'B' ) ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos < n-2; pos++ ) {
		if( s[pos] != '0' && s[pos] != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
/*
 * Validate a Name And Optional UID value: a DN optionally followed by
 * '#' and a bit string such as '0101'B.
 *
 * An empty value is accepted.  Otherwise the value is copied; if the
 * copy ends in 'B preceded by a quote, the tail is taken to be the
 * optional UID, checked for the #'<binary digits>'B shape and trimmed
 * off, and what remains is handed to dnValidate().
 *
 * syntax is unused.
 *
 * Returns LDAP_SUCCESS for an empty value, LDAP_OTHER if the working
 * copy cannot be allocated, LDAP_INVALID_SYNTAX for a malformed UID
 * part, and otherwise whatever dnValidate() returns.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length guard keeps bv_len-2 from wrapping on a 1-byte value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/* The shortest UID part is #''B (4 bytes).  Anything shorter
		 * cannot hold both '#' and the opening quote, and would make
		 * the index arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk back over the binary digits */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* i >= 1 here, so i-1 is a valid index */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

490
491
492
493
494
495
496
497
498
#ifdef SLAP_NVALUES
static int
uniqueMemberNormalize(
	slap_mask_t usage,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
#else
499
static int
500
xnameUIDNormalize(
501
502
503
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
504
#endif
505
506
507
508
509
510
{
	struct berval out;
	int rc;

	ber_dupbv( &out, val );
	if( out.bv_len != 0 ) {
511
		struct berval uid = { 0, NULL };
512
513
514
515
516

		if( out.bv_val[out.bv_len-1] == 'B'
			&& out.bv_val[out.bv_len-2] == '\'' )
		{
			/* assume presence of optional UID */
517
			uid.bv_val = strrchr( out.bv_val, '#' );
518

519
			if( uid.bv_val == NULL ) {
520
521
522
523
				free( out.bv_val );
				return LDAP_INVALID_SYNTAX;
			}

524
525
			uid.bv_len = out.bv_len - (uid.bv_val - out.bv_val);
			out.bv_len -= uid.bv_len--;
526
527

			/* temporarily trim the UID */
528
			*(uid.bv_val++) = '\0';
529
530
531
532
533
534
535
536
537
		}

		rc = dnNormalize2( NULL, &out, normalized );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

538
		if( uid.bv_len ) {
539
			normalized->bv_val = ch_realloc( normalized->bv_val,
540
				normalized->bv_len + uid.bv_len + sizeof("#") );
541
542
543
544
545
546

			/* insert the separator */
			normalized->bv_val[normalized->bv_len++] = '#';

			/* append the UID */
			AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
547
548
				uid.bv_val, uid.bv_len );
			normalized->bv_len += uid.bv_len;
549
550
551
552
553
554
555
556
557
558
559

			/* terminate */
			normalized->bv_val[normalized->bv_len] = '\0';
		}

		free( out.bv_val );
	}

	return LDAP_SUCCESS;
}

560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 *
 * booleanValidate accepts exactly the 4-byte value TRUE and the 5-byte
 * value FALSE (upper case only) and returns LDAP_SUCCESS for them; any
 * other value yields LDAP_INVALID_SYNTAX.  syntax is unused.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* very unforgiving validation, requires no normalization
	 * before simplistic matching
	 */
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
591
	slap_mask_t flags,
592
593
594
595
596
597
598
599
600
601
602
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

603
604
605
606
607
/*-------------------------------------------------------------------
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Summary:
608
  StringSyntax		X.500	LDAP	Matching/Comments
609
610
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
611
  PrintableString	subset	subset	i/e + ignore insignificant spaces
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

627
  In LDAPv3, a directory string is a UTF-8 encoded UCS string.
628
  A directory string cannot be zero length.
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which
  would be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces
  and could be empty.  However, in X.500, all attribute values of
  numeric string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::=
	    NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same
  constraint (but it's unclear how such an assertion could ever
  be true). In LDAP, there is one syntax (numericString) not two
  (numericString with constraint, numericString without constraint).
  This should be treated as numericString with non-empty constraint.
  Note that while someone may have no ISDN number, there are no ISDN
  numbers which are zero length.

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters
  and can be empty.  In X.500, semantics much like NumericString (see
  serialNumber for a like example) excepting uses insignificant space
  handling instead of ignore all spaces.  

IA5String
  Basically same as PrintableString.  There are no examples in X.500,
  but same logic applies.  So we require them to be non-empty as
  well.

-------------------------------------------------------------------*/

675
676
677
678
679
680
681
682
683
/*
 * Validate that a value is well-formed UTF-8.
 *
 * The value is walked one character at a time.  For each character the
 * length is obtained with LDAP_UTF8_CHARLEN2(), every continuation byte
 * must have the form 10xxxxxx, and the length must agree with
 * LDAP_UTF8_OFFSET().  An empty value is rejected only when syntax is
 * the directoryString syntax.
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = in->bv_val;

	if( in->bv_len == 0 && syntax == slap_schema.si_syn_directoryString ) {
		/* directory strings cannot be empty */
		return LDAP_INVALID_SYNTAX;
	}

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/* A character must not extend beyond the end of the value.
		 * Without this check a truncated trailing sequence would make
		 * the unsigned count wrap around and the continuation-byte
		 * checks below read past the value.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	/* the loop only ends once every byte has been consumed */
	return LDAP_SUCCESS;
}

734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
#ifdef SLAP_NVALUES
/*
 * Normalize a UTF-8 string value for matching.
 *
 * The value is run through UTF8bvnormalize() - without case folding
 * when mr is, or is associated with, caseExactMatch, with case folding
 * otherwise, and in approximate mode when use requests
 * SLAP_MR_EQUALITY_APPROX.  Spaces are then squeezed in place: leading
 * and trailing spaces are dropped and each run of spaces becomes a
 * single one.  A value consisting only of spaces normalizes to one
 * space.
 *
 * A val with a NULL bv_val yields an empty (NULL, 0) result.
 * syntax is unused.
 *
 * Returns LDAP_SUCCESS, or LDAP_OTHER when UTF8bvnormalize() fails.
 */
static int
UTF8StringNormalize(
	slap_mask_t use,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
{
	struct berval folded, squeezed;
	int nflags;
	int idx, in_space;

	if( val->bv_val == NULL ) {
		/* assume we're dealing with a syntax (e.g., UTF8String)
		 * which allows empty strings
		 */
		normalized->bv_len = 0;
		normalized->bv_val = NULL;
		return LDAP_SUCCESS;
	}

	if( SLAP_MR_ASSOCIATED(mr, slap_schema.si_mr_caseExactMatch ) ) {
		nflags = LDAP_UTF8_NOCASEFOLD;
	} else {
		nflags = LDAP_UTF8_CASEFOLD;
	}
	if( ( use & SLAP_MR_EQUALITY_APPROX ) == SLAP_MR_EQUALITY_APPROX ) {
		nflags |= LDAP_UTF8_APPROX;
	}

	if( UTF8bvnormalize( val, &folded, nflags ) == NULL ) {
		return LDAP_OTHER;
	}

	/* collapse spaces (in place): squeezed writes into folded's buffer,
	 * never ahead of the byte being read
	 */
	squeezed.bv_val = folded.bv_val;
	squeezed.bv_len = 0;

	in_space = 1; /* trim leading spaces */
	for( idx=0; idx<folded.bv_len; idx++ ) {
		const char ch = folded.bv_val[idx];

		if( !ASCII_SPACE( ch ) ) {
			in_space = 0;
			squeezed.bv_val[squeezed.bv_len++] = ch;

		} else if( !in_space ) {
			/* first space of a run is kept, repeats are dropped */
			squeezed.bv_val[squeezed.bv_len++] = ch;
			in_space = 1;
		}
	}

	if( squeezed.bv_len == 0 ) {
		/* string of all spaces is treated as one space */
		squeezed.bv_val[0] = ' ';
		squeezed.bv_val[1] = '\0';
		squeezed.bv_len = 1;

	} else {
		if( in_space ) {
			/* last character was a space, trim it */
			--squeezed.bv_len;
		}
		squeezed.bv_val[squeezed.bv_len] = '\0';
	}

	*normalized = squeezed;
	return LDAP_SUCCESS;
}
#else
801

802
static int
803
xUTF8StringNormalize(
804
805
	Syntax *syntax,
	struct berval *val,
806
	struct berval *normalized )
807
{
808
	char *p, *q, *s, *e;
809
	int len = 0;
810

Kurt Zeilenga's avatar
Kurt Zeilenga committed
811
812
813
	/* validator should have refused an empty string */
	assert( val->bv_len );

814
	p = val->bv_val;
815

816
	/* Ignore initial whitespace */
817
	/* All space is ASCII. All ASCII is 1 byte */
818
	for ( ; p < val->bv_val + val->bv_len && ASCII_SPACE( p[ 0 ] ); p++ );
819

820
	normalized->bv_len = val->bv_len - (p - val->bv_val);
Kurt Zeilenga's avatar
Kurt Zeilenga committed
821
822
823
824
825
826

	if( !normalized->bv_len ) {
		ber_mem2bv( " ", 1, 1, normalized );
		return LDAP_SUCCESS;
	}

827
828
	ber_mem2bv( p, normalized->bv_len, 1, normalized );
	e = normalized->bv_val + normalized->bv_len;
829
830
831
832

	assert( normalized->bv_val );

	p = q = normalized->bv_val;
833
	s = NULL;
834

835
	while ( p < e ) {
836
837
838
839
840
		q += len;
		if ( ASCII_SPACE( *p ) ) {
			s = q - len;
			len = 1;
			*q = *p++;
841

842
			/* Ignore the extra whitespace */
843
844
			while ( ASCII_SPACE( *p ) ) {
				p++;
845
			}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
846
		} else {
847
848
849
			len = LDAP_UTF8_COPY(q,p);
			s=NULL;
			p+=len;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
850
		}
851
852
	}

853
	assert( normalized->bv_val <= p );
854
	assert( q+len <= p );
855

856
	/* cannot start with a space */
857
	assert( !ASCII_SPACE( normalized->bv_val[0] ) );
858
859
860
861
862
863
864
865

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */

	if ( s != NULL ) {
Howard Chu's avatar
Howard Chu committed
866
		len = q - s;
867
		q = s;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
868
	}
869

870
	/* cannot end with a space */
871
872
873
	assert( !ASCII_SPACE( *q ) );

	q += len;
874
875
876
877

	/* null terminate */
	*q = '\0';

878
	normalized->bv_len = q - normalized->bv_val;
879

880
	return LDAP_SUCCESS;
881
882
}

883
/* Returns Unicode canonically normalized copy of a substring assertion
884
 * Skipping attribute description */
885
static SubstringsAssertion *
886
UTF8SubstringsAssertionNormalize(
887
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
888
	unsigned casefold )
889
890
891
892
{
	SubstringsAssertion *nsa;
	int i;

Julius Enarusai's avatar
   
Julius Enarusai committed
893
	nsa = (SubstringsAssertion *)SLAP_CALLOC( 1, sizeof(SubstringsAssertion) );
894
895
896
897
	if( nsa == NULL ) {
		return NULL;
	}

898
	if( sa->sa_initial.bv_val != NULL ) {
899
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
900
		if( nsa->sa_initial.bv_val == NULL ) {
901
902
903
904
905
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
906
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
907
908
			/* empty */
		}
909
		nsa->sa_any = (struct berval *)
Julius Enarusai's avatar
   
Julius Enarusai committed
910
911
912
913
			SLAP_MALLOC( (i + 1) * sizeof(struct berval) );
		if( nsa->sa_any == NULL ) {
				goto err;
		}
914

915
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
916
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
917
				casefold );
918
			if( nsa->sa_any[i].bv_val == NULL ) {
919
920
921
				goto err;
			}
		}
922
		nsa->sa_any[i].bv_val = NULL;
923
924
	}

925
	if( sa->sa_final.bv_val != NULL ) {
926
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
927
		if( nsa->sa_final.bv_val == NULL ) {
928
929
930
931
932
933
934
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
935
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
936
	if ( nsa->sa_any ) ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
937
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
938
939
940
941
	ch_free( nsa );
	return NULL;
}

942
#ifndef SLAPD_APPROX_OLDSINGLESTRING
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Approximate (phonetic) match.
 *
 * Both values are normalized with LDAP_UTF8_APPROX and cut into words
 * at SLAPD_APPROX_DELIMITER characters.  Every word of the asserted
 * value must be found among the words of value, in the same order,
 * by comparing their phonetic() codes; value may hold further words
 * in between.  When SLAPD_APPROX_INITIALS is defined, a one-letter
 * asserted word only has to equal (ignoring case) the first letter of
 * a word.
 *
 * matchp receives 0 on a match and 1 otherwise, including when either
 * value cannot be normalized or the asserted value holds no word.
 * flags, syntax and mr are unused.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *wanted;
	char *probe, **codes, **words, *cur;
	int w, nwords, span, offset=0, next_word=0;

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8bvnormalize( ((struct berval *)assertedValue),
		NULL, LDAP_UTF8_APPROX );
	if( wanted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Isolate how many words there are: each delimiter is overwritten
	 * with a NUL so that the words become separate C strings
	 */
	nwords = 1;
	cur = stored->bv_val;
	while ( *cur ) {
		cur = strpbrk( cur, SLAPD_APPROX_DELIMITER );
		if ( cur == NULL ) break;
		*cur++ = '\0';
		nwords++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( nwords * sizeof(char *) );
	codes = (char **)ch_malloc( nwords * sizeof(char *) );
	cur = stored->bv_val;
	for ( w = 0; w < nwords; w++ ) {
		words[w] = cur;
		codes[w] = phonetic(cur);
		cur += strlen(cur) + 1;
	}

	/* Work through the asserted value's words, to see if they are
	   there, in the same order. */
	span = 0;
	while ( (ber_len_t) offset < wanted->bv_len ) {
		span = strcspn( wanted->bv_val + offset, SLAPD_APPROX_DELIMITER);
		if( span == 0 ) {
			/* sitting on a delimiter */
			offset++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( span == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( w=next_word; w<nwords; w++ )
				if( !strncasecmp( wanted->bv_val + offset, words[w], 1 )) {
					next_word=w+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			wanted->bv_val[offset+span] = '\0';
			probe = phonetic( wanted->bv_val + offset );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( w=next_word; w<nwords; w++ ){
				if( strcmp( probe, codes[w] ) == 0 ){
					next_word = w+1;
					break;
				}
			}
			ch_free( probe );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( w >= nwords ) {
			next_word=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += span+1;
	}

	/* If the words were seen, call it a match */
	*matchp = ( next_word > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( wanted );
	for( w=0; w<nwords; w++ ) {
		ch_free( codes[w] );
	}
	ch_free( codes );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

1061
static int 
1062
1063
1064
1065
1066
1067
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1068
1069
	BerVarray values,
	BerVarray *keysp )
1070
{
1071
	char *c;
1072
	int i,j, len, wordcount, keycount=0;
1073
	struct berval *newkeys;
1074
	BerVarray keys=NULL;
1075

1076
	for( j=0; values[j].bv_val != NULL; j++ ) {
1077
		struct berval val = { 0, NULL };
1078
		/* Yes, this is necessary */
1079
1080
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
1081

1082
		/* Isolate how many words there are. There will be a key for each */
1083
		for( wordcount = 0, c = val.bv_val; *c; c++) {
1084
1085
1086
1087
1088
1089
1090
1091
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
1092
1093
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1094
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
1095
1096
1097
1098
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
1099
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
1100
1101
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
1102
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
1103
1104
1105
1106
			keycount++;
			i++;
		}

1107
		ber_memfree( val.bv_val );
1108
	}
1109
	keys[keycount].bv_val = NULL;
1110
1111
1112
1113
1114
	*keysp = keys;

	return LDAP_SUCCESS;
}

1115
static int 
1116
1117
1118
1119
1120
1121
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1122
	void * assertedValue,
1123
	BerVarray *keysp )
1124
{
1125
	char *c;
1126
	int i, count, len;
1127
	struct berval *val;
1128
	BerVarray keys;
1129

1130
	/* Yes, this is necessary */
1131
	val = UTF8bvnormalize( ((struct berval *)assertedValue),
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1132
		NULL, LDAP_UTF8_APPROX );
1133
	if( val == NULL || val->bv_val == NULL ) {
1134
1135
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
1136
		*keysp = keys;
1137
		ber_bvfree( val );
1138
1139
1140
		return LDAP_SUCCESS;
	}

1141
	/* Isolate how many words there are. There will be a key for each */
1142
	for( count = 0,c = val->bv_val; *c; c++) {