schema_init.c 77.5 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2003 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
8
9
10
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
20
#include "lber_pvt.h"
21

22
23
#include "ldap_utf8.h"

24
25
26
27
28
29
#include "lutil_hash.h"
/*
 * Local names for the lutil hash primitives pulled in from "lutil_hash.h".
 * The indexer and filter routines in this file use only these HASH_*
 * spellings when they build index keys.
 */
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
30

Kurt Zeilenga's avatar
Kurt Zeilenga committed
31
32
#define SLAP_NVALUES 1

33
34
35
36
37
38
39
40
41
42
43
44
45
/*
 * True when matching rule `mr` is `with` itself, or when its
 * smr_associated field points at `with`.
 * Note: `mr` is evaluated twice and is dereferenced, so it must be a
 * non-NULL expression without side effects.
 */
#define SLAP_MR_ASSOCIATED(mr, with) \
	((mr) == (with) || (mr)->smr_associated == (with))

/*
 * (new) normalization routines
 *
 * Each per-rule normalizer name is an alias for a shared implementation:
 * the IA5 rules share IA5StringNormalize (not visible in this part of the
 * file) and the directory-string rules share UTF8StringNormalize.
 */
#define caseExactIA5Normalize						IA5StringNormalize
#define caseIgnoreIA5Normalize						IA5StringNormalize
#define caseExactNormalize							UTF8StringNormalize
#define caseIgnoreNormalize							UTF8StringNormalize

/* these names are defined as NULL, i.e. no normalizer function */
#define integerFirstComponentNormalize				NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL

46
#define distinguishedNameNormalize	dnNormalize
47
48
49
50
#define distinguishedNameMatch  	dnMatch
#define distinguishedNameIndexer	octetStringIndexer
#define distinguishedNameFilter		octetStringFilter

51
52
#define integerOrderingMatch			integerMatch
#define integerFirstComponentMatch		NULL
53
#define integerIndexer				octetStringIndexer
54
#define integerFilter				octetStringFilter
55
56
57
58

#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match

59
#define uniqueMemberMatch			dnMatch /* FIXME! */
60
61
62
63
64

#define objectIdentifierMatch	octetStringMatch
#define objectIdentifierIndexer	octetStringIndexer
#define objectIdentifierFilter	octetStringFilter

65
66
#define OpenLDAPaciMatch						NULL

67
68
69
70
71
72
73
74
75
#define bitStringMatch			octetStringMatch
#define bitStringIndexer		octetStringIndexer
#define bitStringFilter			octetStringFilter

#define caseIgnoreMatch		octetStringMatch
#define caseIgnoreOrderingMatch		octetStringOrderingMatch
#define caseIgnoreIndexer	octetStringIndexer
#define caseIgnoreFilter	octetStringFilter

76
#define caseIgnoreSubstringsMatch		octetStringSubstringsMatch
77
78
#define caseIgnoreSubstringsIndexer		octetStringSubstringsIndexer
#define caseIgnoreSubstringsFilter		octetStringSubstringsFilter
79
80
81
82
83
84

#define caseExactMatch		octetStringMatch
#define caseExactOrderingMatch		octetStringOrderingMatch
#define caseExactIndexer	octetStringIndexer
#define caseExactFilter		octetStringFilter

85
#define caseExactSubstringsMatch		octetStringSubstringsMatch
86
87
#define caseExactSubstringsIndexer		octetStringSubstringsIndexer
#define caseExactSubstringsFilter		octetStringSubstringsFilter
88
89
90
91
92

#define caseExactIA5Match		octetStringMatch
#define caseExactIA5Indexer		octetStringIndexer
#define caseExactIA5Filter		octetStringFilter

93
#define caseExactIA5SubstringsMatch			octetStringSubstringsMatch
94
95
#define caseExactIA5SubstringsIndexer		octetStringSubstringsIndexer
#define caseExactIA5SubstringsFilter		octetStringSubstringsFilter
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

/*
 * Alias tables: the IA5, numericString and telephoneNumber equality rules
 * all resolve to the octetString match/indexer/filter routines, and their
 * substrings rules resolve (via the caseExactIA5Substrings* aliases) to
 * the octetStringSubstrings* routines.
 */
#define caseIgnoreIA5Match		octetStringMatch
#define caseIgnoreIA5Indexer	octetStringIndexer
#define caseIgnoreIA5Filter		octetStringFilter

#define caseIgnoreIA5SubstringsMatch		caseExactIA5SubstringsMatch
#define caseIgnoreIA5SubstringsIndexer		caseExactIA5SubstringsIndexer
#define caseIgnoreIA5SubstringsFilter		caseExactIA5SubstringsFilter

/* numericString rules */
#define numericStringMatch		octetStringMatch
#define numericStringIndexer	octetStringIndexer
#define numericStringFilter		octetStringFilter

#define numericStringSubstringsMatch		caseExactIA5SubstringsMatch
#define numericStringSubstringsIndexer		caseExactIA5SubstringsIndexer
#define numericStringSubstringsFilter		caseExactIA5SubstringsFilter

/* telephoneNumber rules */
#define telephoneNumberMatch		octetStringMatch
#define telephoneNumberIndexer		octetStringIndexer
#define telephoneNumberFilter		octetStringFilter

#define telephoneNumberSubstringsMatch		caseExactIA5SubstringsMatch
#define telephoneNumberSubstringsIndexer	caseExactIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseExactIA5SubstringsFilter

Kurt Zeilenga's avatar
Kurt Zeilenga committed
121
122
#define booleanIndexer					octetStringIndexer
#define booleanFilter					octetStringFilter
Kurt Zeilenga's avatar
Kurt Zeilenga committed
123

124
/* validation routines */
125
#define berValidate						blobValidate
126

127
/* approx matching rules */
128
129
130
131
#ifdef SLAP_NVALUES
#define directoryStringApproxMatchOID	NULL
#define IA5StringApproxMatchOID			NULL
#else
132
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
133
134
135
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
136
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
137
#define IA5StringApproxMatch			approxMatch
138
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
139
#define IA5StringApproxFilter			approxFilter
140
#endif
141

142
/*
 * Locate the first byte of bv that equals c in either letter case.
 *
 * On a hit the byte offset is stored in *len and a pointer to the matching
 * byte inside bv->bv_val is returned.  Returns NULL (leaving *len alone)
 * when c is NUL or when no byte matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	char lc = TOLOWER( c );
	char uc = TOUPPER( c );
	ber_len_t pos = 0;

	/* a NUL never matches */
	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		char *cur = &bv->bv_val[pos];

		if( uc == *cur || lc == *cur ) {
			*len = pos;
			return cur;
		}
		pos++;
	}

	return NULL;
}
159

160
161
162
static int
octetStringMatch(
	int *matchp,
163
	slap_mask_t flags,
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

181
182
183
184
185
186
187
188
189
190
191
/*
 * Ordering match on raw octets.
 *
 * Compares the common prefix of the two values with memcmp(); when the
 * prefixes are equal the shorter value sorts first.  *matchp receives a
 * negative, zero or positive value accordingly.  Always returns
 * LDAP_SUCCESS.
 *
 * The length tie-break compares the ber_len_t values instead of assigning
 * their difference to an int, which could truncate to the wrong sign (or
 * to 0) where ber_len_t is wider than int.
 */
static int
octetStringOrderingMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *asserted = (struct berval *) assertedValue;
	ber_len_t v_len  = value->bv_len;
	ber_len_t av_len = asserted->bv_len;

	int match = memcmp( value->bv_val, asserted->bv_val,
		(v_len < av_len ? v_len : av_len) );

	if( match == 0 && v_len != av_len ) {
		match = v_len < av_len ? -1 : 1;
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

203
/* Index generation function */
204
int octetStringIndexer(
205
206
	slap_mask_t use,
	slap_mask_t flags,
207
208
209
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
210
211
	BerVarray values,
	BerVarray *keysp )
212
213
214
{
	int i;
	size_t slen, mlen;
215
	BerVarray keys;
216
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
217
	unsigned char	HASHdigest[HASH_BYTES];
218
	struct berval digest;
219
220
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
221

222
	for( i=0; values[i].bv_val != NULL; i++ ) {
223
224
225
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
226
227
228
	/* we should have at least one value at this point */
	assert( i > 0 );

229
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
230

231
232
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
233

234
	for( i=0; values[i].bv_val != NULL; i++ ) {
235
		HASH_Init( &HASHcontext );
236
		if( prefix != NULL && prefix->bv_len > 0 ) {
237
			HASH_Update( &HASHcontext,
238
239
				prefix->bv_val, prefix->bv_len );
		}
240
		HASH_Update( &HASHcontext,
241
			syntax->ssyn_oid, slen );
242
		HASH_Update( &HASHcontext,
243
			mr->smr_oid, mlen );
244
		HASH_Update( &HASHcontext,
245
			values[i].bv_val, values[i].bv_len );
246
		HASH_Final( HASHdigest, &HASHcontext );
247

248
		ber_dupbv( &keys[i], &digest );
249
250
	}

251
	keys[i].bv_val = NULL;
252
	keys[i].bv_len = 0;
253
254
255
256
257
258
259

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
260
int octetStringFilter(
261
262
	slap_mask_t use,
	slap_mask_t flags,
263
264
265
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
266
	void * assertedValue,
267
	BerVarray *keysp )
268
269
{
	size_t slen, mlen;
270
	BerVarray keys;
271
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
272
	unsigned char	HASHdigest[HASH_BYTES];
273
	struct berval *value = (struct berval *) assertedValue;
274
	struct berval digest;
275
276
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
277

278
279
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
280

281
	keys = ch_malloc( sizeof( struct berval ) * 2 );
282

283
	HASH_Init( &HASHcontext );
284
	if( prefix != NULL && prefix->bv_len > 0 ) {
285
		HASH_Update( &HASHcontext,
286
287
			prefix->bv_val, prefix->bv_len );
	}
288
	HASH_Update( &HASHcontext,
289
		syntax->ssyn_oid, slen );
290
	HASH_Update( &HASHcontext,
291
		mr->smr_oid, mlen );
292
	HASH_Update( &HASHcontext,
293
		value->bv_val, value->bv_len );
294
	HASH_Final( HASHdigest, &HASHcontext );
295

296
297
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
298
	keys[1].bv_len = 0;
299
300
301
302
303

	*keysp = keys;

	return LDAP_SUCCESS;
}
304

305
306
307
308
309
/*
 * Validator that accepts nothing: every input, whatever its content,
 * is reported as LDAP_INVALID_SYNTAX.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value is ever acceptable */
	int rc = LDAP_INVALID_SYNTAX;

	return rc;
}

314
static int
315
blobValidate(
316
317
318
319
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
320
	return LDAP_SUCCESS;
321
322
}

323
324
325
326
327
328
329
330
331
332
333
334
335
/*
 * Validate a Bit String value (rfc 2252 section 6.3).
 *
 * The accepted form is a single quote, zero or more binary digits
 * ("0" / "1"), a closing single quote and the letter B, for example
 * '0101111101'B.  The check is deliberately strict so that values need no
 * normalization before simplistic matching.
 *
 * Returns LDAP_SUCCESS for a well-formed value and LDAP_INVALID_SYNTAX
 * otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *s;
	ber_len_t last, pos;

	/* the shortest legal value is ''B */
	if( in->bv_len < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	s = in->bv_val;
	last = in->bv_len - 1;

	/* framing: leading quote, closing quote, trailing B */
	if( s[0] != '\'' ) return LDAP_INVALID_SYNTAX;
	if( s[last-1] != '\'' ) return LDAP_INVALID_SYNTAX;
	if( s[last] != 'B' ) return LDAP_INVALID_SYNTAX;

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos < last-1; pos++ ) {
		if( s[pos] == '0' || s[pos] == '1' ) continue;
		return LDAP_INVALID_SYNTAX;
	}

	return LDAP_SUCCESS;
}

360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
/*
 * Validate a Name And Optional UID value: a DN optionally followed by
 * "#" and a bit string ('<binary digits>'B).
 *
 * An empty value is accepted.  Otherwise the value is copied; if the copy
 * ends in 'B the trailing bit string must be well formed and preceded by
 * '#', and it is trimmed off before the remainder is handed to
 * dnValidate().
 *
 * Returns LDAP_SUCCESS for an empty value, LDAP_OTHER if the copy could
 * not be allocated, LDAP_INVALID_SYNTAX for a malformed UID part, and
 * otherwise whatever dnValidate() returns.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length guard keeps bv_len-2 from wrapping on a 1-byte value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/* the shortest UID part is #''B; anything shorter cannot hold
		 * both the '#' and the opening quote, and would make the index
		 * arithmetic below step outside the buffer
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk back over the binary digits */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* i >= 1 here, so i-1 is a valid index */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

400
401
402
403
404
405
406
/*
 * Normalize a Name And Optional UID value.
 *
 * The value is copied; if the copy ends in 'B the text from the last '#'
 * onwards is treated as the optional UID and split off.  The DN part is
 * normalized with dnNormalize2() into *normalized and the UID, if any, is
 * appended again after a '#' separator.
 *
 * An empty value yields an empty *normalized (the copy itself is handed
 * to the caller), so *normalized is always set on success.
 *
 * Returns LDAP_SUCCESS on success, LDAP_OTHER if the working copy could
 * not be made, and LDAP_INVALID_SYNTAX if the value ends in 'B without
 * containing '#' or if dnNormalize2() fails.
 */
static int
uniqueMemberNormalize(
	slap_mask_t usage,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	struct berval uid = { 0, NULL };
	int rc;

	if( ber_dupbv( &out, val ) == NULL ) {
		return LDAP_OTHER;
	}

	if( out.bv_len == 0 ) {
		/* nothing to normalize; pass the empty copy on so that the
		 * caller sees a defined result and nothing is leaked
		 */
		*normalized = out;
		return LDAP_SUCCESS;
	}

	/* the length guard keeps bv_len-2 from wrapping on a 1-byte value */
	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		ber_len_t dnlen;

		/* assume presence of optional UID */
		uid.bv_val = strrchr( out.bv_val, '#' );

		if( uid.bv_val == NULL ) {
			ber_memfree( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* the DN is everything before '#', the UID everything after */
		dnlen = uid.bv_val - out.bv_val;
		uid.bv_len = out.bv_len - dnlen - 1;
		out.bv_len = dnlen;

		/* temporarily trim the UID */
		*(uid.bv_val++) = '\0';
	}

	rc = dnNormalize2( NULL, &out, normalized );

	if( rc != LDAP_SUCCESS ) {
		ber_memfree( out.bv_val );
		return LDAP_INVALID_SYNTAX;
	}

	if( uid.bv_len ) {
		/* room for the DN, the separator, the UID and the terminator */
		normalized->bv_val = ch_realloc( normalized->bv_val,
			normalized->bv_len + uid.bv_len + sizeof("#") );

		/* insert the separator */
		normalized->bv_val[normalized->bv_len++] = '#';

		/* append the UID */
		AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
			uid.bv_val, uid.bv_len );
		normalized->bv_len += uid.bv_len;

		/* terminate */
		normalized->bv_val[normalized->bv_len] = '\0';
	}

	/* the copy came from ber_dupbv, so release it with ber_memfree */
	ber_memfree( out.bv_val );

	return LDAP_SUCCESS;
}

462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Validate a Boolean value.  Only the exact strings "TRUE" and "FALSE"
 * are accepted; this strictness is what lets matching get away without
 * normalization.
 *
 * Returns LDAP_SUCCESS for those two values, LDAP_INVALID_SYNTAX for
 * anything else.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
493
	slap_mask_t flags,
494
495
496
497
498
499
500
501
502
503
504
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

505
506
507
508
509
/*-------------------------------------------------------------------
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Summary:
510
  StringSyntax		X.500	LDAP	Matching/Comments
511
512
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
513
  PrintableString	subset	subset	i/e + ignore insignificant spaces
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

529
  In LDAPv3, a directory string is a UTF-8 encoded UCS string.
530
  A directory string cannot be zero length.
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which
  would be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces
  and could be empty.  However, in X.500, all attribute values of
  numeric string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::=
	    NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same
  constraint (but it's unclear how such an assertion could ever
  be true). In LDAP, there is one syntax (numericString) not two
  (numericString with constraint, numericString without constraint).
  This should be treated as numericString with non-empty constraint.
  Note that while someone may have no ISDN number, there are no ISDN
  numbers which are zero length.

  In matching, spaces are ignored.

PrintableString
  In ASN.1, PrintableString is just a string of printable characters
  and can be empty.  In X.500, the semantics are much like NumericString
  (see serialNumber for a like example), except that insignificant space
  handling is used instead of ignoring all spaces.

IA5String
  Basically same as PrintableString.  There are no examples in X.500,
  but same logic applies.  So we require them to be non-empty as
  well.

-------------------------------------------------------------------*/

577
578
579
580
581
582
583
584
585
/*
 * Validate a UTF-8 encoded string.
 *
 * An empty value is rejected when the syntax is directoryString.  The
 * value is then walked one character at a time: the lead byte gives the
 * encoded length (via LDAP_UTF8_CHARLEN2), every following byte of the
 * character must be a continuation byte (10xxxxxx), and the length must
 * agree with LDAP_UTF8_OFFSET.
 *
 * A character whose encoded length runs past the end of the value is
 * rejected before any of its continuation bytes are examined; without
 * that check the bytes beyond bv_len would be read and the unsigned
 * remaining-byte counter would wrap.
 *
 * Returns LDAP_SUCCESS for a well-formed value, LDAP_INVALID_SYNTAX
 * otherwise.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( in->bv_len == 0 && syntax == slap_schema.si_syn_directoryString ) {
		/* directory strings cannot be empty */
		return LDAP_INVALID_SYNTAX;
	}

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/* reject bad lead bytes and characters truncated by the end
		 * of the value
		 */
		if( len <= 0 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks: each case tests its own trailing byte and
		 * then falls through to test the ones before it
		 */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	/* the loop only ends once every byte has been consumed */
	return LDAP_SUCCESS;
}

636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
/*
 * Normalize a UTF-8 string for matching.
 *
 * A value whose bv_val is NULL normalizes to an empty berval.  Otherwise
 * the value is run through UTF8bvnormalize() -- without case folding when
 * mr is, or is associated with, caseExactMatch, with case folding
 * otherwise, and in approximate mode when `use` carries
 * SLAP_MR_EQUALITY_APPROX -- and the result is then compacted in place:
 * leading spaces, trailing spaces and repeated spaces are dropped.  A
 * string consisting only of spaces becomes a single space.
 *
 * Returns LDAP_SUCCESS, or LDAP_OTHER when UTF8bvnormalize() fails.
 *
 * NOTE(review): the all-spaces branch writes two bytes into the buffer
 * returned by UTF8bvnormalize(); this presumes that buffer is at least
 * two bytes long even for an empty result -- confirm against
 * UTF8bvnormalize().
 */
static int
UTF8StringNormalize(
	slap_mask_t use,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
{
	struct berval tmp, nvalue;
	int flags;
	int src, prev_space;

	if( val->bv_val == NULL ) {
		/* assume we're dealing with a syntax (e.g., UTF8String)
		 * which allows empty strings
		 */
		normalized->bv_val = NULL;
		normalized->bv_len = 0;
		return LDAP_SUCCESS;
	}

	if( SLAP_MR_ASSOCIATED(mr, slap_schema.si_mr_caseExactMatch ) ) {
		flags = LDAP_UTF8_NOCASEFOLD;
	} else {
		flags = LDAP_UTF8_CASEFOLD;
	}
	if( ( use & SLAP_MR_EQUALITY_APPROX ) == SLAP_MR_EQUALITY_APPROX ) {
		flags |= LDAP_UTF8_APPROX;
	}

	val = UTF8bvnormalize( val, &tmp, flags );
	if( val == NULL ) {
		return LDAP_OTHER;
	}

	/* compact in place: nvalue is the write side of tmp's buffer */
	nvalue.bv_val = tmp.bv_val;
	nvalue.bv_len = 0;

	/* starting "after a space" is what trims leading spaces */
	prev_space = 1;
	for( src = 0; src < tmp.bv_len; src++ ) {
		char ch = tmp.bv_val[src];

		if( !ASCII_SPACE( ch ) ) {
			prev_space = 0;
			nvalue.bv_val[nvalue.bv_len++] = ch;
			continue;
		}

		/* keep only the first space of a run */
		if( !prev_space ) {
			nvalue.bv_val[nvalue.bv_len++] = ch;
		}
		prev_space = 1;
	}

	if( nvalue.bv_len == 0 ) {
		/* string of all spaces is treated as one space */
		nvalue.bv_val[0] = ' ';
		nvalue.bv_val[1] = '\0';
		nvalue.bv_len = 1;

	} else {
		if( prev_space ) {
			/* last character was a space, trim it */
			nvalue.bv_len--;
		}
		nvalue.bv_val[nvalue.bv_len] = '\0';
	}

	*normalized = nvalue;
	return LDAP_SUCCESS;
}
701

702
#ifndef SLAP_NVALUES
703

704
#ifndef SLAPD_APPROX_OLDSINGLESTRING
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Word-based approximate match.
 *
 * Both the stored and the asserted value are normalized with
 * UTF8bvnormalize() in approximate mode and split into words at
 * SLAPD_APPROX_DELIMITER.  The asserted words must each be found, in
 * order, among the stored words by comparing their phonetic() codes
 * (or, with SLAPD_APPROX_INITIALS, a one-letter word by comparing
 * initials case-insensitively).
 *
 * *matchp is set to 0 for a match and 1 otherwise, including when either
 * normalization fails.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *nval, *assertv;
	char *val, **values, **words, *c;
	int i, count, len, nextchunk=0, nextavail=0;

	/* Yes, this is necessary */
	nval = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( nval == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	assertv = UTF8bvnormalize( ((struct berval *)assertedValue),
		NULL, LDAP_UTF8_APPROX );
	if( assertv == NULL ) {
		ber_bvfree( nval );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Cut the stored value into words, counting them as we go */
	count = 1;
	c = nval->bv_val;
	while( *c ) {
		c = strpbrk( c, SLAPD_APPROX_DELIMITER );
		if ( c == NULL ) break;
		*c++ = '\0';
		count++;
	}

	/* Remember each word and its phonetic code */
	words = (char **)ch_malloc( count * sizeof(char *) );
	values = (char **)ch_malloc( count * sizeof(char *) );
	c = nval->bv_val;
	for ( i = 0; i < count; i++ ) {
		words[i] = c;
		values[i] = phonetic(c);
		c += strlen(c) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while ( (ber_len_t) nextchunk < assertv->bv_len ) {
		char *chunk = assertv->bv_val + nextchunk;

		len = strcspn( chunk, SLAPD_APPROX_DELIMITER);
		if( len == 0 ) {
			/* empty word: step over the delimiter */
			nextchunk++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=nextavail; i<count; i++ )
				if( !strncasecmp( chunk, words[i], 1 )) {
					nextavail=i+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			chunk[len] = '\0';
			val = phonetic( chunk );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=nextavail; i<count; i++ ){
				if( strcmp( val, values[i] ) == 0 ){
					nextavail = i+1;
					break;
				}
			}
			ch_free( val );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= count ) {
			nextavail=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		nextchunk += len+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( nextavail > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( assertv );
	for( i=0; i<count; i++ ) {
		ch_free( values[i] );
	}
	ch_free( values );
	ch_free( words );
	ber_bvfree( nval );

	return LDAP_SUCCESS;
}

822
static int 
823
824
825
826
827
828
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
829
830
	BerVarray values,
	BerVarray *keysp )
831
{
832
	char *c;
833
	int i,j, len, wordcount, keycount=0;
834
	struct berval *newkeys;
835
	BerVarray keys=NULL;
836

837
	for( j=0; values[j].bv_val != NULL; j++ ) {
838
		struct berval val = { 0, NULL };
839
		/* Yes, this is necessary */
840
841
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
842

843
		/* Isolate how many words there are. There will be a key for each */
844
		for( wordcount = 0, c = val.bv_val; *c; c++) {
845
846
847
848
849
850
851
852
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
853
854
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
855
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
856
857
858
859
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
860
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
861
862
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
863
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
864
865
866
867
			keycount++;
			i++;
		}

868
		ber_memfree( val.bv_val );
869
	}
870
	keys[keycount].bv_val = NULL;
871
872
873
874
875
	*keysp = keys;

	return LDAP_SUCCESS;
}

876
static int 
877
878
879
880
881
882
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
883
	void * assertedValue,
884
	BerVarray *keysp )
885
{
886
	char *c;
887
	int i, count, len;
888
	struct berval *val;
889
	BerVarray keys;
890

891
	/* Yes, this is necessary */
892
	val = UTF8bvnormalize( ((struct berval *)assertedValue),
Kurt Zeilenga's avatar
Kurt Zeilenga committed
893
		NULL, LDAP_UTF8_APPROX );
894
	if( val == NULL || val->bv_val == NULL ) {
895
896
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
897
		*keysp = keys;
898
		ber_bvfree( val );
899
900
901
		return LDAP_SUCCESS;
	}

902
	/* Isolate how many words there are. There will be a key for each */
903
	for( count = 0,c = val->bv_val; *c; c++) {
904
905
906
907
908
909
910
911
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
912
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
913
914

	/* Get a phonetic copy of each word */
915
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
916
917
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
918
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
919
920
921
		i++;
	}

922
	ber_bvfree( val );
923

924
	keys[count].bv_val = NULL;
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
	*keysp = keys;

	return LDAP_SUCCESS;
}

#else
/* No other form of Approximate Matching is defined */

/*
 * Single-string approximate match.
 *
 * Both values are normalized with UTF8normalize() (no case folding),
 * passed through strip8bitChars() and reduced to their phonetic() codes;
 * *matchp receives the strcmp() result of the two codes.  If the stored
 * value cannot be normalized *matchp is 1; if the asserted value cannot
 * be normalized *matchp is -1.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *stored_norm, *asserted_norm;
	char *stored_code, *asserted_code;

	/* Yes, this is necessary */
	stored_norm = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( stored_norm == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	asserted_norm = UTF8normalize( ((struct berval *)assertedValue),
		UTF8_NOCASEFOLD );
	if( asserted_norm == NULL ) {
		free( stored_norm );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* reduce both sides to phonetic codes */
	stored_code = phonetic( strip8bitChars( stored_norm ) );
	asserted_code = phonetic( strip8bitChars( asserted_norm ) );

	/* the normalized copies are no longer needed */
	free( stored_norm );
	free( asserted_norm );

	*matchp = strcmp( stored_code, asserted_code );

	ch_free( stored_code );
	ch_free( asserted_code );

	return LDAP_SUCCESS;
}

975
static int 
976
977
978
979
980
981
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
982
983
	BerVarray values,
	BerVarray *keysp )
984
985
{
	int i;
986
	BerVarray *keys;
987
	char *s;
988

989
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
990
		/* empty - just count them */
991
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
992
993

	/* we should have at least one value at this point */
994
995
	assert( i > 0 );

996
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
997
998

	/* Copy each value and run it through phonetic() */
999
	for( i=0; values[i].bv_val != NULL; i++ ) {
1000
		/* Yes, this is necessary */
1001
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1002
1003

		/* strip 8-bit chars and run through phonetic() */
1004
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1005
		free( s );
1006
	}
1007
	keys[i].bv_val = NULL;
1008
1009
1010
1011
1012

	*keysp = keys;
	return LDAP_SUCCESS;
}

1013
static int 
1014
1015
1016
1017
1018
1019
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1020
	void * assertedValue,
1021
	BerVarray *keysp )
1022
{
1023
	BerVarray keys;
1024
	char *s;
1025

1026
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1027

1028
	/* Yes, this is necessary */
1029
	s = UTF8normalize( ((struct berval *)assertedValue),
1030
1031
1032
1033
1034
1035
1036
1037
1038
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1039
1040
1041
1042
1043

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif
1044
#endif /* !SLAP_NVALUES */
1045

1046
/* Substrings Index generation function */
1047
static int
1048
1049
octetStringSubstringsIndexer (
	slap_mask_t use,
1050
	slap_mask_t flags,
1051
1052
	Syntax *syntax,
	MatchingRule *mr,
1053
1054
1055
	struct berval *prefix,
	BerVarray values,
	BerVarray *keysp )
1056
{
1057
1058
1059
	ber_len_t i, j, nkeys;
	size_t slen, mlen;
	BerVarray keys;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1060

1061
1062
1063
1064
1065
	HASH_CONTEXT   HASHcontext;
	unsigned char	HASHdigest[HASH_BYTES];
	struct berval digest;
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
1066

1067
	nkeys=0;
1068

1069
1070
1071
1072
	for( i=0; values[i].bv_val != NULL; i++ ) {
		/* count number of indices to generate */
		if( values[i].bv_len < SLAP_INDEX_SUBSTR_MINLEN ) {
			continue;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1073
1074
		}

1075
1076
1077
1078
1079
1080
1081
		if( flags & SLAP_INDEX_SUBSTR_INITIAL ) {
			if( values[i].bv_len >= SLAP_INDEX_SUBSTR_MAXLEN ) {
				nkeys += SLAP_INDEX_SUBSTR_MAXLEN -
					( SLAP_INDEX_SUBSTR_MINLEN - 1);
			} else {
				nkeys += values[i].bv_len - ( SLAP_INDEX_SUBSTR_MINLEN - 1 );
			}
1082
1083
		}

1084
1085
1086
1087
		if( flags & SLAP_INDEX_SUBSTR_ANY ) {
			if( values[i].bv_len >= SLAP_INDEX_SUBSTR_MAXLEN ) {
				nkeys += values[i].bv_len - ( SLAP_INDEX_SUBSTR_MAXLEN - 1 );
			}
1088
		}
1089

1090
1091
1092
1093
1094
1095
1096
		if( flags & SLAP_INDEX_SUBSTR_FINAL ) {
			if( values[i].bv_len >= SLAP_INDEX_SUBSTR_MAXLEN ) {
				nkeys += SLAP_INDEX_SUBSTR_MAXLEN -
					( SLAP_INDEX_SUBSTR_MINLEN - 1);
			} else {
				nkeys += values[i].bv_len - ( SLAP_INDEX_SUBSTR_MINLEN - 1 );
			}
1097
1098
1099
		}
	}

1100
1101
1102
1103
1104
	if( nkeys == 0 ) {
		/* no keys to generate */
		*keysp = NULL;
		return LDAP_SUCCESS;
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1105

1106
	keys = ch_malloc( sizeof( struct berval ) * (nkeys+1) );
1107

1108
1109
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
1110

1111
1112
1113
	nkeys=0;
	for( i=0; values[i].bv_val != NULL; i++ ) {
		ber_len_t j,max;
1114

1115
		if( values[i].bv_len < SLAP_INDEX_SUBSTR_MINLEN ) continue;
1116

1117
1118
1119
1120
1121
		if( ( flags & SLAP_INDEX_SUBSTR_ANY ) &&
			( values[i].bv_len >= SLAP_INDEX_SUBSTR_MAXLEN ) )
		{
			char pre = SLAP_INDEX_SUBSTR_PREFIX;
			max = values[i].bv_len - ( SLAP_INDEX_SUBSTR_MAXLEN - 1);
1122

1123
			for( j=0; j<max; j++ ) {
1124
				HASH_Init( &HASHcontext );
1125
				if( prefix != NULL && prefix->bv_len > 0 ) {
1126
					HASH_Update( &HASHcontext,
1127
1128
						prefix->bv_val, prefix->bv_len );
				}
1129

1130
				HASH_Update( &HASHcontext,
1131
					&pre, sizeof( pre ) );
1132
				HASH_Update( &HASHcontext,
1133
					syntax->ssyn_oid, slen );
1134
				HASH_Update( &HASHcontext,
1135
					mr->smr_oid, mlen );
1136
				HASH_Update( &HASHcontext,
1137
					&values[i].bv_val[j],
1138
					SLAP_INDEX_SUBSTR_MAXLEN );
1139
				HASH_Final( HASHdigest, &HASHcontext );
1140

1141
				ber_dupbv( &keys[nkeys++], &digest );
1142
1143
1144
			}
		}

1145
1146
		max = SLAP_INDEX_SUBSTR_MAXLEN < values[i].bv_len
			? SLAP_INDEX_SUBSTR_MAXLEN : values[i].bv_len;
1147

1148
		for( j=SLAP_INDEX_SUBSTR_MINLEN; j<=max; j++ ) {
1149
1150
			char pre;

1151
			if( flags & SLAP_INDEX_SUBSTR_INITIAL ) {
1152
				pre = SLAP_INDEX_SUBSTR_INITIAL_PREFIX;
1153
				HASH_Init( &HASHcontext );
1154
				if( prefix != NULL && prefix->bv_len > 0 ) {
1155
					HASH_Update( &HASHcontext,