schema_init.c 113 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2003 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
8
9
10
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
20
#include "lber_pvt.h"
21

22
23
#include "ldap_utf8.h"

24
25
26
27
28
29
#include "lutil_hash.h"
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
30

Kurt Zeilenga's avatar
Kurt Zeilenga committed
31
32
33
34
#ifdef SLAP_NVALUES
#define SLAP_MR_DN_FOLD (0) /* TO BE DELETED */
#endif

35
/* validation routines */
36
#define berValidate						blobValidate
37

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/* (new) normalization routines */
#define caseExactNormalize							NULL
#define caseExactIA5Normalize						NULL
#define caseIgnoreNormalize							NULL
#define caseIgnoreIA5Normalize						NULL
#define distinguishedNameNormalize					NULL
#define integerNormalize							NULL
#define integerFirstComponentNormalize				NULL
#define numericStringNormalize						NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL
#define generalizedTimeNormalize					NULL
#define uniqueMemberNormalize						NULL
#define bitStringNormalize							NULL
#define telephoneNumberNormalize					NULL
53

54
/* approx matching rules */
55
56
57
58
#ifdef SLAP_NVALUES
#define directoryStringApproxMatchOID	NULL
#define IA5StringApproxMatchOID			NULL
#else
59
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
60
61
62
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
63
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
64
#define IA5StringApproxMatch			approxMatch
65
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
66
#define IA5StringApproxFilter			approxFilter
67
#endif
68

69
70
71
72
/* matching routines */
#define bitStringMatch					octetStringMatch
#define bitStringIndexer				octetStringIndexer
#define bitStringFilter					octetStringFilter
73

74
75
76
77
78
#define numericStringMatch				caseIgnoreIA5Match
#define numericStringIndexer			NULL
#define numericStringFilter				NULL
#define numericStringSubstringsIndexer	NULL
#define numericStringSubstringsFilter	NULL
79

80
81
82
83
84
85
86
87
88
#define objectIdentifierMatch			octetStringMatch
#define objectIdentifierIndexer			caseIgnoreIA5Indexer
#define objectIdentifierFilter			caseIgnoreIA5Filter

#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match

#define uniqueMemberMatch				dnMatch
#define numericStringSubstringsMatch    NULL
89

90
91
92
93
94
95
96
97
98
99
100
101
#define caseExactIndexer				caseExactIgnoreIndexer
#define caseExactFilter					caseExactIgnoreFilter
#define caseExactOrderingMatch			caseExactMatch
#define caseExactSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseExactSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseExactSubstringsFilter		caseExactIgnoreSubstringsFilter
#define caseIgnoreIndexer				caseExactIgnoreIndexer
#define caseIgnoreFilter				caseExactIgnoreFilter
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseIgnoreSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseIgnoreSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseIgnoreSubstringsFilter		caseExactIgnoreSubstringsFilter
102

103
104
105
106
107
108
109
110
111
112
113
#define integerOrderingMatch			integerMatch
#define integerFirstComponentMatch		integerMatch

#define distinguishedNameMatch			dnMatch
#define distinguishedNameIndexer		caseExactIgnoreIndexer
#define distinguishedNameFilter			caseExactIgnoreFilter

#define telephoneNumberMatch			caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define telephoneNumberIndexer				caseIgnoreIA5Indexer
#define telephoneNumberFilter				caseIgnoreIA5Filter
114
115
116
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

117

118
/*
 * Case-insensitive search for the single character c in a berval.
 *
 * Scans bv->bv_val over bv->bv_len bytes and returns a pointer to the
 * first byte equal to either the upper- or lower-case form of c,
 * storing that byte's offset in *len.  Returns NULL (leaving *len
 * untouched) when c is NUL or no byte matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	const char lc = TOLOWER( c );
	const char uc = TOUPPER( c );
	ber_len_t pos = 0;

	/* a NUL needle never matches */
	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		const char cur = bv->bv_val[pos];

		if( cur == uc || cur == lc ) {
			*len = pos;
			return bv->bv_val + pos;
		}
		pos++;
	}

	return NULL;
}
135

136
137
138
static int
octetStringMatch(
	int *matchp,
139
	slap_mask_t flags,
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

157
158
159
160
161
162
163
164
165
166
167
/*
 * Ordering match for octet strings.
 *
 * Compares the common prefix of both values with memcmp(); when the
 * prefixes are equal the shorter value sorts first.  *matchp receives
 * a negative, zero or positive result accordingly.  flags, syntax and
 * mr are not used.  Always returns LDAP_SUCCESS.
 */
static int
octetStringOrderingMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *asserted = (struct berval *) assertedValue;
	ber_len_t v_len  = value->bv_len;
	ber_len_t av_len = asserted->bv_len;

	int match = memcmp( value->bv_val, asserted->bv_val,
		(v_len < av_len ? v_len : av_len) );

	if( match == 0 && v_len != av_len ) {
		/* Do not store the unsigned length difference in an int:
		 * it may truncate to 0 or to the wrong sign.
		 */
		match = v_len < av_len ? -1 : 1;
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

179
/* Index generation function */
180
int octetStringIndexer(
181
182
	slap_mask_t use,
	slap_mask_t flags,
183
184
185
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
186
187
	BerVarray values,
	BerVarray *keysp )
188
189
190
{
	int i;
	size_t slen, mlen;
191
	BerVarray keys;
192
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
193
	unsigned char	HASHdigest[HASH_BYTES];
194
	struct berval digest;
195
196
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
197

198
	for( i=0; values[i].bv_val != NULL; i++ ) {
199
200
201
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
202
203
204
	/* we should have at least one value at this point */
	assert( i > 0 );

205
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
206

207
208
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
209

210
	for( i=0; values[i].bv_val != NULL; i++ ) {
211
		HASH_Init( &HASHcontext );
212
		if( prefix != NULL && prefix->bv_len > 0 ) {
213
			HASH_Update( &HASHcontext,
214
215
				prefix->bv_val, prefix->bv_len );
		}
216
		HASH_Update( &HASHcontext,
217
			syntax->ssyn_oid, slen );
218
		HASH_Update( &HASHcontext,
219
			mr->smr_oid, mlen );
220
		HASH_Update( &HASHcontext,
221
			values[i].bv_val, values[i].bv_len );
222
		HASH_Final( HASHdigest, &HASHcontext );
223

224
		ber_dupbv( &keys[i], &digest );
225
226
	}

227
	keys[i].bv_val = NULL;
228
	keys[i].bv_len = 0;
229
230
231
232
233
234
235

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
236
int octetStringFilter(
237
238
	slap_mask_t use,
	slap_mask_t flags,
239
240
241
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
242
	void * assertedValue,
243
	BerVarray *keysp )
244
245
{
	size_t slen, mlen;
246
	BerVarray keys;
247
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
248
	unsigned char	HASHdigest[HASH_BYTES];
249
	struct berval *value = (struct berval *) assertedValue;
250
	struct berval digest;
251
252
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
253

254
255
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
256

257
	keys = ch_malloc( sizeof( struct berval ) * 2 );
258

259
	HASH_Init( &HASHcontext );
260
	if( prefix != NULL && prefix->bv_len > 0 ) {
261
		HASH_Update( &HASHcontext,
262
263
			prefix->bv_val, prefix->bv_len );
	}
264
	HASH_Update( &HASHcontext,
265
		syntax->ssyn_oid, slen );
266
	HASH_Update( &HASHcontext,
267
		mr->smr_oid, mlen );
268
	HASH_Update( &HASHcontext,
269
		value->bv_val, value->bv_len );
270
	HASH_Final( HASHdigest, &HASHcontext );
271

272
273
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
274
	keys[1].bv_len = 0;
275
276
277
278
279

	*keysp = keys;

	return LDAP_SUCCESS;
}
280

281
282
283
284
285
/*
 * Validator that rejects every value: always returns
 * LDAP_INVALID_SYNTAX regardless of its arguments.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	(void) syntax;
	(void) in;

	/* no value is acceptable */
	return LDAP_INVALID_SYNTAX;
}

290
static int
291
blobValidate(
292
293
294
295
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
296
	return LDAP_SUCCESS;
297
298
}

299
300
301
302
303
304
305
306
307
308
309
310
311
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict so that values need no
 * normalization before simplistic matching.  Returns LDAP_SUCCESS for
 * a well-formed value and LDAP_INVALID_SYNTAX otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *s = in->bv_val;
	const ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* the shortest legal value is ''B */
	if( n < 3 ) return LDAP_INVALID_SYNTAX;

	/* opening quote, closing quote, trailing B */
	if( s[0] != '\'' ) return LDAP_INVALID_SYNTAX;
	if( s[n-2] != '\'' ) return LDAP_INVALID_SYNTAX;
	if( s[n-1] != 'B' ) return LDAP_INVALID_SYNTAX;

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos < n - 2; pos++ ) {
		if( !( s[pos] == '0' || s[pos] == '1' ) ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
/*
 * Validate a "name and optional UID" value: a DN optionally followed
 * by a bit-string UID of the form  #'<binary digits>'B .
 *
 * An empty value is accepted.  Otherwise the value is copied, a
 * trailing UID (if the value ends in 'B) is checked and trimmed off,
 * and the remaining text is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX for a malformed UID,
 * LDAP_OTHER if the working copy could not be allocated, or whatever
 * dnValidate() returns for the DN part.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length test keeps bv_len-2 from wrapping on a 1-byte value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/* The shortest possible UID suffix is #''B (4 bytes); anything
		 * shorter cannot be well formed, and would make the unsigned
		 * index arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* the digits must be preceded by #' ; i >= 1 here */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

static int
377
xnameUIDNormalize(
378
379
380
381
382
383
384
385
386
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	int rc;

	ber_dupbv( &out, val );
	if( out.bv_len != 0 ) {
387
		struct berval uid = { 0, NULL };
388
389
390
391
392

		if( out.bv_val[out.bv_len-1] == 'B'
			&& out.bv_val[out.bv_len-2] == '\'' )
		{
			/* assume presence of optional UID */
393
			uid.bv_val = strrchr( out.bv_val, '#' );
394

395
			if( uid.bv_val == NULL ) {
396
397
398
399
				free( out.bv_val );
				return LDAP_INVALID_SYNTAX;
			}

400
401
			uid.bv_len = out.bv_len - (uid.bv_val - out.bv_val);
			out.bv_len -= uid.bv_len--;
402
403

			/* temporarily trim the UID */
404
			*(uid.bv_val++) = '\0';
405
406
407
408
409
410
411
412
413
		}

		rc = dnNormalize2( NULL, &out, normalized );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

414
		if( uid.bv_len ) {
415
			normalized->bv_val = ch_realloc( normalized->bv_val,
416
				normalized->bv_len + uid.bv_len + sizeof("#") );
417
418
419
420
421
422

			/* insert the separator */
			normalized->bv_val[normalized->bv_len++] = '#';

			/* append the UID */
			AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
423
424
				uid.bv_val, uid.bv_len );
			normalized->bv_len += uid.bv_len;
425
426
427
428
429
430
431
432
433
434
435

			/* terminate */
			normalized->bv_val[normalized->bv_len] = '\0';
		}

		free( out.bv_val );
	}

	return LDAP_SUCCESS;
}

436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 *
 * booleanValidate accepts exactly the strings "TRUE" and "FALSE"
 * (case-sensitive) and returns LDAP_INVALID_SYNTAX for anything else.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* very unforgiving validation, requires no normalization
	 * before simplistic matching
	 */
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
467
	slap_mask_t flags,
468
469
470
471
472
473
474
475
476
477
478
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
/*-------------------------------------------------------------------
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Summary:
  StringSyntax		X.500	LDAP	Matching
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

502
  In LDAPv3, a directory string is a UTF-8 encoded UCS string.
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if it held one space, not empty (which
  would be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces
  and could be empty.  However, in X.500, all attribute values of
  numeric string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::=
	    NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same
  constraint (but it's unclear how such an assertion could ever
  be true). In LDAP, there is one syntax (numericString) not two
  (numericString with constraint, numericString without constraint).
  This should be treated as numericString with non-empty constraint.
  Note that while someone may have no ISDN number, there are no ISDN
  numbers which are zero length.

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters
  and can be empty.  In X.500, semantics much like NumericString (see
  serialNumber for a like example) excepting uses insignificant space
  handling instead of ignore all spaces.  

IA5String
  Basically same as PrintableString.  There are no examples in X.500,
  but same logic applies.  So we require them to be non-empty as
  well.

-------------------------------------------------------------------*/

549
550
551
552
553
554
555
556
557
/*
 * Validate that a value is a non-empty, well-formed UTF-8 string.
 *
 * Walks the value one character at a time: the length announced by
 * the lead byte must fit in the remaining input, every continuation
 * byte must have the form 10xxxxxx, and the length must agree with
 * LDAP_UTF8_OFFSET().
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/* Reject an invalid lead byte, and a character that claims
		 * more bytes than remain: otherwise the checks below read
		 * past the value and the unsigned count wraps around.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( (char *) u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	/* the loop only ends once every byte has been consumed */
	return LDAP_SUCCESS;
}

static int
604
xUTF8StringNormalize(
605
606
	Syntax *syntax,
	struct berval *val,
607
	struct berval *normalized )
608
{
609
	char *p, *q, *s, *e;
610
	int len = 0;
611

Kurt Zeilenga's avatar
Kurt Zeilenga committed
612
613
614
	/* validator should have refused an empty string */
	assert( val->bv_len );

615
	p = val->bv_val;
616

617
	/* Ignore initial whitespace */
618
	/* All space is ASCII. All ASCII is 1 byte */
619
	for ( ; p < val->bv_val + val->bv_len && ASCII_SPACE( p[ 0 ] ); p++ );
620

621
	normalized->bv_len = val->bv_len - (p - val->bv_val);
Kurt Zeilenga's avatar
Kurt Zeilenga committed
622
623
624
625
626
627

	if( !normalized->bv_len ) {
		ber_mem2bv( " ", 1, 1, normalized );
		return LDAP_SUCCESS;
	}

628
629
	ber_mem2bv( p, normalized->bv_len, 1, normalized );
	e = normalized->bv_val + normalized->bv_len;
630
631
632
633

	assert( normalized->bv_val );

	p = q = normalized->bv_val;
634
	s = NULL;
635

636
	while ( p < e ) {
637
638
639
640
641
		q += len;
		if ( ASCII_SPACE( *p ) ) {
			s = q - len;
			len = 1;
			*q = *p++;
642

643
			/* Ignore the extra whitespace */
644
645
			while ( ASCII_SPACE( *p ) ) {
				p++;
646
			}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
647
		} else {
648
649
650
			len = LDAP_UTF8_COPY(q,p);
			s=NULL;
			p+=len;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
651
		}
652
653
	}

654
	assert( normalized->bv_val <= p );
655
	assert( q+len <= p );
656

657
	/* cannot start with a space */
658
	assert( !ASCII_SPACE( normalized->bv_val[0] ) );
659
660
661
662
663
664
665
666

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */

	if ( s != NULL ) {
Howard Chu's avatar
Howard Chu committed
667
		len = q - s;
668
		q = s;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
669
	}
670

671
	/* cannot end with a space */
672
673
674
	assert( !ASCII_SPACE( *q ) );

	q += len;
675
676
677
678

	/* null terminate */
	*q = '\0';

679
	normalized->bv_len = q - normalized->bv_val;
680

681
	return LDAP_SUCCESS;
682
683
}

684
/* Returns Unicode canonically normalized copy of a substring assertion
685
 * Skipping attribute description */
686
static SubstringsAssertion *
687
UTF8SubstringsAssertionNormalize(
688
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
689
	unsigned casefold )
690
691
692
693
{
	SubstringsAssertion *nsa;
	int i;

Julius Enarusai's avatar
   
Julius Enarusai committed
694
	nsa = (SubstringsAssertion *)SLAP_CALLOC( 1, sizeof(SubstringsAssertion) );
695
696
697
698
	if( nsa == NULL ) {
		return NULL;
	}

699
	if( sa->sa_initial.bv_val != NULL ) {
700
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
701
		if( nsa->sa_initial.bv_val == NULL ) {
702
703
704
705
706
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
707
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
708
709
			/* empty */
		}
710
		nsa->sa_any = (struct berval *)
Julius Enarusai's avatar
   
Julius Enarusai committed
711
712
713
714
			SLAP_MALLOC( (i + 1) * sizeof(struct berval) );
		if( nsa->sa_any == NULL ) {
				goto err;
		}
715

716
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
717
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
718
				casefold );
719
			if( nsa->sa_any[i].bv_val == NULL ) {
720
721
722
				goto err;
			}
		}
723
		nsa->sa_any[i].bv_val = NULL;
724
725
	}

726
	if( sa->sa_final.bv_val != NULL ) {
727
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
728
		if( nsa->sa_final.bv_val == NULL ) {
729
730
731
732
733
734
735
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
736
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
737
	if ( nsa->sa_any ) ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
738
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
739
740
741
742
	ch_free( nsa );
	return NULL;
}

743
#ifndef SLAPD_APPROX_OLDSINGLESTRING
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Approximate match, word by word.
 *
 * Both the stored and the asserted value are normalized with
 * UTF8bvnormalize( ..., LDAP_UTF8_APPROX ).  The stored value is split
 * in place at SLAPD_APPROX_DELIMITER characters and a phonetic() code
 * is computed for each word.  The asserted value's words are then
 * looked up, in order, among the stored words not yet consumed.
 *
 * *matchp is 0 when every asserted word was found (and at least one
 * was present), 1 otherwise, including when either normalization
 * fails.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *wanted;
	char *code, **codes, **words, *cur;
	int w, nwords, len, offset = 0, nextavail = 0;

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8bvnormalize( (struct berval *) assertedValue,
		NULL, LDAP_UTF8_APPROX );
	if( wanted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Split the stored value into words in place and count them */
	nwords = 1;
	cur = stored->bv_val;
	while( *cur != '\0' ) {
		cur = strpbrk( cur, SLAPD_APPROX_DELIMITER );
		if( cur == NULL ) break;
		*cur++ = '\0';
		nwords++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( nwords * sizeof(char *) );
	codes = (char **)ch_malloc( nwords * sizeof(char *) );
	cur = stored->bv_val;
	for( w = 0; w < nwords; w++ ) {
		words[w] = cur;
		codes[w] = phonetic( cur );
		cur += strlen( cur ) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while( (ber_len_t) offset < wanted->bv_len ) {
		char *chunk = wanted->bv_val + offset;

		len = strcspn( chunk, SLAPD_APPROX_DELIMITER );
		if( len == 0 ) {
			/* a bare delimiter: skip it */
			offset++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( w = nextavail; w < nwords; w++ ) {
				if( strncasecmp( chunk, words[w], 1 ) == 0 ) {
					nextavail = w + 1;
					break;
				}
			}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			chunk[len] = '\0';
			code = phonetic( chunk );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( w = nextavail; w < nwords; w++ ) {
				if( strcmp( code, codes[w] ) == 0 ) {
					nextavail = w + 1;
					break;
				}
			}
			ch_free( code );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( w >= nwords ) {
			nextavail = -1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += len + 1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( nextavail > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( wanted );
	for( w = 0; w < nwords; w++ ) {
		ch_free( codes[w] );
	}
	ch_free( codes );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

862
static int 
863
864
865
866
867
868
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
869
870
	BerVarray values,
	BerVarray *keysp )
871
{
872
	char *c;
873
	int i,j, len, wordcount, keycount=0;
874
	struct berval *newkeys;
875
	BerVarray keys=NULL;
876

877
	for( j=0; values[j].bv_val != NULL; j++ ) {
878
		struct berval val = { 0, NULL };
879
		/* Yes, this is necessary */
880
881
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
882

883
		/* Isolate how many words there are. There will be a key for each */
884
		for( wordcount = 0, c = val.bv_val; *c; c++) {
885
886
887
888
889
890
891
892
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
893
894
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
895
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
896
897
898
899
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
900
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
901
902
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
903
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
904
905
906
907
			keycount++;
			i++;
		}

908
		ber_memfree( val.bv_val );
909
	}
910
	keys[keycount].bv_val = NULL;
911
912
913
914
915
	*keysp = keys;

	return LDAP_SUCCESS;
}

916
static int 
917
918
919
920
921
922
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
923
	void * assertedValue,
924
	BerVarray *keysp )
925
{
926
	char *c;
927
	int i, count, len;
928
	struct berval *val;
929
	BerVarray keys;
930

931
	/* Yes, this is necessary */
932
	val = UTF8bvnormalize( ((struct berval *)assertedValue),
Kurt Zeilenga's avatar
Kurt Zeilenga committed
933
		NULL, LDAP_UTF8_APPROX );
934
	if( val == NULL || val->bv_val == NULL ) {
935
936
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
937
		*keysp = keys;
938
		ber_bvfree( val );
939
940
941
		return LDAP_SUCCESS;
	}

942
	/* Isolate how many words there are. There will be a key for each */
943
	for( count = 0,c = val->bv_val; *c; c++) {
944
945
946
947
948
949
950
951
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
952
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
953
954

	/* Get a phonetic copy of each word */
955
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
956
957
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
958
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
959
960
961
		i++;
	}

962
	ber_bvfree( val );
963

964
	keys[count].bv_val = NULL;
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Approximate match treating each value as a single string.
 *
 * Both values are passed through UTF8normalize( ..., UTF8_NOCASEFOLD ),
 * strip8bitChars() and phonetic(); *matchp receives the strcmp() result
 * of the two phonetic strings.  If the stored value fails to normalize
 * *matchp is 1; if the asserted value fails it is -1.
 * Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *stored_code, *asserted_code;
	char *stored_norm, *asserted_norm;

	/* Yes, this is necessary */
	stored_norm = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( stored_norm == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	asserted_norm = UTF8normalize( (struct berval *) assertedValue,
		UTF8_NOCASEFOLD );
	if( asserted_norm == NULL ) {
		free( stored_norm );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* reduce both strings to their phonetic form */
	stored_code = phonetic( strip8bitChars( stored_norm ) );
	asserted_code = phonetic( strip8bitChars( asserted_norm ) );

	free( stored_norm );
	free( asserted_norm );

	*matchp = strcmp( stored_code, asserted_code );

	ch_free( stored_code );
	ch_free( asserted_code );

	return LDAP_SUCCESS;
}

1016
static int 
1017
1018
1019
1020
1021
1022
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1023
1024
	BerVarray values,
	BerVarray *keysp )
1025
1026
{
	int i;
1027
	BerVarray *keys;
1028
	char *s;
1029

1030
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1031
		/* empty - just count them */
1032
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1033
1034

	/* we should have at least one value at this point */
1035
1036
	assert( i > 0 );

1037
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
1038
1039

	/* Copy each value and run it through phonetic() */
1040
	for( i=0; values[i].bv_val != NULL; i++ ) {
1041
		/* Yes, this is necessary */
1042
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1043
1044

		/* strip 8-bit chars and run through phonetic() */
1045
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1046
		free( s );
1047
	}
1048
	keys[i].bv_val = NULL;
1049
1050
1051
1052
1053
1054

	*keysp = keys;
	return LDAP_SUCCESS;
}


1055
static int 
1056
1057
1058
1059
1060
1061
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1062
	void * assertedValue,
1063
	BerVarray *keysp )
1064
{
1065
	BerVarray keys;
1066
	char *s;
1067

1068
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1069

1070
	/* Yes, this is necessary */
1071
	s = UTF8normalize( ((struct berval *)assertedValue),
1072
1073
1074
1075
1076
1077
1078
1079
1080
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1081
1082
1083
1084
1085
1086
1087

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1088
static int
1089
caseExactMatch(
1090
	int *matchp,
1091
	slap_mask_t flags,
1092
1093
1094
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
1095
	void *assertedValue )
1096
{
1097
1098
1099
	*