schema_init.c 108 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/****
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

  In LDAPv3, a directory string is a UTF-8 encoded UCS string.

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if it held one space, not as empty (which
  would be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces and
  could be empty.  However, in X.500, all attribute values of numeric
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::= NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same constraint
  (but it's unclear how such an assertion could ever be true). In LDAP,
  there is one syntax (numericString) not two (numericString with constraint,
  numericString without constraint).  This should be treated as numericString
  with non-empty constraint.  Note that while someone may have no
  ISDN number, there are no ISDN numbers which are zero length.
47
48
49
50
51

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters and
52
53
54
  can be empty.  In X.500, the semantics are much like NumericString (see
  serialNumber for a like example), except that insignificant space
  handling is used instead of ignoring all spaces.
55
56

IA5String
57
58
  Basically same as PrintableString.  There are no examples in X.500, but
  same logic applies.  So we require them to be non-empty as well.
59
60
61
62
63
64

****/




65
66
67
#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
68
#include <limits.h>
69
70

#include <ac/ctype.h>
71
#include <ac/errno.h>
72
73
74
75
76
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
77
#include "lber_pvt.h"
78

79
80
#include "ldap_utf8.h"

81
82
83
84
85
86
#include "lutil_hash.h"
/* local names for the lutil hash primitives used to build index keys */
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)

/* recycled validation routines */
#define berValidate						blobValidate

/* unimplemented pretty-printers */
#define integerPretty					NULL

/* recycled matching routines */
#define bitStringMatch					octetStringMatch
#define numericStringMatch				caseIgnoreIA5Match
#define objectIdentifierMatch			caseIgnoreIA5Match
#define telephoneNumberMatch			caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
#define uniqueMemberMatch				dnMatch
#define integerFirstComponentMatch		integerMatch

/* approx matching rules */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
#define IA5StringApproxMatch			approxMatch
#define IA5StringApproxIndexer			approxIndexer
#define IA5StringApproxFilter			approxFilter

/* ordering matching rules */
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseExactOrderingMatch			caseExactMatch
#define integerOrderingMatch			integerMatch

/* unimplemented matching routines */
#define caseIgnoreListMatch				NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch		NULL

#ifdef SLAPD_ACI_ENABLED
#define OpenLDAPaciMatch				NULL
#endif
#ifdef SLAPD_AUTHPASSWD
#define authPasswordMatch				NULL
#endif

/* recycled indexing/filtering routines */
#define dnIndexer				caseExactIgnoreIndexer
#define dnFilter				caseExactIgnoreFilter
#define bitStringFilter			octetStringFilter
#define bitStringIndexer		octetStringIndexer

#define telephoneNumberIndexer			caseIgnoreIA5Indexer
#define telephoneNumberFilter			caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
 * File-scope handles for three matching rules, followed by a table that
 * pairs the OID string of each rule with the address of its handle.
 * NOTE(review): presumably the handles are filled in during schema
 * initialization by looking up each OID; the code doing so is not in
 * this part of the file -- confirm.
 */
static MatchingRule *caseExactMatchingRule;
static MatchingRule *caseExactSubstringsMatchingRule;
static MatchingRule *integerFirstComponentMatchingRule;

static const struct MatchingRulePtr {
	const char   *oid;	/* OID of the matching rule, as text */
	MatchingRule **mr;	/* where the pointer to that rule is kept */
} mr_ptr [] = {
	/* must match OIDs below */
	{ "2.5.13.5",  &caseExactMatchingRule },
	{ "2.5.13.7",  &caseExactSubstringsMatchingRule },
	{ "2.5.13.29", &integerFirstComponentMatchingRule }
};

157

158
/*
 * Find the first byte of bv that equals c in either its TOLOWER() or
 * its TOUPPER() form.
 *
 * On a hit the byte offset is stored in *len and a pointer to the byte
 * is returned.  Returns NULL, leaving *len untouched, when c is 0 or no
 * byte within the first bv->bv_len bytes matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	ber_len_t i;
	char lower = TOLOWER( c );
	char upper = TOUPPER( c );

	if( c == 0 ) return NULL;

	for( i=0; i < bv->bv_len; i++ ) {
		if( upper == bv->bv_val[i] || lower == bv->bv_val[i] ) {
			*len = i;
			return &bv->bv_val[i];
		}
	}

	return NULL;
}
175

176
177
178
static int
octetStringMatch(
	int *matchp,
179
	slap_mask_t flags,
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
198
static int octetStringIndexer(
199
200
	slap_mask_t use,
	slap_mask_t flags,
201
202
203
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
204
205
	BerVarray values,
	BerVarray *keysp )
206
207
208
{
	int i;
	size_t slen, mlen;
209
	BerVarray keys;
210
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
211
	unsigned char	HASHdigest[HASH_BYTES];
212
	struct berval digest;
213
214
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
215

216
	for( i=0; values[i].bv_val != NULL; i++ ) {
217
218
219
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
220
221
222
	/* we should have at least one value at this point */
	assert( i > 0 );

223
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
224

225
226
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
227

228
	for( i=0; values[i].bv_val != NULL; i++ ) {
229
		HASH_Init( &HASHcontext );
230
		if( prefix != NULL && prefix->bv_len > 0 ) {
231
			HASH_Update( &HASHcontext,
232
233
				prefix->bv_val, prefix->bv_len );
		}
234
		HASH_Update( &HASHcontext,
235
			syntax->ssyn_oid, slen );
236
		HASH_Update( &HASHcontext,
237
			mr->smr_oid, mlen );
238
		HASH_Update( &HASHcontext,
239
			values[i].bv_val, values[i].bv_len );
240
		HASH_Final( HASHdigest, &HASHcontext );
241

242
		ber_dupbv( &keys[i], &digest );
243
244
	}

245
	keys[i].bv_val = NULL;
246
247
248
249
250
251
252

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
253
static int octetStringFilter(
254
255
	slap_mask_t use,
	slap_mask_t flags,
256
257
258
259
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
260
	BerVarray *keysp )
261
262
{
	size_t slen, mlen;
263
	BerVarray keys;
264
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
265
	unsigned char	HASHdigest[HASH_BYTES];
266
267
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
268
269
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
270

271
272
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
273

274
	keys = ch_malloc( sizeof( struct berval ) * 2 );
275

276
	HASH_Init( &HASHcontext );
277
	if( prefix != NULL && prefix->bv_len > 0 ) {
278
		HASH_Update( &HASHcontext,
279
280
			prefix->bv_val, prefix->bv_len );
	}
281
	HASH_Update( &HASHcontext,
282
		syntax->ssyn_oid, slen );
283
	HASH_Update( &HASHcontext,
284
		mr->smr_oid, mlen );
285
	HASH_Update( &HASHcontext,
286
		value->bv_val, value->bv_len );
287
	HASH_Final( HASHdigest, &HASHcontext );
288

289
290
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
291
292
293
294
295

	*keysp = keys;

	return LDAP_SUCCESS;
}
296

297
298
299
300
301
302
303
304
305
/*
 * Validator that accepts nothing: every value, whatever its content,
 * is rejected with LDAP_OTHER.  Neither argument is examined.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value allowed */
	return LDAP_OTHER;
}

306
static int
307
blobValidate(
308
309
310
311
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
312
	return LDAP_SUCCESS;
313
314
}

315
316
317
318
319
320
321
322
323
324
325
326
327
/*
 * Validate a Bit String value.
 *
 * Accepts exactly a single quote, zero or more '0'/'1' characters, a
 * single quote and a trailing 'B'.  Returns LDAP_SUCCESS for such a
 * value and LDAP_INVALID_SYNTAX for anything else.  syntax is not used.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t i;

	/* very unforgiving validation, requires no normalization
	 * before simplistic matching
	 */
	if( in->bv_len < 3 ) {
		/* too short to hold the two quotes and the 'B' */
		return LDAP_INVALID_SYNTAX;
	}

	/*
	 * rfc 2252 section 6.3 Bit String
	 * bitstring = "'" *binary-digit "'"
	 * binary-digit = "0" / "1"
	 * example: '0101111101'B
	 */

	if( in->bv_val[0] != '\'' ||
		in->bv_val[in->bv_len-2] != '\'' ||
		in->bv_val[in->bv_len-1] != 'B' )
	{
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the two quotes must be a binary digit */
	for( i=in->bv_len-3; i>0; i-- ) {
		if( in->bv_val[i] != '0' && in->bv_val[i] != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

352
353
354
355
/*
 * Normalize a Bit String value into a newly allocated string stored in
 * *normalized.
 *
 * The scan starts at val->bv_val[1] and copies up to the terminating
 * NUL, so val must already have the 'bits'B shape and be NUL-terminated.
 * syntax is not used.  Always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/*
	 * A normalized bitString has no extraneous (leading) zero bits.
	 * That is, '00010'B is normalized to '10'B
	 * However, as a special case, '0'B requires no normalization.
	 */
	char *p;

	/* start at the first bit */
	p = &val->bv_val[1];

	/* Find the first non-zero bit */
	while ( *p == '0' ) p++;

	if( *p == '\'' ) {
		/* no non-zero bits */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		goto done;
	}

	/* the result can never be longer than the input */
	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	normalized->bv_val[0] = '\'';
	normalized->bv_len = 1;

	/* copy the remaining bits together with the closing quote and 'B' */
	for( ; *p != '\0'; p++ ) {
		normalized->bv_val[normalized->bv_len++] = *p;
	}

	normalized->bv_val[normalized->bv_len] = '\0';

done:
	return LDAP_SUCCESS;
}

392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
/*
 * Validate a Name And Optional UID value: a DN, optionally followed by
 * '#' and a bit string such as '0101'B.
 *
 * An empty value is accepted.  A value ending in 'B (quote, capital B)
 * is taken to carry a UID: the bit string must consist of binary digits
 * only and be introduced by "#'"; the part before the '#' is then
 * handed to dnValidate().  Any other value is handed to dnValidate()
 * whole.
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX for a malformed UID,
 * LDAP_OTHER when the working copy cannot be allocated, or whatever
 * dnValidate() returns.  syntax is not used.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	/* work on a copy, since the UID is trimmed off in place */
	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* need two bytes before looking at the last two */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest value carrying a UID is "#''B".  Rejecting
		 * anything shorter here keeps the unsigned index arithmetic
		 * below from wrapping around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}
		/* the digits must be preceded by "#'" */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a Name And Optional UID value.
 *
 * A value ending in 'B (quote, capital B) is split at its last '#': the
 * part after it goes through bitStringNormalize(), the part before it
 * through dnNormalize2() or dnPretty2() depending on USE_DN_NORMALIZE.
 * The two results are joined with '#' in normalized.  A value without
 * that ending is treated as a plain DN.
 *
 * For an empty value nothing is written to normalized and LDAP_SUCCESS
 * is returned.  Returns LDAP_INVALID_SYNTAX when the '#' is missing or
 * either part fails to normalize, and LDAP_OTHER when the working copy
 * cannot be allocated.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out = { 0, NULL };
	int rc;

	/* work on a copy, since the UID is cut off in place */
	ber_dupbv( &out, val );
	if( val->bv_len != 0 && out.bv_val == NULL ) {
		/* same outcome as nameUIDValidate() when the copy fails */
		return LDAP_OTHER;
	}

	if( out.bv_len != 0 ) {
		struct berval uidin = { 0, NULL };
		struct berval uidout = { 0, NULL };

		/* need two bytes before looking at the last two */
		if( out.bv_len >= 2
			&& out.bv_val[out.bv_len-1] == 'B'
			&& out.bv_val[out.bv_len-2] == '\'' )
		{
			/* assume presence of optional UID */
			uidin.bv_val = strrchr( out.bv_val, '#' );

			if( uidin.bv_val == NULL ) {
				free( out.bv_val );
				return LDAP_INVALID_SYNTAX;
			}

			/* uidin covers "#'...'B"; out keeps what precedes it */
			uidin.bv_len = out.bv_len - (uidin.bv_val - out.bv_val);
			out.bv_len -= uidin.bv_len--;

			/* temporarily trim the UID */
			*(uidin.bv_val++) = '\0';

			rc = bitStringNormalize( syntax, &uidin, &uidout );

			if( rc != LDAP_SUCCESS ) {
				free( out.bv_val );
				return LDAP_INVALID_SYNTAX;
			}
		}

#ifdef USE_DN_NORMALIZE
		rc = dnNormalize2( NULL, &out, normalized );
#else
		rc = dnPretty2( NULL, &out, normalized );
#endif

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			free( uidout.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		if( uidout.bv_len ) {
			normalized->bv_val = ch_realloc( normalized->bv_val,
				normalized->bv_len + uidout.bv_len + sizeof("#") );

			/* insert the separator */
			normalized->bv_val[normalized->bv_len++] = '#';

			/* append the UID */
			AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
				uidout.bv_val, uidout.bv_len );
			normalized->bv_len += uidout.bv_len;

			/* terminate */
			normalized->bv_val[normalized->bv_len] = '\0';
		}

		/* the normalized UID has been copied; do not leak it */
		free( uidout.bv_val );
		free( out.bv_val );
	} else {
		/* nothing to normalize; release whatever the copy allocated */
		free( out.bv_val );
	}

	return LDAP_SUCCESS;
}

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
/*
 * Boolean syntax and matching are deliberately strict.
 * A more lenient scheme would normalize and prettify a
 * range of spellings into TRUE and FALSE.
 *
 * booleanValidate() accepts exactly the four bytes "TRUE" or the five
 * bytes "FALSE" and answers LDAP_INVALID_SYNTAX for everything else.
 * syntax is not used.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* strict validation means no normalization is needed
	 * before the simplistic matching
	 */
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
536
	slap_mask_t flags,
537
538
539
540
541
542
543
544
545
546
547
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

548
549
550
551
552
553
554
555
556
/*
 * Validate that a value is a non-empty, well-formed UTF-8 string.
 *
 * Walks the value one character at a time: the length announced by the
 * lead byte must fit in what is left of the value, every trailing byte
 * must have the 10xxxxxx form, and LDAP_UTF8_OFFSET() must agree with
 * the length.  Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 * syntax is not used.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * A character must not extend past the end of the value.
		 * Without this check the tests below would read beyond
		 * the value and the unsigned count would wrap around.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	if( count != 0 ) return LDAP_INVALID_SYNTAX;

	return LDAP_SUCCESS;
}

/*
 * Produce a copy of val with insignificant spaces removed: leading
 * spaces are dropped, each run of spaces is collapsed to one space,
 * and a trailing space is dropped.
 *
 * The copy is made with ber_mem2bv() and then compacted in place; the
 * result is NUL-terminated and its length is stored in
 * normalized->bv_len.  syntax is not used.
 * Always returns LDAP_SUCCESS.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *p, *q, *s, *e;
	int len = 0;

	p = val->bv_val;

	/* Ignore initial whitespace */
	/* All space is ASCII. All ASCII is 1 byte */
	for ( ; p < val->bv_val + val->bv_len && ASCII_SPACE( p[ 0 ] ); p++ );

	normalized->bv_len = val->bv_len - (p - val->bv_val);
	ber_mem2bv( p, normalized->bv_len, 1, normalized );

	assert( normalized->bv_val );

	/* e marks the end of the copied text */
	e = normalized->bv_val + normalized->bv_len;

	/*
	 * p reads and q writes within the same buffer.  len is the size
	 * of the character written last, and s remembers where a space
	 * was written last while that space is still the final character.
	 */
	p = q = normalized->bv_val;
	s = NULL;

	while ( p < e ) {
		q += len;
		if ( ASCII_SPACE( *p ) ) {
			s = q - len;
			len = 1;
			*q = *p++;

			/* Ignore the extra whitespace */
			while ( p < e && ASCII_SPACE( *p ) ) {
				p++;
			}
		} else {
			len = LDAP_UTF8_COPY(q,p);
			s=NULL;
			p+=len;
		}
	}

	assert( normalized->bv_val <= p );
	assert( q+len <= p );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */

	if ( s != NULL ) {
		len = q - s;
		q = s;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *q ) );

	q += len;

	/* null terminate */
	*q = '\0';

	normalized->bv_len = q - normalized->bv_val;

	return LDAP_SUCCESS;
}

674
/* Returns Unicode canonically normalized copy of a substring assertion
675
 * Skipping attribute description */
676
static SubstringsAssertion *
677
678
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
679
	unsigned casefold )
680
681
682
683
684
685
686
687
688
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

689
	if( sa->sa_initial.bv_val != NULL ) {
690
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
691
		if( nsa->sa_initial.bv_val == NULL ) {
692
693
694
695
696
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
697
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
698
699
			/* empty */
		}
700
701
		nsa->sa_any = (struct berval *)ch_malloc( (i + 1) * sizeof(struct berval) );
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
702
703
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
					casefold );
704
			if( nsa->sa_any[i].bv_val == NULL ) {
705
706
707
				goto err;
			}
		}
708
		nsa->sa_any[i].bv_val = NULL;
709
710
	}

711
	if( sa->sa_final.bv_val != NULL ) {
712
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
713
		if( nsa->sa_final.bv_val == NULL ) {
714
715
716
717
718
719
720
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
721
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
722
	if ( nsa->sa_any )ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
723
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
724
725
726
727
	ch_free( nsa );
	return NULL;
}

728
#ifndef SLAPD_APPROX_OLDSINGLESTRING
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746

/*
 * Word-splitting parameters for approximate matching.
 * SLAPD_APPROX_DELIMITER lists the characters that separate words;
 * SLAPD_APPROX_WORDLEN is the shortest word for which the indexer and
 * filter below generate a key.
 */
#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Approximate ("sounds like") match, word by word.
 *
 * Both values are normalized with UTF8bvnormalize() and split at
 * SLAPD_APPROX_DELIMITER.  Every word of the asserted value must have
 * a phonetic() equivalent among the words of the stored value, in the
 * same order.  *matchp is 0 on a match and 1 otherwise, including when
 * either value fails to normalize or the asserted value holds no word.
 * assertedValue is read as a struct berval *.
 * Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *nval, *assertv;
	char *val, **values, **words, *c;
	int i, count, len, nextchunk=0, nextavail=0;

	/* Yes, this is necessary */
	nval = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( nval == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	assertv = UTF8bvnormalize( ((struct berval *)assertedValue), NULL, LDAP_UTF8_APPROX );
	if( assertv == NULL ) {
		ber_bvfree( nval );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Isolate how many words there are */
	for ( c = nval->bv_val, count = 1; *c; c++ ) {
		c = strpbrk( c, SLAPD_APPROX_DELIMITER );
		if ( c == NULL ) break;
		/* cut the value into NUL-terminated words in place */
		*c = '\0';
		count++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( count * sizeof(char *) );
	values = (char **)ch_malloc( count * sizeof(char *) );
	for ( c = nval->bv_val, i = 0;  i < count; i++, c += strlen(c) + 1 ) {
		words[i] = c;
		values[i] = phonetic(c);
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while ( (ber_len_t) nextchunk < assertv->bv_len ) {
		len = strcspn( assertv->bv_val + nextchunk, SLAPD_APPROX_DELIMITER);
		if( len == 0 ) {
			/* a delimiter: step over it */
			nextchunk++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=nextavail; i<count; i++ )
				if( !strncasecmp( assertv->bv_val + nextchunk, words[i], 1 )) {
					nextavail=i+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			assertv->bv_val[nextchunk+len] = '\0';
			val = phonetic( assertv->bv_val + nextchunk );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=nextavail; i<count; i++ ){
				if( !strcmp( val, values[i] ) ){
					nextavail = i+1;
					break;
				}
			}
			ch_free( val );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= count ) {
			nextavail=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		nextchunk += len+1;
	}

	/* If some of the words were seen, call it a match */
	if( nextavail > 0 ) {
		*matchp = 0;
	}
	else {
		*matchp = 1;
	}

	/* Cleanup allocs */
	ber_bvfree( assertv );
	for( i=0; i<count; i++ ) {
		ch_free( values[i] );
	}
	ch_free( values );
	ch_free( words );
	ber_bvfree( nval );

	return LDAP_SUCCESS;
}

846
static int 
847
848
849
850
851
852
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
853
854
	BerVarray values,
	BerVarray *keysp )
855
{
856
	char *c;
857
	int i,j, len, wordcount, keycount=0;
858
	struct berval *newkeys;
859
	BerVarray keys=NULL;
860

861
	for( j=0; values[j].bv_val != NULL; j++ ) {
862
		struct berval val = { 0, NULL };
863
		/* Yes, this is necessary */
864
865
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
866

867
		/* Isolate how many words there are. There will be a key for each */
868
		for( wordcount = 0, c = val.bv_val; *c; c++) {
869
870
871
872
873
874
875
876
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
877
878
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
879
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
880
881
882
883
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
884
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
885
886
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
887
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
888
889
890
891
			keycount++;
			i++;
		}

892
		ber_memfree( val.bv_val );
893
	}
894
	keys[keycount].bv_val = NULL;
895
896
897
898
899
	*keysp = keys;

	return LDAP_SUCCESS;
}

900
static int 
901
902
903
904
905
906
907
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
908
	BerVarray *keysp )
909
{
910
	char *c;
911
	int i, count, len;
912
	struct berval *val;
913
	BerVarray keys;
914

915
	/* Yes, this is necessary */
916
917
	val = UTF8bvnormalize( ((struct berval *)assertValue), NULL, LDAP_UTF8_APPROX );
	if( val == NULL || val->bv_val == NULL ) {
918
919
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
920
		*keysp = keys;
921
		ber_bvfree( val );
922
923
924
		return LDAP_SUCCESS;
	}

925
	/* Isolate how many words there are. There will be a key for each */
926
	for( count = 0,c = val->bv_val; *c; c++) {
927
928
929
930
931
932
933
934
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
935
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
936
937

	/* Get a phonetic copy of each word */
938
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
939
940
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
941
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
942
943
944
		i++;
	}

945
	ber_bvfree( val );
946

947
	keys[count].bv_val = NULL;
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Approximate match over the whole value (single-string variant).
 *
 * Both values are passed through UTF8normalize(), strip8bitChars() and
 * phonetic(); *matchp receives the strcmp() result of the two phonetic
 * strings.  *matchp is 1 when the stored value fails to normalize and
 * -1 when the asserted value does.  assertedValue is read as a
 * struct berval *.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *vapprox, *avapprox;
	char *s, *t;

	/* Yes, this is necessary */
	s = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( s == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	t = UTF8normalize( ((struct berval *)assertedValue),
			   UTF8_NOCASEFOLD );
	if( t == NULL ) {
		free( s );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	vapprox = phonetic( strip8bitChars( s ) );
	avapprox = phonetic( strip8bitChars( t ) );

	free( s );
	free( t );

	*matchp = strcmp( vapprox, avapprox );

	ch_free( vapprox );
	ch_free( avapprox );

	return LDAP_SUCCESS;
}

999
static int 
1000
1001
1002
1003
1004
1005
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1006
1007
	BerVarray values,
	BerVarray *keysp )
1008
1009
{
	int i;
1010
	BerVarray *keys;
1011
	char *s;
1012

1013
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1014
		/* empty - just count them */
1015
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1016
1017

	/* we should have at least one value at this point */
1018
1019
	assert( i > 0 );

1020
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
1021
1022

	/* Copy each value and run it through phonetic() */
1023
	for( i=0; values[i].bv_val != NULL; i++ ) {
1024
		/* Yes, this is necessary */
1025
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1026
1027

		/* strip 8-bit chars and run through phonetic() */
1028
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1029
		free( s );
1030
	}
1031
	keys[i].bv_val = NULL;
1032
1033
1034
1035
1036
1037

	*keysp = keys;
	return LDAP_SUCCESS;
}


1038
static int 
1039
1040
1041
1042
1043
1044
1045
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
1046
	BerVarray *keysp )
1047
{
1048
	BerVarray keys;
1049
	char *s;
1050

1051
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1052

1053
	/* Yes, this is necessary */
1054
	s = UTF8normalize( ((struct berval *)assertValue),
1055
1056
1057
1058
1059
1060
1061
1062
1063
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1064
1065
1066
1067
1068
1069
1070

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1071
static int
1072
caseExactMatch(
1073
	int *matchp,
1074
	slap_mask_t flags,
1075
1076
1077
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
1078
	void *assertedValue )
1079
{
1080
1081
1082
	*matchp = UTF8bvnormcmp( value,
		(struct berval *) assertedValue,
		LDAP_UTF8_NOCASEFOLD );
1083
	return LDAP_SUCCESS;
1084
1085
}

1086
static int
1087
caseExactIgnoreSubstringsMatch(
1088
	int *matchp,
1089
	slap_mask_t flags,
1090
1091
1092
1093
1094
1095
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = 0;
Pierangelo Masarati's avatar
Pierangelo Masarati committed
1096
	SubstringsAssertion *sub = NULL;
1097
	struct berval left = { 0, NULL };
1098
1099
	int i;
	ber_len_t inlen=0;
1100
	char *nav = NULL;