schema_init.c 108 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/****
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

  In LDAPv3, a directory string is a UTF-8 encoded UCS string.

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if it held one space, not empty (which would
  be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces and
  could be empty.  However, in X.500, all attribute values of numeric
  string carry a non-empty constraint.  Unfortunately, some assertion
  values don't carry this constraint (but it's unclear how such
  an assertion could ever be true).  In LDAP, there is one syntax
  (numericString) not two (numericString with constraint, numericString
  without constraint).  This should be treated as numericString with
  non-empty constraint.

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters and
  can be empty.  In X.500, semantics much like NumericString excepting
  uses insignificant space handling instead of ignoring all spaces.

IA5String
  Basically same as PrintableString.

****/




54
55
56
#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
57
#include <limits.h>
58
59

#include <ac/ctype.h>
60
#include <ac/errno.h>
61
62
63
64
65
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
66
#include "lber_pvt.h"
67

68
69
#include "ldap_utf8.h"

70
71
72
73
74
75
#include "lutil_hash.h"
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
76

77
/* recycled validation routines */
78
#define berValidate						blobValidate
79
80

/* unimplemented pretty routines */
81
#define integerPretty					NULL
82
83

/* recycled matching routines */
84
#define bitStringMatch					octetStringMatch
85
86
87
#define numericStringMatch				caseIgnoreIA5Match
#define objectIdentifierMatch			caseIgnoreIA5Match
#define telephoneNumberMatch			caseIgnoreIA5Match
88
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
89
90
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
91
#define uniqueMemberMatch				dnMatch
92
#define integerFirstComponentMatch		integerMatch
93

94
95
/* approx matching rules */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
96
97
98
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
99
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
100
#define IA5StringApproxMatch			approxMatch
101
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
102
#define IA5StringApproxFilter			approxFilter
103

104
/* ordering matching rules */
105
106
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseExactOrderingMatch			caseExactMatch
107
#define integerOrderingMatch			integerMatch
108

109
/* unimplemented matching routines */
110
111
112
113
#define caseIgnoreListMatch				NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch		NULL

Kurt Zeilenga's avatar
Kurt Zeilenga committed
114
#ifdef SLAPD_ACI_ENABLED
115
#define OpenLDAPaciMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
116
117
#endif
#ifdef SLAPD_AUTHPASSWD
118
#define authPasswordMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
119
#endif
120
121

/* recycled indexing/filtering routines */
122
123
#define dnIndexer				caseExactIgnoreIndexer
#define dnFilter				caseExactIgnoreFilter
124
125
#define bitStringFilter			octetStringFilter
#define bitStringIndexer		octetStringIndexer
126

127
128
129
130
131
#define telephoneNumberIndexer			caseIgnoreIA5Indexer
#define telephoneNumberFilter			caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
 * File-local handles to three matching rules.  Where they are assigned
 * is not visible in this part of the file; presumably they are filled
 * in from the mr_ptr table below -- TODO confirm.
 */
static MatchingRule *caseExactMatchingRule;
static MatchingRule *caseExactSubstringsMatchingRule;
static MatchingRule *integerFirstComponentMatchingRule;

/*
 * Table pairing a matching rule OID string with the address of the
 * static pointer (above) that corresponds to that rule.
 */
static const struct MatchingRulePtr {
	const char   *oid;	/* dotted-decimal OID of the matching rule */
	MatchingRule **mr;	/* address of the matching handle above */
} mr_ptr [] = {
	/* must match OIDs below */
	{ "2.5.13.5",  &caseExactMatchingRule },
	{ "2.5.13.7",  &caseExactSubstringsMatchingRule },
	{ "2.5.13.29", &integerFirstComponentMatchingRule }
};

146

147
/*
 * Case-insensitive search for the byte c within the first bv->bv_len
 * bytes of bv->bv_val.
 *
 * On a hit, stores the offset of the first matching byte in *len and
 * returns a pointer to it.  Returns NULL (leaving *len untouched) when
 * c is 0 or no byte matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	char lo, up;
	ber_len_t pos = 0;

	/* a NUL byte is never searched for */
	if( c == 0 ) return NULL;

	lo = TOLOWER( c );
	up = TOUPPER( c );

	while( pos < bv->bv_len ) {
		char ch = bv->bv_val[pos];

		if( ch == up || ch == lo ) {
			*len = pos;
			return bv->bv_val + pos;
		}
		pos++;
	}

	return NULL;
}
164

165
166
167
static int
octetStringMatch(
	int *matchp,
168
	slap_mask_t flags,
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
187
static int octetStringIndexer(
188
189
	slap_mask_t use,
	slap_mask_t flags,
190
191
192
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
193
194
	BerVarray values,
	BerVarray *keysp )
195
196
197
{
	int i;
	size_t slen, mlen;
198
	BerVarray keys;
199
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
200
	unsigned char	HASHdigest[HASH_BYTES];
201
	struct berval digest;
202
203
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
204

205
	for( i=0; values[i].bv_val != NULL; i++ ) {
206
207
208
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
209
210
211
	/* we should have at least one value at this point */
	assert( i > 0 );

212
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
213

214
215
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
216

217
	for( i=0; values[i].bv_val != NULL; i++ ) {
218
		HASH_Init( &HASHcontext );
219
		if( prefix != NULL && prefix->bv_len > 0 ) {
220
			HASH_Update( &HASHcontext,
221
222
				prefix->bv_val, prefix->bv_len );
		}
223
		HASH_Update( &HASHcontext,
224
			syntax->ssyn_oid, slen );
225
		HASH_Update( &HASHcontext,
226
			mr->smr_oid, mlen );
227
		HASH_Update( &HASHcontext,
228
			values[i].bv_val, values[i].bv_len );
229
		HASH_Final( HASHdigest, &HASHcontext );
230

231
		ber_dupbv( &keys[i], &digest );
232
233
	}

234
	keys[i].bv_val = NULL;
235
236
237
238
239
240
241

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
242
static int octetStringFilter(
243
244
	slap_mask_t use,
	slap_mask_t flags,
245
246
247
248
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
249
	BerVarray *keysp )
250
251
{
	size_t slen, mlen;
252
	BerVarray keys;
253
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
254
	unsigned char	HASHdigest[HASH_BYTES];
255
256
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
257
258
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
259

260
261
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
262

263
	keys = ch_malloc( sizeof( struct berval ) * 2 );
264

265
	HASH_Init( &HASHcontext );
266
	if( prefix != NULL && prefix->bv_len > 0 ) {
267
		HASH_Update( &HASHcontext,
268
269
			prefix->bv_val, prefix->bv_len );
	}
270
	HASH_Update( &HASHcontext,
271
		syntax->ssyn_oid, slen );
272
	HASH_Update( &HASHcontext,
273
		mr->smr_oid, mlen );
274
	HASH_Update( &HASHcontext,
275
		value->bv_val, value->bv_len );
276
	HASH_Final( HASHdigest, &HASHcontext );
277

278
279
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
280
281
282
283
284

	*keysp = keys;

	return LDAP_SUCCESS;
}
285

286
287
288
289
290
291
292
293
294
/*
 * Validator that accepts nothing: both arguments are ignored and
 * LDAP_OTHER is returned for every value.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value is accepted; always report LDAP_OTHER */
	return LDAP_OTHER;
}

295
static int
296
blobValidate(
297
298
299
300
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
301
	return LDAP_SUCCESS;
302
303
}

304
305
306
307
308
309
310
311
312
313
314
315
316
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict so that no normalization is needed
 * before simplistic matching.  Returns LDAP_SUCCESS for a well-formed
 * value and LDAP_INVALID_SYNTAX otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t pos, last;

	/* shortest legal form is ''B */
	if( in->bv_len < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	last = in->bv_len - 1;

	/* opening quote, closing quote and the trailing B must be present */
	if( in->bv_val[0] != '\'' ) {
		return LDAP_INVALID_SYNTAX;
	}
	if( in->bv_val[last-1] != '\'' || in->bv_val[last] != 'B' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos + 2 < in->bv_len; pos++ ) {
		char bit = in->bv_val[pos];

		if( bit != '0' && bit != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

341
342
343
344
/*
 * Normalize a Bit String value.
 *
 * A normalized bitString has no extraneous (leading) zero bits.
 * That is, '00010'B is normalized to '10'B.  A value without any
 * non-zero bit is normalized to '0'B.
 *
 * The result is written to a newly allocated, NUL-terminated buffer in
 * *normalized.  The input is scanned up to its terminating NUL byte.
 * Always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/* start at the first bit, just past the opening quote */
	char *src = val->bv_val + 1;
	ber_len_t n;

	/* skip leading zero bits */
	while( *src == '0' ) {
		src++;
	}

	if( *src == '\'' ) {
		/* no non-zero bits */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	/* opening quote, then the remainder of the input verbatim */
	normalized->bv_val[0] = '\'';
	n = 1;
	while( *src != '\0' ) {
		normalized->bv_val[n++] = *src++;
	}

	normalized->bv_val[n] = '\0';
	normalized->bv_len = n;

	return LDAP_SUCCESS;
}

381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
/*
 * Validate a "Name And Optional UID" value: a DN optionally followed by
 * '#' and a bit string such as '0101'B.
 *
 * An empty value is accepted.  When the value ends in 'B the trailing
 * part must have the form #'<binary digits>'B; it is cut off a private
 * copy of the value and the remainder is passed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX, LDAP_OTHER when the copy
 * could not be made, or whatever dnValidate() reports.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	/* work on a copy: the UID is trimmed in place below */
	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length test keeps bv_len-2 from wrapping on a 1-byte value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest possible UID suffix is #''B (4 bytes).
		 * Anything shorter cannot be valid, and would make the
		 * index arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits; i stays >= 1 */
		for( i=dn.bv_len-3; i>1; i-- ) {
			if( dn.bv_val[i] != '0' && dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* the digits must be preceded by #' */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a "Name And Optional UID" value.
 *
 * The value is copied; when it ends in 'B the part from the last '#'
 * onwards is split off and normalized with bitStringNormalize(), the
 * remaining DN is normalized with dnNormalize2() (or dnPretty2() when
 * USE_DN_NORMALIZE is not defined), and the two results are joined
 * again with '#'.
 *
 * An empty input yields an empty result.  Returns LDAP_SUCCESS,
 * LDAP_INVALID_SYNTAX when any part cannot be normalized, or LDAP_OTHER
 * when the working copy cannot be made.
 *
 * All temporaries are released on every path.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	struct berval uidin = { 0, NULL };
	struct berval uidout = { 0, NULL };
	int rc;

	/* ber_dupbv() returns NULL when the copy cannot be made */
	if( ber_dupbv( &out, val ) == NULL ) return LDAP_OTHER;

	if( out.bv_len == 0 ) {
		/* nothing to normalize: hand the empty copy to the caller
		 * rather than leaking it and leaving *normalized unset */
		*normalized = out;
		return LDAP_SUCCESS;
	}

	/* the length test keeps bv_len-2 from wrapping on a 1-byte value */
	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		uidin.bv_val = strrchr( out.bv_val, '#' );

		if( uidin.bv_val == NULL ) {
			rc = LDAP_INVALID_SYNTAX;
			goto done;
		}

		/* length of the suffix, '#' included */
		uidin.bv_len = out.bv_len - (uidin.bv_val - out.bv_val);
		out.bv_len -= uidin.bv_len;

		/* cut the DN at the '#' and step the UID past it */
		uidin.bv_len--;
		*(uidin.bv_val++) = '\0';

		rc = bitStringNormalize( syntax, &uidin, &uidout );

		if( rc != LDAP_SUCCESS ) {
			rc = LDAP_INVALID_SYNTAX;
			goto done;
		}
	}

#ifdef USE_DN_NORMALIZE
	rc = dnNormalize2( NULL, &out, normalized );
#else
	rc = dnPretty2( NULL, &out, normalized );
#endif

	if( rc != LDAP_SUCCESS ) {
		rc = LDAP_INVALID_SYNTAX;
		goto done;
	}

	if( uidout.bv_len ) {
		normalized->bv_val = ch_realloc( normalized->bv_val,
			normalized->bv_len + uidout.bv_len + sizeof("#") );

		/* insert the separator */
		normalized->bv_val[normalized->bv_len++] = '#';

		/* append the UID */
		AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
			uidout.bv_val, uidout.bv_len );
		normalized->bv_len += uidout.bv_len;

		/* terminate */
		normalized->bv_val[normalized->bv_len] = '\0';
	}

	rc = LDAP_SUCCESS;

done:
	/* uidout.bv_val is NULL when no UID was present */
	free( uidout.bv_val );
	free( out.bv_val );
	return rc;
}

494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Validate a Boolean value: only the exact strings "TRUE" and "FALSE"
 * are accepted.  The check is deliberately unforgiving so that no
 * normalization is required before simplistic matching.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
525
	slap_mask_t flags,
526
527
528
529
530
531
532
533
534
535
536
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

537
538
539
540
541
542
543
544
545
/*
 * Validate that a value is a non-empty, well-formed UTF-8 string.
 *
 * The value is walked character by character.  For each character the
 * length indicated by its first byte is obtained, the continuation
 * bytes are checked for the 10xxxxxx pattern, and the length is
 * cross-checked against LDAP_UTF8_OFFSET().
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * A character that claims more bytes than are left is a
		 * truncated sequence.  Reject it here: otherwise the
		 * checks below would look past the end of the value and
		 * the unsigned count would wrap around.
		 */
		if( len > 0 && (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks; each case falls through to the next
		 * so that every continuation byte is examined */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	if( count != 0 ) return LDAP_INVALID_SYNTAX;

	return LDAP_SUCCESS;
}

/*
 * Normalize the insignificant spaces of a UTF-8 string.
 *
 * Leading spaces are dropped, each run of spaces inside the string is
 * collapsed to its first space character, and a trailing space is
 * removed.  The result is built in a newly allocated copy that is
 * returned, NUL-terminated, in *normalized.  Always returns
 * LDAP_SUCCESS.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *src, *dst, *prev, *end;
	int width = 0;	/* bytes written for the previous character */

	/* Ignore initial whitespace */
	/* All space is ASCII. All ASCII is 1 byte */
	src = val->bv_val;
	while ( src < val->bv_val + val->bv_len && ASCII_SPACE( src[ 0 ] ) ) {
		src++;
	}

	/* copy what is left; the copy is then compacted in place */
	normalized->bv_len = val->bv_len - (src - val->bv_val);
	ber_mem2bv( src, normalized->bv_len, 1, normalized );
	end = normalized->bv_val + normalized->bv_len;

	assert( normalized->bv_val );

	src = dst = normalized->bv_val;
	prev = NULL;

	while ( src < end ) {
		/* step over the character written on the previous pass */
		dst += width;

		if ( !ASCII_SPACE( *src ) ) {
			width = LDAP_UTF8_COPY( dst, src );
			prev = NULL;
			src += width;
			continue;
		}

		/* remember where the character before this space starts,
		 * in case the space turns out to be the last one */
		prev = dst - width;
		width = 1;
		*dst = *src++;

		/* Ignore the extra whitespace */
		while ( ASCII_SPACE( *src ) ) {
			src++;
		}
	}

	assert( normalized->bv_val <= src );
	assert( dst+width <= src );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */
	if ( prev != NULL ) {
		width = dst - prev;
		dst = prev;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *dst ) );

	dst += width;

	/* null terminate */
	*dst = '\0';

	normalized->bv_len = dst - normalized->bv_val;

	return LDAP_SUCCESS;
}

663
/* Returns Unicode canonically normalized copy of a substring assertion
664
 * Skipping attribute description */
665
static SubstringsAssertion *
666
667
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
668
	unsigned casefold )
669
670
671
672
673
674
675
676
677
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

678
	if( sa->sa_initial.bv_val != NULL ) {
679
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
680
		if( nsa->sa_initial.bv_val == NULL ) {
681
682
683
684
685
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
686
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
687
688
			/* empty */
		}
689
690
		nsa->sa_any = (struct berval *)ch_malloc( (i + 1) * sizeof(struct berval) );
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
691
692
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
					casefold );
693
			if( nsa->sa_any[i].bv_val == NULL ) {
694
695
696
				goto err;
			}
		}
697
		nsa->sa_any[i].bv_val = NULL;
698
699
	}

700
	if( sa->sa_final.bv_val != NULL ) {
701
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
702
		if( nsa->sa_final.bv_val == NULL ) {
703
704
705
706
707
708
709
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
710
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
711
	if ( nsa->sa_any )ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
712
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
713
714
715
716
	ch_free( nsa );
	return NULL;
}

717
#ifndef SLAPD_APPROX_OLDSINGLESTRING
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Word-based approximate matching.
 *
 * Both values are normalized with UTF8bvnormalize( LDAP_UTF8_APPROX ).
 * The stored value is split into words at SLAPD_APPROX_DELIMITER and
 * phonetic() is applied to each word.  The words of the asserted value
 * are then looked up, in order, among the not yet consumed words of the
 * stored value.
 *
 * *matchp is 0 when every looked-up word of the asserted value was
 * found (and there was at least one), 1 otherwise, including when
 * either value fails to normalize.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *wanted;
	char *code, **codes, **words, *cur;
	int i, nwords, len, offset = 0, nextavail = 0;

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8bvnormalize( ((struct berval *)assertedValue), NULL, LDAP_UTF8_APPROX );
	if( wanted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Split the stored value into words in place and count them */
	nwords = 1;
	cur = stored->bv_val;
	while ( *cur ) {
		cur = strpbrk( cur, SLAPD_APPROX_DELIMITER );
		if ( cur == NULL ) break;
		*cur++ = '\0';
		nwords++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( nwords * sizeof(char *) );
	codes = (char **)ch_malloc( nwords * sizeof(char *) );
	cur = stored->bv_val;
	for ( i = 0; i < nwords; i++ ) {
		words[i] = cur;
		codes[i] = phonetic( cur );
		cur += strlen( cur ) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while ( (ber_len_t) offset < wanted->bv_len ) {
		len = strcspn( wanted->bv_val + offset, SLAPD_APPROX_DELIMITER );

		if( len == 0 ) {
			/* sitting on a delimiter: step over it */
			offset++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=nextavail; i<nwords; i++ )
				if( !strncasecmp( wanted->bv_val + offset, words[i], 1 )) {
					nextavail=i+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			wanted->bv_val[offset+len] = '\0';
			code = phonetic( wanted->bv_val + offset );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=nextavail; i<nwords; i++ ){
				if( !strcmp( code, codes[i] ) ){
					nextavail = i+1;
					break;
				}
			}
			ch_free( code );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= nwords ) {
			nextavail=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += len+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( nextavail > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( wanted );
	for( i=0; i<nwords; i++ ) {
		ch_free( codes[i] );
	}
	ch_free( codes );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

835
static int 
836
837
838
839
840
841
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
842
843
	BerVarray values,
	BerVarray *keysp )
844
{
845
	char *c;
846
	int i,j, len, wordcount, keycount=0;
847
	struct berval *newkeys;
848
	BerVarray keys=NULL;
849

850
	for( j=0; values[j].bv_val != NULL; j++ ) {
851
		struct berval val = { 0, NULL };
852
		/* Yes, this is necessary */
853
854
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
855

856
		/* Isolate how many words there are. There will be a key for each */
857
		for( wordcount = 0, c = val.bv_val; *c; c++) {
858
859
860
861
862
863
864
865
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
866
867
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
868
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
869
870
871
872
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
873
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
874
875
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
876
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
877
878
879
880
			keycount++;
			i++;
		}

881
		ber_memfree( val.bv_val );
882
	}
883
	keys[keycount].bv_val = NULL;
884
885
886
887
888
	*keysp = keys;

	return LDAP_SUCCESS;
}

889
static int 
890
891
892
893
894
895
896
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
897
	BerVarray *keysp )
898
{
899
	char *c;
900
	int i, count, len;
901
	struct berval *val;
902
	BerVarray keys;
903

904
	/* Yes, this is necessary */
905
906
	val = UTF8bvnormalize( ((struct berval *)assertValue), NULL, LDAP_UTF8_APPROX );
	if( val == NULL || val->bv_val == NULL ) {
907
908
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
909
		*keysp = keys;
910
		ber_bvfree( val );
911
912
913
		return LDAP_SUCCESS;
	}

914
	/* Isolate how many words there are. There will be a key for each */
915
	for( count = 0,c = val->bv_val; *c; c++) {
916
917
918
919
920
921
922
923
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
924
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
925
926

	/* Get a phonetic copy of each word */
927
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
928
929
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
930
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
931
932
933
		i++;
	}

934
	ber_bvfree( val );
935

936
	keys[count].bv_val = NULL;
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Single-string approximate matching (used when
 * SLAPD_APPROX_OLDSINGLESTRING is defined).
 *
 * Both values are normalized with UTF8normalize( UTF8_NOCASEFOLD ),
 * passed through strip8bitChars() and phonetic(), and the two results
 * are compared with strcmp().
 *
 * *matchp receives the strcmp() result; it is 1 when the stored value
 * cannot be normalized and -1 when the asserted value cannot.  Always
 * returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *stored_code, *wanted_code;
	char *stored, *wanted;

	/* Yes, this is necessary */
	stored = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8normalize( ((struct berval *)assertedValue),
		UTF8_NOCASEFOLD );
	if( wanted == NULL ) {
		free( stored );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* phonetic codes of the 8-bit-stripped strings */
	stored_code = phonetic( strip8bitChars( stored ) );
	wanted_code = phonetic( strip8bitChars( wanted ) );

	/* the normalized copies are no longer needed */
	free( stored );
	free( wanted );

	*matchp = strcmp( stored_code, wanted_code );

	ch_free( stored_code );
	ch_free( wanted_code );

	return LDAP_SUCCESS;
}

988
static int 
989
990
991
992
993
994
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
995
996
	BerVarray values,
	BerVarray *keysp )
997
998
{
	int i;
999
	BerVarray *keys;
1000
	char *s;
1001

1002
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1003
		/* empty - just count them */
1004
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1005
1006

	/* we should have at least one value at this point */
1007
1008
	assert( i > 0 );

1009
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
1010
1011

	/* Copy each value and run it through phonetic() */
1012
	for( i=0; values[i].bv_val != NULL; i++ ) {
1013
		/* Yes, this is necessary */
1014
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1015
1016

		/* strip 8-bit chars and run through phonetic() */
1017
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1018
		free( s );
1019
	}
1020
	keys[i].bv_val = NULL;
1021
1022
1023
1024
1025
1026

	*keysp = keys;
	return LDAP_SUCCESS;
}


1027
static int 
1028
1029
1030
1031
1032
1033
1034
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
1035
	BerVarray *keysp )
1036
{
1037
	BerVarray keys;
1038
	char *s;
1039

1040
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1041

1042
	/* Yes, this is necessary */
1043
	s = UTF8normalize( ((struct berval *)assertValue),
1044
1045
1046
1047
1048
1049
1050
1051
1052
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1053
1054
1055
1056
1057
1058
1059

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1060
static int
1061
caseExactMatch(
1062
	int *matchp,
1063
	slap_mask_t flags,
1064
1065
1066
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
1067
	void *assertedValue )
1068
{
1069
1070
1071
	*matchp = UTF8bvnormcmp( value,
		(struct berval *) assertedValue,
		LDAP_UTF8_NOCASEFOLD );
1072
	return LDAP_SUCCESS;
1073
1074
}

1075
static int
1076
caseExactIgnoreSubstringsMatch(
1077
	int *matchp,
1078
	slap_mask_t flags,
1079
1080
1081
1082
1083
1084
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = 0;
Pierangelo Masarati's avatar
Pierangelo Masarati committed
1085
	SubstringsAssertion *sub = NULL;
1086
	struct berval left = { 0, NULL };
1087
1088
	int i;
	ber_len_t inlen=0;
1089
	char *nav = NULL;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1090
	unsigned casefold;
1091

1092
	casefold = ( mr != caseExactSubstringsMatchingRule )
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1093
		? LDAP_UTF8_CASEFOLD : LDAP_UTF8_NOCASEFOLD;
1094

1095
	if ( UTF8bvnormalize( value, &left, casefold ) == NULL ) {