schema_init.c 106 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
8
9
10
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
20
#include "lber_pvt.h"
21

22
23
#include "ldap_utf8.h"

24
25
26
27
28
29
#include "lutil_hash.h"
/*
 * Local aliases for the lutil hash API (declared in lutil_hash.h).
 * The index/filter key generators in this file go through these names
 * rather than calling the lutil_HASH* symbols directly.
 */
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
30

31
/* recycled validation routines */
32
#define berValidate						blobValidate
33
34

/* unimplemented prettiers */
35
#define integerPretty					NULL
36
37

/* recycled matching routines */
38
#define bitStringMatch					octetStringMatch
39
40
41
#define numericStringMatch				caseIgnoreIA5Match
#define objectIdentifierMatch			caseIgnoreIA5Match
#define telephoneNumberMatch			caseIgnoreIA5Match
42
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
43
44
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
45
#define uniqueMemberMatch				dnMatch
46
#define integerFirstComponentMatch		integerMatch
47

48
49
/* approx matching rules */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
50
51
52
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
53
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
54
#define IA5StringApproxMatch			approxMatch
55
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
56
#define IA5StringApproxFilter			approxFilter
57

58
/* ordering matching rules */
59
60
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseExactOrderingMatch			caseExactMatch
61
#define integerOrderingMatch			integerMatch
62

63
/* unimplemented matching routines */
64
65
66
67
#define caseIgnoreListMatch				NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch		NULL

Kurt Zeilenga's avatar
Kurt Zeilenga committed
68
#ifdef SLAPD_ACI_ENABLED
69
#define OpenLDAPaciMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
70
71
#endif
#ifdef SLAPD_AUTHPASSWD
72
#define authPasswordMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
73
#endif
74
75

/* recycled indexing/filtering routines */
76
77
#define dnIndexer				caseExactIgnoreIndexer
#define dnFilter				caseExactIgnoreFilter
78
79
#define bitStringFilter			octetStringFilter
#define bitStringIndexer		octetStringIndexer
80

81
82
83
84
85
#define telephoneNumberIndexer			caseIgnoreIA5Indexer
#define telephoneNumberFilter			caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
 * File-local pointers to three specific matching rules, plus a table
 * (mr_ptr) that pairs each pointer's address with the OID string of the
 * rule it is meant to refer to.
 * NOTE(review): the code that walks mr_ptr and fills the pointers in is
 * not visible in this part of the file -- confirm where that happens.
 */
static MatchingRule *caseExactMatchingRule;
static MatchingRule *caseExactSubstringsMatchingRule;
static MatchingRule *integerFirstComponentMatchingRule;

static const struct MatchingRulePtr {
	const char   *oid;	/* matching rule OID, as a dotted string */
	MatchingRule **mr;	/* address of the pointer associated with that OID */
} mr_ptr [] = {
	/* must match OIDs below */
	{ "2.5.13.5",  &caseExactMatchingRule },
	{ "2.5.13.7",  &caseExactSubstringsMatchingRule },
	{ "2.5.13.29", &integerFirstComponentMatchingRule }
};

100

101
/*
 * Case-insensitive search for a single character in a berval.
 *
 * Scans the first bv->bv_len bytes of bv->bv_val for the upper- or
 * lower-case form of c.  On a hit, stores the byte offset of the hit in
 * *len and returns a pointer to it.  Returns NULL (leaving *len alone)
 * when c is 0 or no byte matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	ber_len_t pos = 0;
	char lc, uc;

	/* a NUL needle never matches */
	if( c == 0 ) return NULL;

	lc = TOLOWER( c );
	uc = TOUPPER( c );

	while( pos < bv->bv_len ) {
		const char ch = bv->bv_val[pos];

		if( ch == uc || ch == lc ) {
			*len = pos;
			return bv->bv_val + pos;
		}
		pos++;
	}

	return NULL;
}
118

119
120
121
static int
octetStringMatch(
	int *matchp,
122
	slap_mask_t flags,
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
141
static int octetStringIndexer(
142
143
	slap_mask_t use,
	slap_mask_t flags,
144
145
146
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
147
148
	BerVarray values,
	BerVarray *keysp )
149
150
151
{
	int i;
	size_t slen, mlen;
152
	BerVarray keys;
153
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
154
	unsigned char	HASHdigest[HASH_BYTES];
155
	struct berval digest;
156
157
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
158

159
	for( i=0; values[i].bv_val != NULL; i++ ) {
160
161
162
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
163
164
165
	/* we should have at least one value at this point */
	assert( i > 0 );

166
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
167

168
169
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
170

171
	for( i=0; values[i].bv_val != NULL; i++ ) {
172
		HASH_Init( &HASHcontext );
173
		if( prefix != NULL && prefix->bv_len > 0 ) {
174
			HASH_Update( &HASHcontext,
175
176
				prefix->bv_val, prefix->bv_len );
		}
177
		HASH_Update( &HASHcontext,
178
			syntax->ssyn_oid, slen );
179
		HASH_Update( &HASHcontext,
180
			mr->smr_oid, mlen );
181
		HASH_Update( &HASHcontext,
182
			values[i].bv_val, values[i].bv_len );
183
		HASH_Final( HASHdigest, &HASHcontext );
184

185
		ber_dupbv( &keys[i], &digest );
186
187
	}

188
	keys[i].bv_val = NULL;
189
190
191
192
193
194
195

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
196
static int octetStringFilter(
197
198
	slap_mask_t use,
	slap_mask_t flags,
199
200
201
202
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
203
	BerVarray *keysp )
204
205
{
	size_t slen, mlen;
206
	BerVarray keys;
207
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
208
	unsigned char	HASHdigest[HASH_BYTES];
209
210
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
211
212
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
213

214
215
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
216

217
	keys = ch_malloc( sizeof( struct berval ) * 2 );
218

219
	HASH_Init( &HASHcontext );
220
	if( prefix != NULL && prefix->bv_len > 0 ) {
221
		HASH_Update( &HASHcontext,
222
223
			prefix->bv_val, prefix->bv_len );
	}
224
	HASH_Update( &HASHcontext,
225
		syntax->ssyn_oid, slen );
226
	HASH_Update( &HASHcontext,
227
		mr->smr_oid, mlen );
228
	HASH_Update( &HASHcontext,
229
		value->bv_val, value->bv_len );
230
	HASH_Final( HASHdigest, &HASHcontext );
231

232
233
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
234
235
236
237
238

	*keysp = keys;

	return LDAP_SUCCESS;
}
239

240
241
242
243
244
245
/*
 * Validate a "name and optional UID" value: a DN, optionally followed
 * by '#' and a bit string of the form '0101'B.
 *
 * An empty value is accepted.  Otherwise the value is copied; if the
 * copy ends in 'B preceded by a quote, the trailing #'...'B part is
 * checked and cut off, and whatever remains is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS for an empty value, LDAP_OTHER when the copy
 * cannot be made, LDAP_INVALID_SYNTAX for a malformed UID suffix, and
 * the result of dnValidate() otherwise.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* only look at the last two bytes when there are two bytes */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest well-formed suffix is #''B (4 bytes).
		 * Anything shorter cannot hold the opening quote and the
		 * '#', and the index arithmetic below would run off the
		 * front of the buffer.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits */
		for( i = dn.bv_len-3; i > 1; i-- ) {
			if( dn.bv_val[i] != '0' && dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* the digits must be preceded by #' */
		if( dn.bv_val[i] != '\'' ||
		    dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a "name and optional UID" value.
 *
 * The DN part is run through dnNormalize2() (when USE_DN_NORMALIZE is
 * defined) or dnPretty2(); a trailing #'...'B bit string, if present,
 * is appended unchanged to the result.  The result is returned in
 * *normalized as newly allocated memory.
 *
 * Returns LDAP_SUCCESS on success, LDAP_OTHER when the working copy
 * cannot be made, and LDAP_INVALID_SYNTAX when the UID separator is
 * missing or the DN part is rejected.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	char *uid = NULL;
	ber_len_t uidlen = 0;
	int rc;

	if( val->bv_len == 0 ) {
		/*
		 * Nothing to normalize.  Hand back a copy of the empty
		 * value so the caller never sees an unset *normalized.
		 * NOTE(review): callers are assumed to own and free the
		 * result as in the non-empty case -- confirm.
		 */
		ber_dupbv( normalized, val );
		return LDAP_SUCCESS;
	}

	/* work on a private copy: the UID is cut off in place below */
	ber_dupbv( &out, val );
	if( out.bv_val == NULL ) return LDAP_OTHER;

	/* same UID test as nameUIDValidate: the value ends in 'B */
	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		uid = strrchr( out.bv_val, '#' );

		if( uid == NULL ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		uidlen = out.bv_len - (uid - out.bv_val);
		/* temporarily trim the UID */
		*uid = '\0';
		out.bv_len -= uidlen;
	}

#ifdef USE_DN_NORMALIZE
	rc = dnNormalize2( NULL, &out, normalized );
#else
	rc = dnPretty2( NULL, &out, normalized );
#endif

	if( rc != LDAP_SUCCESS ) {
		free( out.bv_val );
		return LDAP_INVALID_SYNTAX;
	}

	if( uidlen ) {
		/* build "<normalized DN>#'...'B" in a buffer large enough */
		struct berval b2;
		ber_len_t dnlen = normalized->bv_len;

		b2.bv_len = dnlen + uidlen;
		b2.bv_val = ch_malloc( b2.bv_len + 1 );
		AC_MEMCPY( b2.bv_val, normalized->bv_val, dnlen );

		/* restore the separator */
		*uid = '#';
		/* append the UID to the new buffer, not to the DN buffer */
		AC_MEMCPY( b2.bv_val + dnlen, uid, uidlen );
		b2.bv_val[b2.bv_len] = '\0';

		free( normalized->bv_val );
		*normalized = b2;
	}

	free( out.bv_val );
	return LDAP_SUCCESS;
}

344
345
346
347
348
349
350
351
352
/*
 * Validator that never accepts a value: both arguments are ignored and
 * LDAP_OTHER is returned unconditionally.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value is accepted.
	 * NOTE(review): LDAP_OTHER is returned rather than
	 * LDAP_INVALID_SYNTAX -- confirm that this is intended. */
	return LDAP_OTHER;
}

353
static int
354
blobValidate(
355
356
357
358
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
359
	return LDAP_SUCCESS;
360
361
}

362
363
364
365
366
367
368
369
370
371
372
373
374
/*
 * Validate a bit string.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'B"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is strict so that no normalization is required before
 * simplistic matching.  Returns LDAP_SUCCESS for a well-formed value
 * and LDAP_INVALID_SYNTAX otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *str = in->bv_val;
	const ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* need at least the two quotes and the trailing B */
	if( n < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* framing: opening quote, closing quote, B */
	if( !( str[0] == '\'' && str[n-2] == '\'' && str[n-1] == 'B' ) ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos + 2 < n; pos++ ) {
		if( str[pos] != '0' && str[pos] != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

399
400
401
402
/*
 * Normalize a bit string.
 *
 * A normalized bit string has no extraneous (leading) zero bits.
 * That is, '00010'B is normalized to '10'B.
 * However, as a special case, a string of only zero bits (or no bits)
 * is normalized to '0'B.
 *
 * The input is read up to its terminating NUL, so it is expected to be
 * NUL-terminated and to start with the opening quote.  The result is
 * returned in *normalized as newly allocated memory.
 * Always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/* start at the first bit, just past the opening quote */
	char *bit = val->bv_val + 1;

	/* skip the leading zero bits */
	while( *bit == '0' ) {
		bit++;
	}

	if( *bit == '\'' ) {
		/* no non-zero bits */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* the result can never be longer than the input */
	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	normalized->bv_val[0] = '\'';
	normalized->bv_len = 1;

	/* copy the remaining bits together with the closing 'B */
	while( *bit != '\0' ) {
		normalized->bv_val[normalized->bv_len] = *bit;
		normalized->bv_len++;
		bit++;
	}

	normalized->bv_val[normalized->bv_len] = '\0';

	return LDAP_SUCCESS;
}

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	/*
	 * Only the exact spellings TRUE and FALSE are accepted.  Being
	 * this strict means no normalization is needed before simplistic
	 * matching.
	 */
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
470
	slap_mask_t flags,
471
472
473
474
475
476
477
478
479
480
481
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

482
483
484
485
486
487
488
489
490
/*
 * Validate that a value is a non-empty, well-formed UTF-8 string.
 *
 * The value is walked one character at a time.  For each character the
 * length announced by its first byte is obtained, the sequence is
 * required to fit inside what is left of the value, every trailing
 * byte is required to have the form 10xxxxxx, and the announced length
 * is required to agree with LDAP_UTF8_OFFSET().
 *
 * Returns LDAP_SUCCESS for a valid string and LDAP_INVALID_SYNTAX
 * otherwise (including for an empty value).
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * A sequence may not run past the end of the value.
		 * Without this check the trailing bytes examined below
		 * could lie outside the value, and the unsigned count
		 * would wrap around when len is subtracted from it.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks: each case falls into the next one so
		 * that all trailing bytes of the sequence are examined */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHRU */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	if( count != 0 ) return LDAP_INVALID_SYNTAX;

	return LDAP_SUCCESS;
}

/*
 * Whitespace-normalize a UTF-8 string.
 *
 * Leading whitespace is dropped, every run of whitespace inside the
 * string is collapsed to its first whitespace character, and a trailing
 * whitespace character is removed.  The work is done in place on a copy
 * of the value, which is returned (NUL-terminated) in *normalized.
 * Always returns LDAP_SUCCESS.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *src, *dst, *before_space, *end;
	int step = 0;	/* size of the character written last */

	/* Ignore initial whitespace */
	/* All space is ASCII. All ASCII is 1 byte */
	src = val->bv_val;
	while( src < val->bv_val + val->bv_len && ASCII_SPACE( src[ 0 ] ) ) {
		src++;
	}

	/* copy what is left; the copy is then compacted in place */
	normalized->bv_len = val->bv_len - (src - val->bv_val);
	ber_mem2bv( src, normalized->bv_len, 1, normalized );
	end = normalized->bv_val + normalized->bv_len;

	assert( normalized->bv_val );

	src = dst = normalized->bv_val;
	before_space = NULL;

	for( ; src < end; ) {
		/* move past the character written in the previous round */
		dst += step;

		if( !ASCII_SPACE( *src ) ) {
			step = LDAP_UTF8_COPY(dst,src);
			before_space = NULL;
			src += step;
			continue;
		}

		/* remember where the character before this space starts */
		before_space = dst - step;
		step = 1;
		*dst = *src++;

		/* Ignore the extra whitespace */
		while( ASCII_SPACE( *src ) ) {
			src++;
		}
	}

	assert( normalized->bv_val <= src );
	assert( dst+step <= src );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in space, back up so that the space is
	 * dropped.  One position is enough because the loop above
	 * collapsed all whitespace to a single space.
	 */
	if( before_space != NULL ) {
		step = dst - before_space;
		dst = before_space;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *dst ) );

	dst += step;

	/* null terminate */
	*dst = '\0';

	normalized->bv_len = dst - normalized->bv_val;

	return LDAP_SUCCESS;
}

608
/* Returns Unicode canonically normalized copy of a substring assertion
609
 * Skipping attribute description */
610
static SubstringsAssertion *
611
612
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
613
	unsigned casefold )
614
615
616
617
618
619
620
621
622
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

623
	if( sa->sa_initial.bv_val != NULL ) {
624
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
625
		if( nsa->sa_initial.bv_val == NULL ) {
626
627
628
629
630
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
631
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
632
633
			/* empty */
		}
634
635
		nsa->sa_any = (struct berval *)ch_malloc( (i + 1) * sizeof(struct berval) );
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
636
637
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
					casefold );
638
			if( nsa->sa_any[i].bv_val == NULL ) {
639
640
641
				goto err;
			}
		}
642
		nsa->sa_any[i].bv_val = NULL;
643
644
	}

645
	if( sa->sa_final.bv_val != NULL ) {
646
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
647
		if( nsa->sa_final.bv_val == NULL ) {
648
649
650
651
652
653
654
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
655
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
656
	if ( nsa->sa_any )ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
657
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
658
659
660
661
	ch_free( nsa );
	return NULL;
}

662
#ifndef SLAPD_APPROX_OLDSINGLESTRING
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Word-based approximate match.
 *
 * Both values are normalized with UTF8bvnormalize( LDAP_UTF8_APPROX )
 * and split into words at SLAPD_APPROX_DELIMITER.  Every word of the
 * asserted value must be found, by its phonetic() code, among the words
 * of the stored value, in the same order.  *matchp is set to 0 on a
 * match and to 1 otherwise (also when either value fails to
 * normalize).  flags, syntax and mr are not used.
 * Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *wanted;
	char *code, **codes, **words, *c;
	int k, count, wlen, offset=0, next=0;

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8bvnormalize( ((struct berval *)assertedValue), NULL, LDAP_UTF8_APPROX );
	if( wanted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Cut the stored value into words in place, counting them */
	count = 1;
	c = stored->bv_val;
	while( *c ) {
		c = strpbrk( c, SLAPD_APPROX_DELIMITER );
		if( c == NULL ) break;
		*c++ = '\0';
		count++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( count * sizeof(char *) );
	codes = (char **)ch_malloc( count * sizeof(char *) );
	c = stored->bv_val;
	for( k = 0; k < count; k++ ) {
		words[k] = c;
		codes[k] = phonetic(c);
		c += strlen(c) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	wlen = 0;
	while( (ber_len_t) offset < wanted->bv_len ) {
		char *chunk = wanted->bv_val + offset;

		wlen = strcspn( chunk, SLAPD_APPROX_DELIMITER);

		/* sitting on a delimiter: step over it */
		if( wlen == 0 ) {
			offset++;
			continue;
		}

#if defined(SLAPD_APPROX_INITIALS)
		if( wlen == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( k = next; k < count; k++ ) {
				if( strncasecmp( chunk, words[k], 1 ) == 0 ) {
					next = k+1;
					break;
				}
			}
		} else
#endif
		{
			/* Isolate the next word in the asserted value and phonetic it */
			chunk[wlen] = '\0';
			code = phonetic( chunk );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( k = next; k < count; k++ ) {
				if( strcmp( code, codes[k] ) == 0 ) {
					next = k+1;
					break;
				}
			}
			ch_free( code );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( k >= count ) {
			next = -1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += wlen+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( next > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( wanted );
	for( k = 0; k < count; k++ ) {
		ch_free( codes[k] );
	}
	ch_free( codes );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

780
static int 
781
782
783
784
785
786
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
787
788
	BerVarray values,
	BerVarray *keysp )
789
{
790
	char *c;
791
	int i,j, len, wordcount, keycount=0;
792
	struct berval *newkeys;
793
	BerVarray keys=NULL;
794

795
	for( j=0; values[j].bv_val != NULL; j++ ) {
796
		struct berval val = { 0, NULL };
797
		/* Yes, this is necessary */
798
799
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
800

801
		/* Isolate how many words there are. There will be a key for each */
802
		for( wordcount = 0, c = val.bv_val; *c; c++) {
803
804
805
806
807
808
809
810
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
811
812
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
813
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
814
815
816
817
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
818
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
819
820
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
821
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
822
823
824
825
			keycount++;
			i++;
		}

826
		ber_memfree( val.bv_val );
827
	}
828
	keys[keycount].bv_val = NULL;
829
830
831
832
833
	*keysp = keys;

	return LDAP_SUCCESS;
}

834
static int 
835
836
837
838
839
840
841
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
842
	BerVarray *keysp )
843
{
844
	char *c;
845
	int i, count, len;
846
	struct berval *val;
847
	BerVarray keys;
848

849
	/* Yes, this is necessary */
850
851
	val = UTF8bvnormalize( ((struct berval *)assertValue), NULL, LDAP_UTF8_APPROX );
	if( val == NULL || val->bv_val == NULL ) {
852
853
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
854
		*keysp = keys;
855
		ber_bvfree( val );
856
857
858
		return LDAP_SUCCESS;
	}

859
	/* Isolate how many words there are. There will be a key for each */
860
	for( count = 0,c = val->bv_val; *c; c++) {
861
862
863
864
865
866
867
868
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
869
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
870
871

	/* Get a phonetic copy of each word */
872
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
873
874
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
875
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
876
877
878
		i++;
	}

879
	ber_bvfree( val );
880

881
	keys[count].bv_val = NULL;
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Single-string approximate match.
 *
 * Both values are normalized with UTF8normalize( UTF8_NOCASEFOLD ), run
 * through strip8bitChars() and phonetic(), and the two phonetic strings
 * are compared with strcmp(); that result is stored in *matchp.
 * *matchp is set to 1 when the stored value cannot be normalized and to
 * -1 when the asserted value cannot.  flags, syntax and mr are not
 * used.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *stored_code, *wanted_code;
	char *stored_norm, *wanted_norm;

	/* Yes, this is necessary */
	stored_norm = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( stored_norm == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted_norm = UTF8normalize( ((struct berval *)assertedValue),
			   UTF8_NOCASEFOLD );
	if( wanted_norm == NULL ) {
		free( stored_norm );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* phonetic codes of the 8-bit-stripped strings */
	stored_code = phonetic( strip8bitChars( stored_norm ) );
	wanted_code = phonetic( strip8bitChars( wanted_norm ) );

	free( stored_norm );
	free( wanted_norm );

	*matchp = strcmp( stored_code, wanted_code );

	ch_free( stored_code );
	ch_free( wanted_code );

	return LDAP_SUCCESS;
}

933
static int 
934
935
936
937
938
939
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
940
941
	BerVarray values,
	BerVarray *keysp )
942
943
{
	int i;
944
	BerVarray *keys;
945
	char *s;
946

947
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
948
		/* empty - just count them */
949
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
950
951

	/* we should have at least one value at this point */
952
953
	assert( i > 0 );

954
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
955
956

	/* Copy each value and run it through phonetic() */
957
	for( i=0; values[i].bv_val != NULL; i++ ) {
958
		/* Yes, this is necessary */
959
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
960
961

		/* strip 8-bit chars and run through phonetic() */
962
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
963
		free( s );
964
	}
965
	keys[i].bv_val = NULL;
966
967
968
969
970
971

	*keysp = keys;
	return LDAP_SUCCESS;
}


972
static int 
973
974
975
976
977
978
979
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
980
	BerVarray *keysp )
981
{
982
	BerVarray keys;
983
	char *s;
984

985
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
986

987
	/* Yes, this is necessary */
988
	s = UTF8normalize( ((struct berval *)assertValue),
989
990
991
992
993
994
995
996
997
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
998
999
1000
1001
1002
1003
1004

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1005
static int
1006
caseExactMatch(
1007
	int *matchp,
1008
	slap_mask_t flags,
1009
1010
1011
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,