schema_init.c 104 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
8
9
10
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
20

21
22
#include "ldap_utf8.h"

23
24
25
26
27
28
#include "lutil_hash.h"
/* Local names for the lutil hash used by the indexer/filter routines. */
#define HASH_BYTES					LUTIL_HASH_BYTES
#define HASH_CONTEXT				lutil_HASH_CTX
#define HASH_Init(c)				lutil_HASHInit(c)
#define HASH_Update(c,buf,len)		lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)				lutil_HASHFinal(d,c)
29

30
/* recycled validation routines */
#define berValidate						blobValidate

/* unimplemented pretty-printers */
#define integerPretty					NULL

/* recycled matching routines */
#define bitStringMatch					octetStringMatch
#define numericStringMatch				caseIgnoreIA5Match
#define objectIdentifierMatch			caseIgnoreIA5Match
#define telephoneNumberMatch			caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
#define uniqueMemberMatch				dnMatch

/* approximate matching rules: both map onto the shared approx* routines */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
#define directoryStringApproxMatch		approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter		approxFilter
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
#define IA5StringApproxMatch			approxMatch
#define IA5StringApproxIndexer			approxIndexer
#define IA5StringApproxFilter			approxFilter

/* ordering matching rules */
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseExactOrderingMatch			caseExactMatch

/* unimplemented matching routines */
#define caseIgnoreListMatch				NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch		NULL
#define integerFirstComponentMatch		NULL

#define OpenLDAPaciMatch				NULL
#define authPasswordMatch				NULL
68
69

/* recycled indexing/filtering routines */
#define dnIndexer							caseExactIgnoreIndexer
#define dnFilter							caseExactIgnoreFilter
#define bitStringFilter						octetStringFilter
#define bitStringIndexer					octetStringIndexer

#define telephoneNumberIndexer				caseIgnoreIA5Indexer
#define telephoneNumberFilter				caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

/* these OID strings must match the OIDs used further down in this file */
#define caseExactMatchOID					"2.5.13.5"
#define caseExactSubstringsMatchOID			"2.5.13.7"

84
/*
 * Locate the first byte of bv that equals c in either upper or lower case.
 *
 * On a hit, the byte's offset is stored through len and a pointer to the
 * byte is returned.  Returns NULL (leaving *len untouched) when c is 0 or
 * no byte within bv->bv_len matches.
 */
static char *bvcasechr( struct berval *bv, int c, ber_len_t *len )
{
	ber_len_t pos = 0;
	int lc = TOLOWER( c );
	int uc = TOUPPER( c );

	/* a NUL search character never matches */
	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		if( uc == bv->bv_val[pos] || lc == bv->bv_val[pos] ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
101

102
103
104
static int
octetStringMatch(
	int *matchp,
105
	slap_mask_t flags,
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
124
static int octetStringIndexer(
125
126
	slap_mask_t use,
	slap_mask_t flags,
127
128
129
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
130
131
	BerVarray values,
	BerVarray *keysp )
132
133
134
{
	int i;
	size_t slen, mlen;
135
	BerVarray keys;
136
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
137
	unsigned char	HASHdigest[HASH_BYTES];
138
	struct berval digest;
139
140
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
141

142
	for( i=0; values[i].bv_val != NULL; i++ ) {
143
144
145
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
146
147
148
	/* we should have at least one value at this point */
	assert( i > 0 );

149
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
150

151
152
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
153

154
	for( i=0; values[i].bv_val != NULL; i++ ) {
155
		HASH_Init( &HASHcontext );
156
		if( prefix != NULL && prefix->bv_len > 0 ) {
157
			HASH_Update( &HASHcontext,
158
159
				prefix->bv_val, prefix->bv_len );
		}
160
		HASH_Update( &HASHcontext,
161
			syntax->ssyn_oid, slen );
162
		HASH_Update( &HASHcontext,
163
			mr->smr_oid, mlen );
164
		HASH_Update( &HASHcontext,
165
			values[i].bv_val, values[i].bv_len );
166
		HASH_Final( HASHdigest, &HASHcontext );
167

168
		ber_dupbv( &keys[i], &digest );
169
170
	}

171
	keys[i].bv_val = NULL;
172
173
174
175
176
177
178

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
179
static int octetStringFilter(
180
181
	slap_mask_t use,
	slap_mask_t flags,
182
183
184
185
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
186
	BerVarray *keysp )
187
188
{
	size_t slen, mlen;
189
	BerVarray keys;
190
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
191
	unsigned char	HASHdigest[HASH_BYTES];
192
193
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
194
195
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
196

197
198
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
199

200
	keys = ch_malloc( sizeof( struct berval ) * 2 );
201

202
	HASH_Init( &HASHcontext );
203
	if( prefix != NULL && prefix->bv_len > 0 ) {
204
		HASH_Update( &HASHcontext,
205
206
			prefix->bv_val, prefix->bv_len );
	}
207
	HASH_Update( &HASHcontext,
208
		syntax->ssyn_oid, slen );
209
	HASH_Update( &HASHcontext,
210
		mr->smr_oid, mlen );
211
	HASH_Update( &HASHcontext,
212
		value->bv_val, value->bv_len );
213
	HASH_Final( HASHdigest, &HASHcontext );
214

215
216
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
217
218
219
220
221

	*keysp = keys;

	return LDAP_SUCCESS;
}
222

223
224
225
226
227
228
/*
 * Validate a "Name and Optional UID" value: a DN optionally followed by
 * '#' and a bit string of the form '0101'B.
 *
 * An empty value is accepted.  When the value ends in 'B preceded by a
 * quote, the trailing #'...'B part is checked and cut off, and what is
 * left is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX for a malformed UID part,
 * LDAP_OTHER if the working copy could not be made, or whatever
 * dnValidate() returns for the DN part.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	/* work on a copy: the UID is trimmed in place below */
	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length guard keeps bv_len-2 from wrapping on a 1-byte value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest possible UID suffix is #''B (4 bytes).  Anything
		 * shorter cannot hold it, and would make the unsigned index
		 * arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* the digits must be introduced by #' */
		if( dn.bv_val[i] != '\'' ||
		    dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	/* release the duplicated buffer, not the berval struct on the stack */
	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a "Name and Optional UID" value.
 *
 * The DN part is normalized with dnNormalize2() (or dnPretty2() when
 * USE_DN_NORMALIZE is not defined).  If the value carries a trailing
 * #'...'B UID, recognised the same way nameUIDValidate does, the UID
 * including its '#' separator is appended unchanged to the normalized DN.
 *
 * An empty value normalizes to an empty value.
 *
 * Returns LDAP_SUCCESS with the result in *normalized, or
 * LDAP_INVALID_SYNTAX when the UID separator is missing or the DN part
 * cannot be normalized.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	int rc;
	ber_len_t dnlen;
	char *uid = NULL;
	ber_len_t uidlen = 0;

	/* work on a copy: the UID is cut off in place below */
	ber_dupbv( &out, val );

	if( out.bv_len == 0 ) {
		/* nothing to normalize: hand back the (empty) copy itself */
		*normalized = out;
		return LDAP_SUCCESS;
	}

	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		uid = strrchr( out.bv_val, '#' );

		if( uid == NULL ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		uidlen = out.bv_len - (uid - out.bv_val);
		/* temporarily trim the UID */
		*uid = '\0';
		out.bv_len -= uidlen;
	}

#ifdef USE_DN_NORMALIZE
	rc = dnNormalize2( NULL, &out, normalized );
#else
	rc = dnPretty2( NULL, &out, normalized );
#endif

	if( rc != LDAP_SUCCESS ) {
		free( out.bv_val );
		return LDAP_INVALID_SYNTAX;
	}

	dnlen = normalized->bv_len;

	if( uidlen ) {
		struct berval b2;

		/* room for the DN, the UID (with its '#') and the terminator */
		b2.bv_val = ch_malloc(dnlen + uidlen + 1);
		AC_MEMCPY( b2.bv_val, normalized->bv_val, dnlen );

		/* restore the separator */
		*uid = '#';
		/* append the UID to the new buffer, not to the old one */
		AC_MEMCPY( b2.bv_val+dnlen, uid, uidlen );
		b2.bv_len = dnlen + uidlen;
		b2.bv_val[b2.bv_len] = '\0';

		/* swap the DN-only result for the combined one */
		free(normalized->bv_val);
		*normalized = b2;
	}

	free( out.bv_val );

	return LDAP_SUCCESS;
}

327
328
329
330
331
332
333
334
335
/*
 * Validator that accepts nothing: it ignores both arguments and always
 * reports LDAP_OTHER.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	(void) syntax;
	(void) in;

	/* no value passes this validator */
	return LDAP_OTHER;
}

336
static int
337
blobValidate(
338
339
340
341
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
342
	return LDAP_SUCCESS;
343
344
}

345
346
347
348
349
350
351
352
353
354
355
356
357
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict (opening quote, only 0/1 digits, then
 * 'B at the very end) so that simple matching needs no normalization
 * first.  Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t pos;

	/* shortest legal value is ''B */
	if( in->bv_len < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* opening quote */
	if( in->bv_val[0] != '\'' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* closing quote followed by the B marker */
	if( in->bv_val[in->bv_len-2] != '\'' ) {
		return LDAP_INVALID_SYNTAX;
	}
	if( in->bv_val[in->bv_len-1] != 'B' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes must be a binary digit */
	for( pos=1; pos+2 < in->bv_len; pos++ ) {
		if( in->bv_val[pos] != '0' && in->bv_val[pos] != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

382
383
384
385
/*
 * Normalize a Bit String value.
 *
 * A normalized bit string has no extraneous (leading) zero bits, so
 * '00010'B becomes '10'B.  A string with no non-zero bit at all
 * normalizes to '0'B.
 *
 * The scan relies on val->bv_val being NUL-terminated.  The result is
 * written to *normalized.  Always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/* start at the first bit, just behind the opening quote */
	char *bit = &val->bv_val[1];

	/* skip the leading zero bits */
	while ( *bit == '0' ) {
		bit++;
	}

	if( *bit == '\'' ) {
		/* no non-zero bits */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* the result can never be longer than the input */
	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	normalized->bv_val[0] = '\'';
	normalized->bv_len = 1;

	/* copy the remaining bits together with the closing 'B */
	while( *bit != '\0' ) {
		normalized->bv_val[normalized->bv_len++] = *bit;
		bit++;
	}

	normalized->bv_val[normalized->bv_len] = '\0';

	return LDAP_SUCCESS;
}

422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Accept exactly the strings TRUE and FALSE (upper case, no padding).
 * The strictness means matching needs no normalization beforehand.
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
453
	slap_mask_t flags,
454
455
456
457
458
459
460
461
462
463
464
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

465
466
467
468
469
470
471
472
473
/*
 * Check that a value is a non-empty, structurally sound UTF-8 string.
 *
 * For every character the length announced by its first byte is taken
 * from LDAP_UTF8_CHARLEN2, each continuation byte is checked for the
 * 10xxxxxx pattern, and the length is cross-checked against
 * LDAP_UTF8_OFFSET.  A character that claims more bytes than are left in
 * the value is rejected before any of those bytes is looked at.
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * Reject a sequence truncated by the end of the value.  Without
		 * this the checks below would read past the value and the
		 * unsigned count would wrap when len is subtracted from it.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	if( count != 0 ) return LDAP_INVALID_SYNTAX;

	return LDAP_SUCCESS;
}

/*
 * Normalize the white space of a UTF-8 string.
 *
 * Leading and trailing ASCII space is removed and every inner run of
 * ASCII space is collapsed to its first space character.  The work is
 * done in place on a copy of the value, which becomes *normalized.
 *
 * Returns LDAP_INVALID_SYNTAX when nothing but space is found before the
 * first NUL, LDAP_SUCCESS otherwise.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *src, *dst, *before_space, *end;
	int step = 0;

	src = val->bv_val;

	/* Ignore initial whitespace */
	/* All space is ASCII. All ASCII is 1 byte */
	while ( ASCII_SPACE( *src ) ) {
		src++;
	}

	if( *src == '\0' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* copy what is left; end marks the end of the copied bytes */
	ber_mem2bv( src, val->bv_len - (src - val->bv_val), 1, normalized );
	end = normalized->bv_val + val->bv_len - (src - val->bv_val);

	assert( normalized->bv_val );

	/* read (src) and write (dst) cursors both walk the copy */
	src = dst = normalized->bv_val;
	before_space = NULL;

	while ( src < end ) {
		/* step over the character written in the previous round */
		dst += step;

		if ( ASCII_SPACE( *src ) ) {
			/* remember where the character ahead of this space began */
			before_space = dst - step;
			step = 1;
			*dst = *src++;

			/* Ignore the extra whitespace */
			while ( ASCII_SPACE( *src ) ) {
				src++;
			}
		} else {
			step = LDAP_UTF8_COPY(dst,src);
			before_space = NULL;
			src+=step;
		}
	}

	assert( normalized->bv_val < src );
	assert( dst+step <= src );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in space, back up to the character in front
	 * of it.  One space is all there is to drop because the loop above
	 * collapsed every run of whitespace to a single space.
	 */
	if ( before_space != NULL ) {
		step = dst - before_space;
		dst = before_space;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *dst ) );

	dst += step;

	/* null terminate */
	*dst = '\0';

	normalized->bv_len = dst - normalized->bv_val;

	return LDAP_SUCCESS;
}

596
/* Returns Unicode canonically normalized copy of a substring assertion
597
 * Skipping attribute description */
598
static SubstringsAssertion *
599
600
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
601
	unsigned casefold )
602
603
604
605
606
607
608
609
610
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

611
	if( sa->sa_initial.bv_val != NULL ) {
612
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
613
		if( nsa->sa_initial.bv_val == NULL ) {
614
615
616
617
618
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
619
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
620
621
			/* empty */
		}
622
623
		nsa->sa_any = (struct berval *)ch_malloc( (i + 1) * sizeof(struct berval) );
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
624
625
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
					casefold );
626
			if( nsa->sa_any[i].bv_val == NULL ) {
627
628
629
				goto err;
			}
		}
630
		nsa->sa_any[i].bv_val = NULL;
631
632
	}

633
	if( sa->sa_final.bv_val != NULL ) {
634
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
635
		if( nsa->sa_final.bv_val == NULL ) {
636
637
638
639
640
641
642
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
643
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
644
	if ( nsa->sa_any )ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
645
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
646
647
648
649
	ch_free( nsa );
	return NULL;
}

650
/*
 * Strip characters with the 8th bit set.
 *
 * Removes, in place, every byte of the NUL-terminated string in whose
 * high bit is set, keeping the remaining bytes in their original order.
 * Returns in itself, or NULL when in is NULL.
 *
 * Done as a single pass with separate read and write cursors, so nothing
 * is copied between overlapping regions and the cost is linear in the
 * length of the string.
 */
static char *
strip8bitChars(
	char *in )
{
	char *rd, *wr;

	if( in == NULL ) {
		return NULL;
	}

	for( rd = wr = in; *rd != '\0'; rd++ ) {
		/* keep only plain 7-bit bytes */
		if( !( *rd & 0x80 ) ) {
			*wr++ = *rd;
		}
	}
	*wr = '\0';

	return in;
}

674
#ifndef SLAPD_APPROX_OLDSINGLESTRING
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692

/*
 * Word splitting parameters for approximate matching: the set of
 * delimiter characters and the minimum length a word must have to get an
 * index/filter key.  With SLAPD_APPROX_INITIALS, '.' and '_' also
 * delimit words and single-letter words get no key.
 */
#ifdef SLAPD_APPROX_INITIALS
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Word-based approximate match.
 *
 * Both values are normalized and stripped of 8-bit characters.  The
 * stored value is split into words at SLAPD_APPROX_DELIMITER and a
 * phonetic code is computed for each word.  The words of the asserted
 * value are then looked up, one after the other, among the stored words
 * that follow the previous hit.
 *
 * *matchp is 0 when every asserted word was found in that order (and at
 * least one word was looked up), otherwise 1.  It is also 1 when either
 * value cannot be normalized.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *code, *nval, *assertv, **codes, **words, *cur;
	int w, nwords, wlen, offset=0, next=0;
	size_t avlen;

	/* Yes, this is necessary */
	nval = UTF8normalize( value, LDAP_UTF8_NOCASEFOLD );
	if( nval == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}
	strip8bitChars( nval );

	/* Yes, this is necessary */
	assertv = UTF8normalize( ((struct berval *)assertedValue),
		LDAP_UTF8_NOCASEFOLD );
	if( assertv == NULL ) {
		ch_free( nval );
		*matchp = 1;
		return LDAP_SUCCESS;
	}
	strip8bitChars( assertv );
	avlen = strlen( assertv );

	/* Cut nval into words by overwriting each delimiter with NUL */
	for( cur=nval,nwords=1; *cur; cur++ ) {
		cur = strpbrk( cur, SLAPD_APPROX_DELIMITER );
		if ( cur == NULL ) break;
		*cur = '\0';
		nwords++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( nwords * sizeof(char *) );
	codes = (char **)ch_malloc( nwords * sizeof(char *) );
	for( cur=nval,w=0;  w<nwords;  w++,cur+=strlen(cur)+1 ) {
		words[w] = cur;
		codes[w] = phonetic(cur);
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	wlen = 0;
	while ( (size_t) offset < avlen ) {
		wlen = strcspn( assertv + offset, SLAPD_APPROX_DELIMITER);

		/* sitting on a delimiter: step over it */
		if( wlen == 0 ) {
			offset++;
			continue;
		}

#if defined(SLAPD_APPROX_INITIALS)
		if( wlen == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( w=next; w<nwords; w++ )
				if( !strncasecmp( assertv+offset, words[w], 1 )) {
					next=w+1;
					break;
				}
		} else
#endif
		{
			/* Isolate the next word in the asserted value and phonetic it */
			assertv[offset+wlen] = '\0';
			code = phonetic( assertv + offset );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( w=next; w<nwords; w++ ){
				if( !strcmp( code, codes[w] ) ){
					next = w+1;
					break;
				}
			}
			ch_free( code );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( w >= nwords ) {
			next=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += wlen+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( next > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	free( assertv );
	for( w=0; w<nwords; w++ ) {
		ch_free( codes[w] );
	}
	ch_free( codes );
	ch_free( words );
	ch_free( nval );

	return LDAP_SUCCESS;
}

796
static int 
797
798
799
800
801
802
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
803
804
	BerVarray values,
	BerVarray *keysp )
805
806
807
{
	char *val, *c;
	int i,j, len, wordcount, keycount=0;
808
	struct berval *newkeys;
809
	BerVarray keys=NULL;
810

811
	for( j=0; values[j].bv_val != NULL; j++ ) {
812
		/* Yes, this is necessary */
Kurt Zeilenga's avatar
Kurt Zeilenga committed
813
		val = UTF8normalize( &values[j], LDAP_UTF8_NOCASEFOLD );
814
815
		strip8bitChars( val );

816
		/* Isolate how many words there are. There will be a key for each */
Gary Williams's avatar
Gary Williams committed
817
		for( wordcount=0,c=val;	 *c;  c++) {
818
819
820
821
822
823
824
825
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
826
827
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
828
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
829
830
831
832
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
Gary Williams's avatar
Gary Williams committed
833
		for( c=val,i=0;	 i<wordcount;  c+=len+1	 ) {
834
835
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
836
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
837
838
839
840
			keycount++;
			i++;
		}

841
		free( val );
842
	}
843
	keys[keycount].bv_val = NULL;
844
845
846
847
848
	*keysp = keys;

	return LDAP_SUCCESS;
}

849
static int 
850
851
852
853
854
855
856
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
857
	BerVarray *keysp )
858
859
860
{
	char *val, *c;
	int i, count, len;
861
	BerVarray keys;
862

863
	/* Yes, this is necessary */
864
	val = UTF8normalize( ((struct berval *)assertValue),
Kurt Zeilenga's avatar
Kurt Zeilenga committed
865
		LDAP_UTF8_NOCASEFOLD );
866
	if( val == NULL ) {
867
868
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
869
870
871
872
873
		*keysp = keys;
		return LDAP_SUCCESS;
	}
	strip8bitChars( val );

874
875
876
877
878
879
880
881
882
883
	/* Isolate how many words there are. There will be a key for each */
	for( count=0,c=val;  *c;  c++) {
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
884
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
885
886

	/* Get a phonetic copy of each word */
Gary Williams's avatar
Gary Williams committed
887
	for( c=val,i=0;	 i<count; c+=len+1 ) {
888
889
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
890
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
891
892
893
		i++;
	}

894
	free( val );
895

896
	keys[count].bv_val = NULL;
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Single-string approximate match (SLAPD_APPROX_OLDSINGLESTRING).
 *
 * Both values are normalized, stripped of 8-bit characters and reduced
 * to one phonetic code each; *matchp receives the strcmp() result of the
 * two codes.  When the stored value cannot be normalized *matchp is 1,
 * when the asserted value cannot be normalized it is -1.
 * Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *vapprox, *avapprox;
	char *s, *t;

	/* Yes, this is necessary */
	/* same flag name as every other UTF8normalize() call in this file */
	s = UTF8normalize( value, LDAP_UTF8_NOCASEFOLD );
	if( s == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	t = UTF8normalize( ((struct berval *)assertedValue),
			   LDAP_UTF8_NOCASEFOLD );
	if( t == NULL ) {
		free( s );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	vapprox = phonetic( strip8bitChars( s ) );
	avapprox = phonetic( strip8bitChars( t ) );

	/* the normalized strings are no longer needed once coded */
	free( s );
	free( t );

	*matchp = strcmp( vapprox, avapprox );

	ch_free( vapprox );
	ch_free( avapprox );

	return LDAP_SUCCESS;
}

948
static int 
949
950
951
952
953
954
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
955
956
	BerVarray values,
	BerVarray *keysp )
957
958
{
	int i;
959
	BerVarray *keys;
960
	char *s;
961

962
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
963
		/* empty - just count them */
964
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
965
966

	/* we should have at least one value at this point */
967
968
	assert( i > 0 );

969
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
970
971

	/* Copy each value and run it through phonetic() */
972
	for( i=0; values[i].bv_val != NULL; i++ ) {
973
		/* Yes, this is necessary */
974
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
975
976

		/* strip 8-bit chars and run through phonetic() */
977
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
978
		free( s );
979
	}
980
	keys[i].bv_val = NULL;
981
982
983
984
985
986

	*keysp = keys;
	return LDAP_SUCCESS;
}


987
static int 
988
989
990
991
992
993
994
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
995
	BerVarray *keysp )
996
{
997
	BerVarray keys;
998
	char *s;
999

1000
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1001

1002
	/* Yes, this is necessary */
1003
	s = UTF8normalize( ((struct berval *)assertValue),
1004
1005
1006
1007
1008
1009
1010
1011
1012
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1013
1014
1015
1016
1017
1018
1019