schema_init.c 110 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

8
9
10
11
/****
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

12
13
14
15
16
17
18
19
20
21
22
23
24
Summary:
  StringSyntax		X.500	LDAP	Matching
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

  In LDAPv3, a directory string is a UTF-8 encoded UCS string.

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which would
  be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces and
  could be empty.  However, in X.500, all attribute values of numeric
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::= NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same constraint
  (but it's unclear how such an assertion could ever be true). In LDAP,
  there is one syntax (numericString) not two (numericString with constraint,
  numericString without constraint).  This should be treated as numericString
  with non-empty constraint.  Note that while someone may have no
  ISDN number, there are no ISDN numbers which are zero length.
60
61
62
63
64

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters and
65
66
67
  can be empty.  In X.500, semantics much like NumericString (see serialNumber
  for a like example) excepting uses insignificant space handling instead of
  ignore all spaces.  
68
69

IA5String
70
71
  Basically same as PrintableString.  There are no examples in X.500, but
  same logic applies.  So we require them to be non-empty as well.
72
73
74
75

****/


76
77
78
#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
79
#include <limits.h>
80
81

#include <ac/ctype.h>
82
#include <ac/errno.h>
83
84
85
86
87
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
88
#include "lber_pvt.h"
89

90
91
#include "ldap_utf8.h"

92
93
94
95
96
97
#include "lutil_hash.h"
/* local names for the lutil hash primitives used by the indexers/filters */
#define HASH_BYTES	LUTIL_HASH_BYTES
#define HASH_CONTEXT	lutil_HASH_CTX
#define HASH_Init(c)	lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)	lutil_HASHFinal(d,c)

/* recycled validation routines */
#define berValidate	blobValidate

/* unimplemented pretty-printers */
#define integerPretty	NULL

/* recycled matching routines */
#define bitStringMatch	octetStringMatch
#define numericStringMatch	caseIgnoreIA5Match
#define objectIdentifierMatch	caseIgnoreIA5Match
#define telephoneNumberMatch	caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define generalizedTimeMatch	caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
#define uniqueMemberMatch	dnMatch
#define integerFirstComponentMatch	integerMatch

/* approximate matching rules (both map onto the generic approx* routines) */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
#define IA5StringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.5"
#define IA5StringApproxMatch	approxMatch
#define IA5StringApproxIndexer	approxIndexer
#define IA5StringApproxFilter	approxFilter

/* ordering matching rules (reuse the corresponding equality routines) */
#define caseIgnoreOrderingMatch	caseIgnoreMatch
#define caseExactOrderingMatch	caseExactMatch
#define integerOrderingMatch	integerMatch

/* unimplemented matching routines */
#define caseIgnoreListMatch	NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch	NULL

#ifdef SLAPD_ACI_ENABLED
#define OpenLDAPaciMatch	NULL
#endif
#ifdef SLAPD_AUTHPASSWD
#define authPasswordMatch	NULL
#endif

/* recycled indexing/filtering routines */
#define dnIndexer	caseExactIgnoreIndexer
#define dnFilter	caseExactIgnoreFilter
#define bitStringFilter	octetStringFilter
#define bitStringIndexer	octetStringIndexer

#define telephoneNumberIndexer	caseIgnoreIA5Indexer
#define telephoneNumberFilter	caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter	caseIgnoreIA5SubstringsFilter

154
155
156
157
158
159
160
161
162
163
164
165
166
167
/*
 * File-local handles to three built-in matching rules.  They are not
 * assigned in this part of the file; presumably they are filled in while
 * the schema is initialised -- TODO confirm against the code that walks
 * mr_ptr[].
 */
static MatchingRule *caseExactMatchingRule;
static MatchingRule *caseExactSubstringsMatchingRule;
static MatchingRule *integerFirstComponentMatchingRule;

/*
 * Pairs a matching-rule OID with the handle above that refers to it.
 * The OID strings must stay in sync with the OIDs used in the
 * matching-rule definitions later in this file.
 */
static const struct MatchingRulePtr {
	const char   *oid;	/* dotted-decimal OID of the rule */
	MatchingRule **mr;	/* where the rule's handle is kept */
} mr_ptr [] = {
	{ "2.5.13.5",  &caseExactMatchingRule },
	{ "2.5.13.7",  &caseExactSubstringsMatchingRule },
	{ "2.5.13.29", &integerFirstComponentMatchingRule }
};

168

169
/*
 * Case-insensitive search for the byte c within the first bv->bv_len
 * bytes of bv.
 *
 * On a hit the zero-based offset is stored in *len and a pointer to the
 * matching byte is returned.  Returns NULL, leaving *len untouched, when
 * c is NUL or does not occur in the value.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	char lo = TOLOWER( c );
	char up = TOUPPER( c );
	ber_len_t pos = 0;

	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		char cur = bv->bv_val[pos];

		if( cur == up || cur == lo ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
186

187
188
189
static int
octetStringMatch(
	int *matchp,
190
	slap_mask_t flags,
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
209
static int octetStringIndexer(
210
211
	slap_mask_t use,
	slap_mask_t flags,
212
213
214
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
215
216
	BerVarray values,
	BerVarray *keysp )
217
218
219
{
	int i;
	size_t slen, mlen;
220
	BerVarray keys;
221
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
222
	unsigned char	HASHdigest[HASH_BYTES];
223
	struct berval digest;
224
225
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
226

227
	for( i=0; values[i].bv_val != NULL; i++ ) {
228
229
230
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
231
232
233
	/* we should have at least one value at this point */
	assert( i > 0 );

234
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
235

236
237
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
238

239
	for( i=0; values[i].bv_val != NULL; i++ ) {
240
		HASH_Init( &HASHcontext );
241
		if( prefix != NULL && prefix->bv_len > 0 ) {
242
			HASH_Update( &HASHcontext,
243
244
				prefix->bv_val, prefix->bv_len );
		}
245
		HASH_Update( &HASHcontext,
246
			syntax->ssyn_oid, slen );
247
		HASH_Update( &HASHcontext,
248
			mr->smr_oid, mlen );
249
		HASH_Update( &HASHcontext,
250
			values[i].bv_val, values[i].bv_len );
251
		HASH_Final( HASHdigest, &HASHcontext );
252

253
		ber_dupbv( &keys[i], &digest );
254
255
	}

256
	keys[i].bv_val = NULL;
257
258
259
260
261
262
263

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
264
static int octetStringFilter(
265
266
	slap_mask_t use,
	slap_mask_t flags,
267
268
269
270
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
271
	BerVarray *keysp )
272
273
{
	size_t slen, mlen;
274
	BerVarray keys;
275
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
276
	unsigned char	HASHdigest[HASH_BYTES];
277
278
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
279
280
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
281

282
283
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
284

285
	keys = ch_malloc( sizeof( struct berval ) * 2 );
286

287
	HASH_Init( &HASHcontext );
288
	if( prefix != NULL && prefix->bv_len > 0 ) {
289
		HASH_Update( &HASHcontext,
290
291
			prefix->bv_val, prefix->bv_len );
	}
292
	HASH_Update( &HASHcontext,
293
		syntax->ssyn_oid, slen );
294
	HASH_Update( &HASHcontext,
295
		mr->smr_oid, mlen );
296
	HASH_Update( &HASHcontext,
297
		value->bv_val, value->bv_len );
298
	HASH_Final( HASHdigest, &HASHcontext );
299

300
301
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
302
303
304
305
306

	*keysp = keys;

	return LDAP_SUCCESS;
}
307

308
309
310
311
312
/*
 * Validator that rejects every value: it returns LDAP_INVALID_SYNTAX
 * regardless of its arguments, neither of which is examined.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	const int verdict = LDAP_INVALID_SYNTAX;

	/* no value allowed */
	return verdict;
}

317
static int
318
blobValidate(
319
320
321
322
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
323
	return LDAP_SUCCESS;
324
325
}

326
327
328
329
330
331
332
333
334
335
336
337
338
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'B"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict so that values need no normalization
 * before simplistic matching.  Returns LDAP_SUCCESS for a well-formed
 * value and LDAP_INVALID_SYNTAX otherwise.  syntax is not used.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* the shortest legal value is ''B */
	if( n < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* opening quote, then closing quote and the 'B' suffix */
	if( in->bv_val[0] != '\'' ) return LDAP_INVALID_SYNTAX;
	if( in->bv_val[n-2] != '\'' ) return LDAP_INVALID_SYNTAX;
	if( in->bv_val[n-1] != 'B' ) return LDAP_INVALID_SYNTAX;

	/* everything between the quotes (indices 1 .. n-3) must be a bit */
	for( pos = 1; pos + 2 < n; pos++ ) {
		const char bit = in->bv_val[pos];

		if( !( bit == '0' || bit == '1' ) ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

363
364
365
366
/*
 * Normalize a Bit String value.
 *
 * A normalized bit string has no extraneous (leading) zero bits, i.e.
 * '00010'B becomes '10'B.  A value with no non-zero bit at all is
 * normalized to '0'B.
 *
 * The scan relies on val->bv_val being NUL-terminated and on the value
 * having the '...'B shape (the input is not re-validated here).  The
 * result is stored in *normalized in newly allocated memory.  syntax is
 * not used.  Always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/* start at the first bit, just past the opening quote */
	char *bit = &val->bv_val[1];
	ber_len_t used;

	/* skip leading zero bits */
	while ( *bit == '0' ) {
		bit++;
	}

	if( *bit == '\'' ) {
		/* reached the closing quote: no non-zero bits */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* the result is never longer than the input */
	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	normalized->bv_val[0] = '\'';
	used = 1;

	/* copy the remaining bits together with the trailing 'B */
	while( *bit != '\0' ) {
		normalized->bv_val[used++] = *bit++;
	}

	normalized->bv_val[used] = '\0';
	normalized->bv_len = used;

	return LDAP_SUCCESS;
}

403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
/*
 * Validate a Name And Optional UID value: a DN optionally followed by
 * '#' and a bit string, e.g.  cn=foo,dc=example#'0101'B
 *
 * An empty value is accepted.  When the value ends in 'B it is assumed
 * to carry a UID; the UID must then have the form #'<bits>'B and is cut
 * off before the remainder is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX, LDAP_OTHER when the working
 * copy cannot be allocated, or whatever dnValidate() returns.  syntax is
 * not used.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	/* work on a private copy because the UID is trimmed in place */
	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length test keeps the suffix probe inside the buffer */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest possible UID suffix is #''B.  Anything shorter
		 * cannot be valid, and would make the unsigned index
		 * arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the bits; i stays >= 1 */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* the bits must be preceded by the opening quote and '#' */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a Name And Optional UID value.
 *
 * The DN part is normalized with dnNormalize2() (or dnPretty2() when
 * USE_DN_NORMALIZE is not defined) and the optional UID, introduced by
 * the last '#', with bitStringNormalize().  The two results are joined
 * again with '#' and stored in *normalized.
 *
 * An empty value is returned as an empty copy in *normalized.
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX when a part cannot be
 * normalized, or LDAP_OTHER when the working copy cannot be allocated.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out = { 0, NULL };
	struct berval uidin = { 0, NULL };
	struct berval uidout = { 0, NULL };
	int rc;

	/* work on a private copy because the UID is trimmed in place */
	ber_dupbv( &out, val );
	if( out.bv_val == NULL && val->bv_len != 0 ) return LDAP_OTHER;

	if( out.bv_len == 0 ) {
		/* nothing to do: hand the (empty) copy over to the caller */
		*normalized = out;
		return LDAP_SUCCESS;
	}

	/* the length test keeps the suffix probe inside the buffer */
	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		ber_len_t tail;

		/* assume presence of optional UID */
		uidin.bv_val = strrchr( out.bv_val, '#' );

		if( uidin.bv_val == NULL ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* tail = length of "#'...'B", separator included */
		tail = out.bv_len - (uidin.bv_val - out.bv_val);
		out.bv_len -= tail;
		uidin.bv_len = tail - 1;

		/* trim the UID off the DN and step past the separator */
		*(uidin.bv_val++) = '\0';

		rc = bitStringNormalize( syntax, &uidin, &uidout );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}
	}

#ifdef USE_DN_NORMALIZE
	rc = dnNormalize2( NULL, &out, normalized );
#else
	rc = dnPretty2( NULL, &out, normalized );
#endif

	if( rc != LDAP_SUCCESS ) {
		free( out.bv_val );
		free( uidout.bv_val );
		return LDAP_INVALID_SYNTAX;
	}

	if( uidout.bv_len ) {
		/* room for the DN, the separator, the UID and the NUL */
		normalized->bv_val = ch_realloc( normalized->bv_val,
			normalized->bv_len + uidout.bv_len + sizeof("#") );

		/* insert the separator */
		normalized->bv_val[normalized->bv_len++] = '#';

		/* append the UID */
		AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
			uidout.bv_val, uidout.bv_len );
		normalized->bv_len += uidout.bv_len;

		/* terminate */
		normalized->bv_val[normalized->bv_len] = '\0';
	}

	/* the normalized UID has been copied; release the temporaries */
	free( uidout.bv_val );
	free( out.bv_val );

	return LDAP_SUCCESS;
}

516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Validate a Boolean value.  Only the exact, upper-case strings "TRUE"
 * and "FALSE" are accepted; this strictness means no normalization is
 * needed before simplistic matching.  Returns LDAP_SUCCESS or
 * LDAP_INVALID_SYNTAX.  syntax is not used.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
547
	slap_mask_t flags,
548
549
550
551
552
553
554
555
556
557
558
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

559
560
561
562
563
564
565
566
567
/*
 * Validate a UTF-8 string value.
 *
 * The value must be non-empty and consist of complete UTF-8 sequences:
 * for each character the length announced by the lead byte (via
 * LDAP_UTF8_CHARLEN2) must fit in what is left of the value, every
 * following byte must be a continuation byte (10xxxxxx), and the length
 * must agree with LDAP_UTF8_OFFSET.
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.  syntax is not used.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * A sequence that claims more bytes than remain is truncated.
		 * Rejecting it here keeps the continuation-byte checks below
		 * inside the value and prevents count from going past zero.
		 */
		if( len > 0 && (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks; each case deliberately falls through */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	/* the loop only ends once every byte has been consumed */
	return LDAP_SUCCESS;
}

/*
 * Normalize the insignificant spaces of a UTF-8 string:
 *	- leading spaces are dropped,
 *	- each run of spaces is collapsed to a single space,
 *	- a trailing space is dropped.
 * A value consisting only of spaces is normalized to a single space.
 *
 * The validator is expected to have refused an empty value.  The result
 * is built in a copy of val made with ber_mem2bv() and stored in
 * *normalized.  syntax is not used.  Always returns LDAP_SUCCESS.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *src;		/* next input byte to look at */
	char *dst;		/* start of the character written last */
	char *before_space;	/* character preceding a pending trailing space */
	char *end;		/* one past the copied input */
	int width = 0;		/* byte length of the character at dst */

	/* validator should have refused an empty string */
	assert( val->bv_len );

	/* Ignore initial whitespace */
	/* All space is ASCII. All ASCII is 1 byte */
	src = val->bv_val;
	while ( src < val->bv_val + val->bv_len && ASCII_SPACE( src[ 0 ] ) ) {
		src++;
	}

	normalized->bv_len = val->bv_len - (src - val->bv_val);

	if( !normalized->bv_len ) {
		/* nothing but spaces: treated as one space */
		ber_mem2bv( " ", 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* copy what remains, then compact that copy in place */
	ber_mem2bv( src, normalized->bv_len, 1, normalized );
	end = normalized->bv_val + normalized->bv_len;

	assert( normalized->bv_val );

	src = dst = normalized->bv_val;
	before_space = NULL;

	while ( src < end ) {
		/* step over the character written by the previous round */
		dst += width;

		if ( ASCII_SPACE( *src ) ) {
			/* remember where the preceding character began */
			before_space = dst - width;
			width = 1;
			*dst = *src++;

			/* Ignore the extra whitespace */
			while ( ASCII_SPACE( *src ) ) {
				src++;
			}
		} else {
			width = LDAP_UTF8_COPY(dst,src);
			before_space = NULL;
			src += width;
		}
	}

	assert( normalized->bv_val <= src );
	assert( dst+width <= src );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */
	if ( before_space != NULL ) {
		width = dst - before_space;
		dst = before_space;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *dst ) );

	dst += width;

	/* null terminate */
	*dst = '\0';

	normalized->bv_len = dst - normalized->bv_val;

	return LDAP_SUCCESS;
}

694
/* Returns Unicode canonically normalized copy of a substring assertion
695
 * Skipping attribute description */
696
static SubstringsAssertion *
697
698
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
699
	unsigned casefold )
700
701
702
703
704
705
706
707
708
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

709
	if( sa->sa_initial.bv_val != NULL ) {
710
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
711
		if( nsa->sa_initial.bv_val == NULL ) {
712
713
714
715
716
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
717
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
718
719
			/* empty */
		}
720
721
		nsa->sa_any = (struct berval *)ch_malloc( (i + 1) * sizeof(struct berval) );
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
722
723
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
					casefold );
724
			if( nsa->sa_any[i].bv_val == NULL ) {
725
726
727
				goto err;
			}
		}
728
		nsa->sa_any[i].bv_val = NULL;
729
730
	}

731
	if( sa->sa_final.bv_val != NULL ) {
732
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
733
		if( nsa->sa_final.bv_val == NULL ) {
734
735
736
737
738
739
740
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
741
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
742
	if ( nsa->sa_any )ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
743
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
744
745
746
747
	ch_free( nsa );
	return NULL;
}

748
#ifndef SLAPD_APPROX_OLDSINGLESTRING
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Approximate ("sounds like") match, word by word.
 *
 * Both the stored value and the asserted value (a struct berval *) are
 * normalized with UTF8bvnormalize(..., LDAP_UTF8_APPROX) and split at
 * SLAPD_APPROX_DELIMITER characters.  The phonetic() code of every word
 * of the asserted value must be found among the phonetic codes of the
 * stored value's words, in the same order; stored words may be skipped.
 * With SLAPD_APPROX_INITIALS a one-letter asserted word only has to
 * match the first letter of a stored word (case-insensitively).
 *
 * *matchp is set to 0 on a match and to 1 otherwise, including when
 * either value cannot be normalized or the asserted value contains no
 * word.  flags, syntax and mr are not used.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *asserted;
	char *key, **phon, **words, *cur;
	int i, nwords, len, offset=0, next_word=0;

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	asserted = UTF8bvnormalize( ((struct berval *)assertedValue),
		NULL, LDAP_UTF8_APPROX );
	if( asserted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Split the stored value in place: each delimiter becomes a NUL */
	nwords = 1;
	for ( cur = stored->bv_val; *cur; cur++ ) {
		cur = strpbrk( cur, SLAPD_APPROX_DELIMITER );
		if ( cur == NULL ) break;
		*cur = '\0';
		nwords++;
	}

	/* Remember each word and a phonetic copy of it */
	words = (char **)ch_malloc( nwords * sizeof(char *) );
	phon = (char **)ch_malloc( nwords * sizeof(char *) );
	cur = stored->bv_val;
	for ( i = 0; i < nwords; i++ ) {
		words[i] = cur;
		phon[i] = phonetic(cur);
		cur += strlen(cur) + 1;
	}

	/* Walk the asserted value's words; each one has to be found among
	   the stored words at or after position next_word. */
	len = 0;
	while ( (ber_len_t) offset < asserted->bv_len ) {
		len = strcspn( asserted->bv_val + offset, SLAPD_APPROX_DELIMITER);
		if( len == 0 ) {
			/* sitting on a delimiter: skip it */
			offset++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=next_word; i<nwords; i++ ) {
				if( !strncasecmp( asserted->bv_val + offset, words[i], 1 )) {
					next_word=i+1;
					break;
				}
			}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			asserted->bv_val[offset+len] = '\0';
			key = phonetic( asserted->bv_val + offset );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=next_word; i<nwords; i++ ){
				if( !strcmp( key, phon[i] ) ){
					next_word = i+1;
					break;
				}
			}
			ch_free( key );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= nwords ) {
			next_word=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += len+1;
	}

	/* A positive position means every asserted word was located */
	*matchp = ( next_word > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( asserted );
	for( i=0; i<nwords; i++ ) {
		ch_free( phon[i] );
	}
	ch_free( phon );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

867
static int 
868
869
870
871
872
873
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
874
875
	BerVarray values,
	BerVarray *keysp )
876
{
877
	char *c;
878
	int i,j, len, wordcount, keycount=0;
879
	struct berval *newkeys;
880
	BerVarray keys=NULL;
881

882
	for( j=0; values[j].bv_val != NULL; j++ ) {
883
		struct berval val = { 0, NULL };
884
		/* Yes, this is necessary */
885
886
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
887

888
		/* Isolate how many words there are. There will be a key for each */
889
		for( wordcount = 0, c = val.bv_val; *c; c++) {
890
891
892
893
894
895
896
897
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
898
899
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
900
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
901
902
903
904
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
905
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
906
907
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
908
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
909
910
911
912
			keycount++;
			i++;
		}

913
		ber_memfree( val.bv_val );
914
	}
915
	keys[keycount].bv_val = NULL;
916
917
918
919
920
	*keysp = keys;

	return LDAP_SUCCESS;
}

921
static int 
922
923
924
925
926
927
928
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
929
	BerVarray *keysp )
930
{
931
	char *c;
932
	int i, count, len;
933
	struct berval *val;
934
	BerVarray keys;
935

936
	/* Yes, this is necessary */
Kurt Zeilenga's avatar
Kurt Zeilenga committed
937
938
	val = UTF8bvnormalize( ((struct berval *)assertValue),
		NULL, LDAP_UTF8_APPROX );
939
	if( val == NULL || val->bv_val == NULL ) {
940
941
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
942
		*keysp = keys;
943
		ber_bvfree( val );
944
945
946
		return LDAP_SUCCESS;
	}

947
	/* Isolate how many words there are. There will be a key for each */
948
	for( count = 0,c = val->bv_val; *c; c++) {
949
950
951
952
953
954
955
956
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
957
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
958
959

	/* Get a phonetic copy of each word */
960
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
961
962
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
963
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
964
965
966
		i++;
	}

967
	ber_bvfree( val );
968

969
	keys[count].bv_val = NULL;
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Approximate ("sounds like") match on the value as a single string.
 *
 * Both the stored value and the asserted value (a struct berval *) are
 * normalized with UTF8normalize(..., UTF8_NOCASEFOLD), passed through
 * strip8bitChars() and then phonetic(); *matchp is the strcmp() result
 * of the two phonetic strings.  *matchp is 1 when the stored value
 * cannot be normalized and -1 when the asserted value cannot.  flags,
 * syntax and mr are not used.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *stored_code, *asserted_code;
	char *stored_norm, *asserted_norm;

	/* Yes, this is necessary */
	stored_norm = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( stored_norm == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	asserted_norm = UTF8normalize( ((struct berval *)assertedValue),
			   UTF8_NOCASEFOLD );
	if( asserted_norm == NULL ) {
		free( stored_norm );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* phonetic codes of the 7-bit remainder of each string */
	stored_code = phonetic( strip8bitChars( stored_norm ) );
	asserted_code = phonetic( strip8bitChars( asserted_norm ) );

	free( stored_norm );
	free( asserted_norm );

	*matchp = strcmp( stored_code, asserted_code );

	ch_free( stored_code );
	ch_free( asserted_code );

	return LDAP_SUCCESS;
}

1021
static int 
1022
1023
1024
1025
1026
1027
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1028
1029
	BerVarray values,
	BerVarray *keysp )
1030
1031
{
	int i;
1032
	BerVarray *keys;
1033
	char *s;
1034

1035
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1036
		/* empty - just count them */
1037
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1038
1039

	/* we should have at least one value at this point */
1040
1041
	assert( i > 0 );

1042
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
1043
1044

	/* Copy each value and run it through phonetic() */
1045
	for( i=0; values[i].bv_val != NULL; i++ ) {
1046
		/* Yes, this is necessary */
1047
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1048
1049

		/* strip 8-bit chars and run through phonetic() */
1050
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1051
		free( s );
1052
	}
1053
	keys[i].bv_val = NULL;
1054
1055
1056
1057
1058
1059

	*keysp = keys;
	return LDAP_SUCCESS;
}


1060
static int 
1061
1062
1063
1064
1065
1066
1067
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
1068
	BerVarray *keysp )
1069
{
1070
	BerVarray keys;
1071
	char *s;
1072

1073
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1074

1075
	/* Yes, this is necessary */
1076
	s = UTF8normalize( ((struct berval *)assertValue),
1077
1078
1079
1080
1081
1082
1083
1084
1085
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1086
1087
1088
1089
1090
1091
1092

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1093
static int
1094
caseExactMatch(
1095
	int *matchp,
1096
	slap_mask_t flags,
1097
1098
1099
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
1100
	void *assertedValue )