schema_init.c 110 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

8
9
10
11
/****
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

12
13
14
15
16
17
18
19
20
21
22
23
24
Summary:
  StringSyntax		X.500	LDAP	Matching
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

  In LDAPv3, a directory string is a UTF-8 encoded UCS string.

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which would
  be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces and
  could be empty.  However, in X.500, all attribute values of numeric
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::= NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same constraint
  (but it's unclear how such an assertion could ever be true). In LDAP,
  there is one syntax (numericString) not two (numericString with constraint,
  numericString without constraint).  This should be treated as numericString
  with non-empty constraint.  Note that while someone may have no
  ISDN number, there are no ISDN numbers which are zero length.
60
61
62
63
64

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters and
65
66
67
  can be empty.  In X.500, semantics much like NumericString (see serialNumber
  for a like example) excepting uses insignificant space handling instead of
  ignore all spaces.  
68
69

IA5String
70
71
  Basically same as PrintableString.  There are no examples in X.500, but
  same logic applies.  So we require them to be non-empty as well.
72
73
74
75

****/


76
77
78
#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
79
#include <limits.h>
80
81

#include <ac/ctype.h>
82
#include <ac/errno.h>
83
84
85
86
87
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
88
#include "lber_pvt.h"
89

90
91
#include "ldap_utf8.h"

92
93
94
95
96
97
#include "lutil_hash.h"
/*
 * Local aliases for the lutil hash interface: the indexer and filter
 * routines in this file are written against these HASH_* names.
 */
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
98

99
/* recycled validation routines */
100
#define berValidate						blobValidate
101
102

/* unimplemented pretty routines */
103
#define integerPretty					NULL
104
105

/* recycled matching routines */
106
#define bitStringMatch					octetStringMatch
107
108
109
#define numericStringMatch				caseIgnoreIA5Match
#define objectIdentifierMatch			caseIgnoreIA5Match
#define telephoneNumberMatch			caseIgnoreIA5Match
110
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
111
112
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
113
#define uniqueMemberMatch				dnMatch
114
#define integerFirstComponentMatch		integerMatch
115

116
117
/* approx matching rules */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
118
119
120
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
121
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
122
#define IA5StringApproxMatch			approxMatch
123
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
124
#define IA5StringApproxFilter			approxFilter
125

126
/* ordering matching rules */
127
128
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseExactOrderingMatch			caseExactMatch
129
#define integerOrderingMatch			integerMatch
130

131
/* unimplemented matching routines */
132
133
134
135
#define caseIgnoreListMatch				NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch		NULL

Kurt Zeilenga's avatar
Kurt Zeilenga committed
136
#ifdef SLAPD_ACI_ENABLED
137
#define OpenLDAPaciMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
138
139
#endif
#ifdef SLAPD_AUTHPASSWD
140
#define authPasswordMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
141
#endif
142
143

/* recycled indexing/filtering routines */
144
145
#define dnIndexer				caseExactIgnoreIndexer
#define dnFilter				caseExactIgnoreFilter
146
147
#define bitStringFilter			octetStringFilter
#define bitStringIndexer		octetStringIndexer
148

149
150
151
152
153
#define telephoneNumberIndexer			caseIgnoreIA5Indexer
#define telephoneNumberFilter			caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

154
155
156
157
158
159
160
161
162
163
164
165
166
167
/* File-scope handles to individual matching rules, addressed via mr_ptr[]. */
static MatchingRule *caseExactMatchingRule;
static MatchingRule *caseExactSubstringsMatchingRule;
static MatchingRule *integerFirstComponentMatchingRule;

/*
 * Table pairing a matching rule OID with the address of the file-scope
 * pointer that corresponds to that rule.
 * NOTE(review): presumably the pointers are filled in when the matching
 * rules are registered -- confirm against the initialization code.
 */
static const struct MatchingRulePtr {
	const char   *oid;	/* dotted-decimal OID of the matching rule */
	MatchingRule **mr;	/* location of the pointer associated with that OID */
} mr_ptr [] = {
	/* must match OIDs below */
	{ "2.5.13.5",  &caseExactMatchingRule },
	{ "2.5.13.7",  &caseExactSubstringsMatchingRule },
	{ "2.5.13.29", &integerFirstComponentMatchingRule }
};

168

169
/*
 * Case-insensitive search for the byte c within the first bv_len bytes
 * of bv.  On a hit, the offset of the match is stored in *len and a
 * pointer to the matching byte is returned.  Returns NULL (and leaves
 * *len untouched) when c is NUL or when neither case variant occurs.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	const char lc = TOLOWER( c );
	const char uc = TOUPPER( c );
	ber_len_t pos = 0;

	/* a NUL byte is never searched for */
	if( c == 0 ) return NULL;

	while( pos < bv->bv_len ) {
		const char cur = bv->bv_val[pos];

		if( cur == uc || cur == lc ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
186

187
188
189
static int
octetStringMatch(
	int *matchp,
190
	slap_mask_t flags,
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
209
static int octetStringIndexer(
210
211
	slap_mask_t use,
	slap_mask_t flags,
212
213
214
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
215
216
	BerVarray values,
	BerVarray *keysp )
217
218
219
{
	int i;
	size_t slen, mlen;
220
	BerVarray keys;
221
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
222
	unsigned char	HASHdigest[HASH_BYTES];
223
	struct berval digest;
224
225
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
226

227
	for( i=0; values[i].bv_val != NULL; i++ ) {
228
229
230
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
231
232
233
	/* we should have at least one value at this point */
	assert( i > 0 );

234
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
235

236
237
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
238

239
	for( i=0; values[i].bv_val != NULL; i++ ) {
240
		HASH_Init( &HASHcontext );
241
		if( prefix != NULL && prefix->bv_len > 0 ) {
242
			HASH_Update( &HASHcontext,
243
244
				prefix->bv_val, prefix->bv_len );
		}
245
		HASH_Update( &HASHcontext,
246
			syntax->ssyn_oid, slen );
247
		HASH_Update( &HASHcontext,
248
			mr->smr_oid, mlen );
249
		HASH_Update( &HASHcontext,
250
			values[i].bv_val, values[i].bv_len );
251
		HASH_Final( HASHdigest, &HASHcontext );
252

253
		ber_dupbv( &keys[i], &digest );
254
255
	}

256
	keys[i].bv_val = NULL;
257
258
259
260
261
262
263

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Filter key generation function */
264
static int octetStringFilter(
265
266
	slap_mask_t use,
	slap_mask_t flags,
267
268
269
270
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
271
	BerVarray *keysp )
272
273
{
	size_t slen, mlen;
274
	BerVarray keys;
275
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
276
	unsigned char	HASHdigest[HASH_BYTES];
277
278
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
279
280
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
281

282
283
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
284

285
	keys = ch_malloc( sizeof( struct berval ) * 2 );
286

287
	HASH_Init( &HASHcontext );
288
	if( prefix != NULL && prefix->bv_len > 0 ) {
289
		HASH_Update( &HASHcontext,
290
291
			prefix->bv_val, prefix->bv_len );
	}
292
	HASH_Update( &HASHcontext,
293
		syntax->ssyn_oid, slen );
294
	HASH_Update( &HASHcontext,
295
		mr->smr_oid, mlen );
296
	HASH_Update( &HASHcontext,
297
		value->bv_val, value->bv_len );
298
	HASH_Final( HASHdigest, &HASHcontext );
299

300
301
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
302
303
304
305
306

	*keysp = keys;

	return LDAP_SUCCESS;
}
307

308
309
310
311
312
313
314
315
316
/*
 * Validator that never succeeds: both arguments are ignored and
 * LDAP_OTHER is returned for every input.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value is accepted: every input is reported as LDAP_OTHER */
	return LDAP_OTHER;
}

317
static int
318
blobValidate(
319
320
321
322
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
323
	return LDAP_SUCCESS;
324
325
}

326
327
328
329
330
331
332
333
334
335
336
337
338
/*
 * Validate the string form of a bit string: a single quote, zero or
 * more binary digits, a single quote and a trailing 'B'
 * (example: '0101111101'B).
 *
 * The check is deliberately strict so that values need no normalization
 * before simplistic matching.  Returns LDAP_SUCCESS for a well-formed
 * value and LDAP_INVALID_SYNTAX otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *s = in->bv_val;
	const ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* the shortest legal value is ''B */
	if( n < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* opening quote, closing quote and the 'B' suffix must be present */
	if( s[0] != '\'' || s[n-2] != '\'' || s[n-1] != 'B' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos < n-2; pos++ ) {
		if( !( s[pos] == '0' || s[pos] == '1' ) ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

363
364
365
366
/*
 * Normalize a bit string by dropping extraneous leading zero bits,
 * e.g. '00010'B becomes '10'B.  A value made up only of zero bits (or
 * of no bits at all) normalizes to '0'B.
 *
 * The input is scanned up to its NUL terminator, so val->bv_val must be
 * NUL-terminated.  The result is newly allocated and stored in
 * *normalized; the function always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/* start at the first bit, just past the opening quote */
	const char *src = val->bv_val + 1;
	ber_len_t out;

	/* skip every leading zero bit */
	while( *src == '0' ) {
		src++;
	}

	if( *src == '\'' ) {
		/* no non-zero bits at all */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* the result is never longer than the input */
	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	normalized->bv_val[0] = '\'';
	out = 1;

	/* copy the remaining bits together with the closing quote and 'B' */
	while( *src != '\0' ) {
		normalized->bv_val[out++] = *src++;
	}

	normalized->bv_val[out] = '\0';
	normalized->bv_len = out;

	return LDAP_SUCCESS;
}

403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
/*
 * Validate a "Name and Optional UID" value: a DN optionally followed by
 * '#' and a bit string such as '0101'B.
 *
 * An empty value is accepted.  When the value ends in 'B the trailing
 * bit string is checked and stripped from a private copy, and the
 * remainder is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX for a malformed UID part,
 * LDAP_OTHER when the value cannot be duplicated, or whatever
 * dnValidate() reports for the DN part.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	/* work on a private copy because the UID is trimmed in place */
	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length guard keeps bv_val[bv_len-2] inside the buffer */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest possible UID suffix is #''B (four bytes).
		 * Anything shorter cannot hold the '#' separator and the
		 * opening quote, and would make the index arithmetic below
		 * wrap around or step in front of the buffer.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the binary digits */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}

		/* the digits must be preceded by the opening quote and '#' */
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a "Name and Optional UID" value.
 *
 * The DN part is normalized with dnNormalize2() (or dnPretty2() when
 * USE_DN_NORMALIZE is not defined); an optional trailing "#'...'B" bit
 * string is normalized with bitStringNormalize() and appended again,
 * separated by '#'.
 *
 * An empty input leaves *normalized untouched and returns LDAP_SUCCESS.
 * Returns LDAP_INVALID_SYNTAX when the value ends in 'B but has no '#',
 * or when either part fails to normalize.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	struct berval uidin = { 0, NULL };
	struct berval uidout = { 0, NULL };
	int rc;

	/* work on a private copy because the UID is trimmed in place */
	ber_dupbv( &out, val );
	if( out.bv_len == 0 ) {
		/* nothing to normalize; do not leak the duplicate */
		free( out.bv_val );
		return LDAP_SUCCESS;
	}

	/* the length guard keeps bv_val[bv_len-2] inside the buffer */
	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		uidin.bv_val = strrchr( out.bv_val, '#' );

		if( uidin.bv_val == NULL ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/*
		 * Split at the '#': out keeps the DN part, uidin gets the
		 * bit string without the separator.
		 */
		uidin.bv_len = out.bv_len - (uidin.bv_val - out.bv_val);
		out.bv_len -= uidin.bv_len--;

		/* temporarily trim the UID */
		*(uidin.bv_val++) = '\0';

		rc = bitStringNormalize( syntax, &uidin, &uidout );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}
	}

#ifdef USE_DN_NORMALIZE
	rc = dnNormalize2( NULL, &out, normalized );
#else
	rc = dnPretty2( NULL, &out, normalized );
#endif

	if( rc != LDAP_SUCCESS ) {
		free( out.bv_val );
		free( uidout.bv_val );
		return LDAP_INVALID_SYNTAX;
	}

	if( uidout.bv_len ) {
		/* room for the DN, the '#', the UID and the terminator */
		normalized->bv_val = ch_realloc( normalized->bv_val,
			normalized->bv_len + uidout.bv_len + sizeof("#") );

		/* insert the separator */
		normalized->bv_val[normalized->bv_len++] = '#';

		/* append the UID */
		AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
			uidout.bv_val, uidout.bv_len );
		normalized->bv_len += uidout.bv_len;

		/* terminate */
		normalized->bv_val[normalized->bv_len] = '\0';
	}

	/* the normalized UID has been copied; release the temporaries */
	free( uidout.bv_val );
	free( out.bv_val );

	return LDAP_SUCCESS;
}

516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Accept exactly the upper-case strings "TRUE" and "FALSE".
 * The check is deliberately strict so that values need no
 * normalization before simplistic matching.  Returns LDAP_SUCCESS for
 * those two values and LDAP_INVALID_SYNTAX for anything else.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
547
	slap_mask_t flags,
548
549
550
551
552
553
554
555
556
557
558
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

559
560
561
562
563
564
565
566
567
/*
 * Validate that a value is a non-empty, well-formed UTF-8 string.
 *
 * Each character's length is taken from its first byte; every following
 * byte must be a continuation byte (10xxxxxx), and the length must agree
 * with LDAP_UTF8_OFFSET().  A character that claims more bytes than are
 * left in the value is rejected before any of those bytes are examined,
 * so the scan never reads beyond bv_len and the unsigned remaining-byte
 * counter cannot wrap around.
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/* a truncated trailing sequence must not be read past the end */
		if( len > 0 && (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	/*
	 * Every step consumed at most the remaining byte count, so the
	 * loop can only have ended with exactly nothing left.
	 */
	return LDAP_SUCCESS;
}

/*
 * Normalize the insignificant spaces of a UTF-8 string: leading and
 * trailing spaces are removed and every internal run of spaces is
 * collapsed to a single space.  A value made up only of spaces
 * normalizes to a single space.
 *
 * The validator is expected to have rejected empty values.  The result
 * is a newly allocated, NUL-terminated copy stored in *normalized; the
 * function always returns LDAP_SUCCESS.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *src, *dst, *trail, *end;
	int step = 0;

	/* validator should have refused an empty string */
	assert( val->bv_len );

	/*
	 * Skip the leading spaces.  Every space is ASCII and therefore
	 * exactly one byte wide.
	 */
	src = val->bv_val;
	while( src < val->bv_val + val->bv_len && ASCII_SPACE( src[ 0 ] ) ) {
		src++;
	}

	normalized->bv_len = val->bv_len - (src - val->bv_val);

	if( normalized->bv_len == 0 ) {
		/* only spaces: treated as if the value held one space */
		ber_mem2bv( " ", 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* copy the remainder, then compact that copy in place */
	ber_mem2bv( src, normalized->bv_len, 1, normalized );
	end = normalized->bv_val + normalized->bv_len;

	assert( normalized->bv_val );

	src = dst = normalized->bv_val;
	trail = NULL;

	while( src < end ) {
		/* step past whatever the previous round wrote */
		dst += step;

		if( !ASCII_SPACE( *src ) ) {
			/* ordinary character: copy it whole */
			step = LDAP_UTF8_COPY(dst,src);
			trail = NULL;
			src += step;
			continue;
		}

		/* remember the character in front of this space */
		trail = dst - step;
		step = 1;
		*dst = *src++;

		/* Ignore the extra whitespace */
		while( ASCII_SPACE( *src ) ) {
			src++;
		}
	}

	assert( normalized->bv_val <= src );
	assert( dst+step <= src );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in a space, back up so that the terminator
	 * below lands on that space.  One position is enough because the
	 * loop above collapsed every run of spaces to a single space.
	 */
	if( trail != NULL ) {
		step = dst - trail;
		dst = trail;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *dst ) );

	dst += step;

	/* null terminate */
	*dst = '\0';

	normalized->bv_len = dst - normalized->bv_val;

	return LDAP_SUCCESS;
}

694
/* Returns Unicode canonically normalized copy of a substring assertion
695
 * Skipping attribute description */
696
/*
 * Return a newly allocated copy of the substrings assertion `sa` in
 * which the initial, any[] and final components (where present) have
 * been run through UTF8bvnormalize() with the given casefold flags.
 * The attribute description is not copied.
 *
 * Returns NULL when the allocation fails or when any component comes
 * back from UTF8bvnormalize() with a NULL value; everything built up to
 * that point is released first.
 */
static SubstringsAssertion *
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
	unsigned casefold )
{
	SubstringsAssertion *copy;
	int n;

	copy = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( copy == NULL ) {
		return NULL;
	}

	/* initial component */
	if( sa->sa_initial.bv_val != NULL ) {
		UTF8bvnormalize( &sa->sa_initial, &copy->sa_initial, casefold );
		if( copy->sa_initial.bv_val == NULL ) {
			goto fail;
		}
	}

	/* any components: a NULL-terminated array */
	if( sa->sa_any != NULL ) {
		n = 0;
		while( sa->sa_any[n].bv_val != NULL ) {
			n++;
		}

		copy->sa_any = (struct berval *)ch_malloc( (n + 1) * sizeof(struct berval) );

		for( n=0; sa->sa_any[n].bv_val != NULL; n++ ) {
			UTF8bvnormalize( &sa->sa_any[n], &copy->sa_any[n],
					casefold );
			if( copy->sa_any[n].bv_val == NULL ) {
				goto fail;
			}
		}
		copy->sa_any[n].bv_val = NULL;
	}

	/* final component */
	if( sa->sa_final.bv_val != NULL ) {
		UTF8bvnormalize( &sa->sa_final, &copy->sa_final, casefold );
		if( copy->sa_final.bv_val == NULL ) {
			goto fail;
		}
	}

	return copy;

fail:
	/* release whatever was built before the failure */
	if( copy->sa_final.bv_val != NULL ) {
		free( copy->sa_final.bv_val );
	}
	if( copy->sa_any != NULL ) {
		ber_bvarray_free( copy->sa_any );
	}
	if( copy->sa_initial.bv_val != NULL ) {
		free( copy->sa_initial.bv_val );
	}
	ch_free( copy );
	return NULL;
}

748
#ifndef SLAPD_APPROX_OLDSINGLESTRING
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Word-based approximate match.
 *
 * Both values are normalized with LDAP_UTF8_APPROX.  The stored value is
 * split into words at SLAPD_APPROX_DELIMITER characters and each word is
 * reduced to its phonetic code.  The asserted value's words are then
 * looked up, in order, among the stored words that have not been
 * consumed yet.  *matchp is set to 0 when at least one asserted word was
 * found and none was missing, and to 1 otherwise (including when either
 * value fails to normalize).  The function always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *wanted;
	char *code, **codes, **words, *cp;
	int w, nwords, span, offset=0, next=0;

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8bvnormalize( (struct berval *) assertedValue,
		NULL, LDAP_UTF8_APPROX );
	if( wanted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Split the stored value in place: each delimiter becomes a NUL */
	for ( cp = stored->bv_val, nwords = 1; *cp; cp++ ) {
		cp = strpbrk( cp, SLAPD_APPROX_DELIMITER );
		if ( cp == NULL ) break;
		*cp = '\0';
		nwords++;
	}

	/* Record every word together with its phonetic code */
	words = (char **)ch_malloc( nwords * sizeof(char *) );
	codes = (char **)ch_malloc( nwords * sizeof(char *) );
	cp = stored->bv_val;
	for ( w = 0; w < nwords; w++ ) {
		words[w] = cp;
		codes[w] = phonetic( cp );
		cp += strlen( cp ) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	span = 0;
	while ( (ber_len_t) offset < wanted->bv_len ) {
		span = strcspn( wanted->bv_val + offset, SLAPD_APPROX_DELIMITER );
		if( span == 0 ) {
			/* sitting on a delimiter: step over it */
			offset++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( span == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( w=next; w<nwords; w++ )
				if( !strncasecmp( wanted->bv_val + offset, words[w], 1 )) {
					next=w+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next asserted word and take its phonetic code */
			wanted->bv_val[offset+span] = '\0';
			code = phonetic( wanted->bv_val + offset );

			/* Look for that code among the stored words not yet consumed */
			for( w=next; w<nwords; w++ ){
				if( strcmp( code, codes[w] ) == 0 ){
					next = w+1;
					break;
				}
			}
			ch_free( code );
		}

		/* The search ran off the end: this asserted word is missing */
		if( w >= nwords ) {
			next=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		offset += span+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( next > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( wanted );
	for( w=0; w<nwords; w++ ) {
		ch_free( codes[w] );
	}
	ch_free( codes );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

866
static int 
867
868
869
870
871
872
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
873
874
	BerVarray values,
	BerVarray *keysp )
875
{
876
	char *c;
877
	int i,j, len, wordcount, keycount=0;
878
	struct berval *newkeys;
879
	BerVarray keys=NULL;
880

881
	for( j=0; values[j].bv_val != NULL; j++ ) {
882
		struct berval val = { 0, NULL };
883
		/* Yes, this is necessary */
884
885
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
886

887
		/* Isolate how many words there are. There will be a key for each */
888
		for( wordcount = 0, c = val.bv_val; *c; c++) {
889
890
891
892
893
894
895
896
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
897
898
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
899
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
900
901
902
903
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
904
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
905
906
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
907
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
908
909
910
911
			keycount++;
			i++;
		}

912
		ber_memfree( val.bv_val );
913
	}
914
	keys[keycount].bv_val = NULL;
915
916
917
918
919
	*keysp = keys;

	return LDAP_SUCCESS;
}

920
static int 
921
922
923
924
925
926
927
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
928
	BerVarray *keysp )
929
{
930
	char *c;
931
	int i, count, len;
932
	struct berval *val;
933
	BerVarray keys;
934

935
	/* Yes, this is necessary */
936
937
	val = UTF8bvnormalize( ((struct berval *)assertValue), NULL, LDAP_UTF8_APPROX );
	if( val == NULL || val->bv_val == NULL ) {
938
939
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
940
		*keysp = keys;
941
		ber_bvfree( val );
942
943
944
		return LDAP_SUCCESS;
	}

945
	/* Isolate how many words there are. There will be a key for each */
946
	for( count = 0,c = val->bv_val; *c; c++) {
947
948
949
950
951
952
953
954
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
955
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
956
957

	/* Get a phonetic copy of each word */
958
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
959
960
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
961
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
962
963
964
		i++;
	}

965
	ber_bvfree( val );
966

967
	keys[count].bv_val = NULL;
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Single-string approximate match (the variant built when
 * SLAPD_APPROX_OLDSINGLESTRING is defined).
 *
 * Both values are normalized, stripped of 8-bit characters and reduced
 * to a phonetic code; *matchp receives the strcmp() result of the two
 * codes.  When the stored value fails to normalize *matchp is 1, when
 * the asserted value fails it is -1.  The function always returns
 * LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *stored_code, *wanted_code;
	char *stored, *wanted;

	/* Yes, this is necessary */
	stored = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8normalize( (struct berval *) assertedValue,
		UTF8_NOCASEFOLD );
	if( wanted == NULL ) {
		free( stored );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* strip 8-bit chars and run through phonetic() */
	stored_code = phonetic( strip8bitChars( stored ) );
	wanted_code = phonetic( strip8bitChars( wanted ) );

	free( stored );
	free( wanted );

	*matchp = strcmp( stored_code, wanted_code );

	ch_free( stored_code );
	ch_free( wanted_code );

	return LDAP_SUCCESS;
}

1019
static int 
1020
1021
1022
1023
1024
1025
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1026
1027
	BerVarray values,
	BerVarray *keysp )
1028
1029
{
	int i;
1030
	BerVarray *keys;
1031
	char *s;
1032

1033
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1034
		/* empty - just count them */
1035
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1036
1037

	/* we should have at least one value at this point */
1038
1039
	assert( i > 0 );

1040
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
1041
1042

	/* Copy each value and run it through phonetic() */
1043
	for( i=0; values[i].bv_val != NULL; i++ ) {
1044
		/* Yes, this is necessary */
1045
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1046
1047

		/* strip 8-bit chars and run through phonetic() */
1048
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1049
		free( s );
1050
	}
1051
	keys[i].bv_val = NULL;
1052
1053
1054
1055
1056
1057

	*keysp = keys;
	return LDAP_SUCCESS;
}


1058
static int 
1059
1060
1061
1062
1063
1064
1065
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
1066
	BerVarray *keysp )
1067
{
1068
	BerVarray keys;
1069
	char *s;
1070

1071
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1072

1073
	/* Yes, this is necessary */
1074
	s = UTF8normalize( ((struct berval *)assertValue),
1075
1076
1077
1078
1079
1080
1081
1082
1083
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1084
1085
1086
1087
1088
1089
1090

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1091
static int
1092
caseExactMatch(
1093
	int *matchp,
1094
	slap_mask_t flags,
1095
1096
1097
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
1098
	void *assertedValue )
1099
{
1100
1101
1102
	*matchp = UTF8bvnormcmp( value,
		(struct berval *) assertedValue,
		LDAP_UTF8_NOCASEFOLD );
1103
	return LDAP_SUCCESS;
1104
1105
}

1106
static int
1107
caseExactIgnoreSubstringsMatch(