schema_init.c 109 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

8
9
10
11
/****
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

12
13
14
15
16
17
18
19
20
21
22
23
24
Summary:
  StringSyntax		X.500	LDAP	Matching
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
  NumericString		subset	subset  ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

  In LDAPv3, a directory string is a UTF-8 encoded UCS string.

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which would
  be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces and
  could be empty.  However, in X.500, all attribute values of numeric
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::= NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same constraint
  (but it's unclear how such an assertion could ever be true). In LDAP,
  there is one syntax (numericString) not two (numericString with constraint,
  numericString without constraint).  This should be treated as numericString
  with non-empty constraint.  Note that while someone may have no
  ISDN number, there are no ISDN numbers which are zero length.
60
61
62
63
64

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters and
65
66
67
  can be empty.  In X.500, semantics much like NumericString (see serialNumber
  for a like example) excepting uses insignificant space handling instead of
  ignore all spaces.  
68
69

IA5String
70
71
  Basically same as PrintableString.  There are no examples in X.500, but
  same logic applies.  So we require them to be non-empty as well.
72
73
74
75

****/


76
77
78
#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
79
#include <limits.h>
80
81

#include <ac/ctype.h>
82
#include <ac/errno.h>
83
84
85
86
87
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
88
#include "lber_pvt.h"
89

90
91
#include "ldap_utf8.h"

92
93
94
95
96
97
#include "lutil_hash.h"
/*
 * Local aliases for the lutil hash API.  The indexer and filter routines
 * in this file hash their key material exclusively through these names.
 */
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
98

99
/* recycled validation routines */
100
#define berValidate						blobValidate
101
102

/* unimplemented prettiers */
103
#define integerPretty					NULL
104
105

/* recycled matching routines */
106
#define bitStringMatch					octetStringMatch
107
108
109
#define numericStringMatch				caseIgnoreIA5Match
#define objectIdentifierMatch			caseIgnoreIA5Match
#define telephoneNumberMatch			caseIgnoreIA5Match
110
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
111
112
#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match
113
#define uniqueMemberMatch				dnMatch
114
#define integerFirstComponentMatch		integerMatch
115

116
117
/* approx matching rules */
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
118
119
120
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
121
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
122
#define IA5StringApproxMatch			approxMatch
123
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
124
#define IA5StringApproxFilter			approxFilter
125

126
/* ordering matching rules */
127
128
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseExactOrderingMatch			caseExactMatch
129
#define integerOrderingMatch			integerMatch
130

131
/* unimplemented matching routines */
132
133
134
135
#define caseIgnoreListMatch				NULL
#define caseIgnoreListSubstringsMatch	NULL
#define protocolInformationMatch		NULL

Kurt Zeilenga's avatar
Kurt Zeilenga committed
136
#ifdef SLAPD_ACI_ENABLED
137
#define OpenLDAPaciMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
138
139
#endif
#ifdef SLAPD_AUTHPASSWD
140
#define authPasswordMatch				NULL
Kurt Zeilenga's avatar
Kurt Zeilenga committed
141
#endif
142
143

/* recycled indexing/filtering routines */
144
145
#define dnIndexer				caseExactIgnoreIndexer
#define dnFilter				caseExactIgnoreFilter
146
147
#define bitStringFilter			octetStringFilter
#define bitStringIndexer		octetStringIndexer
148

149
150
151
152
153
#define telephoneNumberIndexer			caseIgnoreIA5Indexer
#define telephoneNumberFilter			caseIgnoreIA5Filter
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter

154
155
156
157
158
159
160
161
162
163
164
165
166
167
/*
 * File-scope pointers to three matching rules, plus a table pairing each
 * pointer's address with a matching rule OID.
 * NOTE(review): the code that walks mr_ptr is not visible here; presumably
 * the pointers are filled in when the rules are registered -- confirm.
 */
static MatchingRule *caseExactMatchingRule;
static MatchingRule *caseExactSubstringsMatchingRule;
static MatchingRule *integerFirstComponentMatchingRule;

static const struct MatchingRulePtr {
	const char   *oid;	/* dotted-decimal OID string */
	MatchingRule **mr;	/* address of the pointer paired with that OID */
} mr_ptr [] = {
	/* must match OIDs below */
	{ "2.5.13.5",  &caseExactMatchingRule },
	{ "2.5.13.7",  &caseExactSubstringsMatchingRule },
	{ "2.5.13.29", &integerFirstComponentMatchingRule }
};

168

169
/*
 * Case-insensitive search for the byte c within the first bv->bv_len bytes
 * of bv->bv_val.
 *
 * On a hit, the offset of the first matching byte is stored in *len and a
 * pointer to that byte is returned.  Returns NULL, leaving *len untouched,
 * when c is NUL or when neither case variant of c occurs.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	char lc, uc;
	ber_len_t pos = 0;

	if( c == 0 ) return NULL;

	lc = TOLOWER( c );
	uc = TOUPPER( c );

	while( pos < bv->bv_len ) {
		char cur = bv->bv_val[pos];

		if( cur == uc || cur == lc ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
186

187
188
189
static int
octetStringMatch(
	int *matchp,
190
	slap_mask_t flags,
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

/* Index generation function */
209
static int octetStringIndexer(
210
211
	slap_mask_t use,
	slap_mask_t flags,
212
213
214
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
215
216
	BerVarray values,
	BerVarray *keysp )
217
218
219
{
	int i;
	size_t slen, mlen;
220
	BerVarray keys;
221
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
222
	unsigned char	HASHdigest[HASH_BYTES];
223
	struct berval digest;
224
225
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
226

227
	for( i=0; values[i].bv_val != NULL; i++ ) {
228
229
230
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
231
232
233
	/* we should have at least one value at this point */
	assert( i > 0 );

234
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
235

236
237
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
238

239
	for( i=0; values[i].bv_val != NULL; i++ ) {
240
		HASH_Init( &HASHcontext );
241
		if( prefix != NULL && prefix->bv_len > 0 ) {
242
			HASH_Update( &HASHcontext,
243
244
				prefix->bv_val, prefix->bv_len );
		}
245
		HASH_Update( &HASHcontext,
246
			syntax->ssyn_oid, slen );
247
		HASH_Update( &HASHcontext,
248
			mr->smr_oid, mlen );
249
		HASH_Update( &HASHcontext,
250
			values[i].bv_val, values[i].bv_len );
251
		HASH_Final( HASHdigest, &HASHcontext );
252

253
		ber_dupbv( &keys[i], &digest );
254
255
	}

256
	keys[i].bv_val = NULL;
257
258
259
260
261
262
263

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
264
static int octetStringFilter(
265
266
	slap_mask_t use,
	slap_mask_t flags,
267
268
269
270
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
271
	BerVarray *keysp )
272
273
{
	size_t slen, mlen;
274
	BerVarray keys;
275
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
276
	unsigned char	HASHdigest[HASH_BYTES];
277
278
	struct berval *value = (struct berval *) assertValue;
	struct berval digest;
279
280
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
281

282
283
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
284

285
	keys = ch_malloc( sizeof( struct berval ) * 2 );
286

287
	HASH_Init( &HASHcontext );
288
	if( prefix != NULL && prefix->bv_len > 0 ) {
289
		HASH_Update( &HASHcontext,
290
291
			prefix->bv_val, prefix->bv_len );
	}
292
	HASH_Update( &HASHcontext,
293
		syntax->ssyn_oid, slen );
294
	HASH_Update( &HASHcontext,
295
		mr->smr_oid, mlen );
296
	HASH_Update( &HASHcontext,
297
		value->bv_val, value->bv_len );
298
	HASH_Final( HASHdigest, &HASHcontext );
299

300
301
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
302
303
304
305
306

	*keysp = keys;

	return LDAP_SUCCESS;
}
307

308
309
310
311
312
313
314
315
316
/*
 * Validator that never accepts a value: the result is unconditionally
 * LDAP_OTHER, whatever syntax and in contain.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	/* no value is accepted (contrast with blobValidate) */
	return LDAP_OTHER;
}

317
static int
318
blobValidate(
319
320
321
322
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
323
	return LDAP_SUCCESS;
324
325
}

326
327
328
329
330
331
332
333
334
335
336
337
338
/*
 * Validate the textual Bit String form.
 *
 * rfc 2252 section 6.3 Bit String
 *   bitstring = "'" *binary-digit "'B"
 *   binary-digit = "0" / "1"
 *   example: '0101111101'B
 *
 * The check is deliberately strict so that values need no normalization
 * before simplistic matching.  Returns LDAP_SUCCESS for a well-formed
 * value and LDAP_INVALID_SYNTAX otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *s = in->bv_val;
	ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* the shortest legal value is ''B */
	if( n < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* opening quote, closing quote, trailing B */
	if( s[0] != '\'' || s[n-2] != '\'' || s[n-1] != 'B' ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes (offsets 1 .. n-3) must be a bit */
	for( pos = 1; pos + 2 < n; pos++ ) {
		if( s[pos] != '0' && s[pos] != '1' ) {
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

363
364
365
366
/*
 * Normalize a Bit String value.
 *
 * A normalized bitString has no extraneous (leading) zero bits.
 * That is, '00010'B is normalized to '10'B.  A value without any non-zero
 * bit is normalized to '0'B.
 *
 * val is read as a NUL-terminated string starting at its second byte, so
 * it is expected to hold at least the opening quote followed by a
 * terminated remainder (presumably it already passed bitStringValidate --
 * TODO confirm at the call sites).  The result is stored in newly
 * allocated memory in *normalized.  Always returns LDAP_SUCCESS.
 */
static int
bitStringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	/* start at the first bit, i.e. just past the opening quote */
	char *src = &val->bv_val[1];
	ber_len_t n;

	/* Find the first non-zero bit */
	while ( *src == '0' ) {
		src++;
	}

	if( *src == '\'' ) {
		/* no non-zero bits */
		ber_str2bv( "\'0\'B", sizeof("\'0\'B") - 1, 1, normalized );
		return LDAP_SUCCESS;
	}

	/* the result is never longer than the input */
	normalized->bv_val = ch_malloc( val->bv_len + 1 );

	normalized->bv_val[0] = '\'';
	n = 1;

	/* copy the significant bits together with the closing 'B */
	while( *src != '\0' ) {
		normalized->bv_val[n++] = *src++;
	}

	normalized->bv_val[n] = '\0';
	normalized->bv_len = n;

	return LDAP_SUCCESS;
}

403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
/*
 * Validate a "Name and Optional UID" value: a DN optionally followed by
 * '#' and a bit string ('...'B).
 *
 * An empty value is accepted.  When the value ends in 'B it is assumed to
 * carry a UID: the bit string must be well formed and preceded by '#',
 * and only the part before the '#' is handed to dnValidate().  Otherwise
 * the whole value is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX, LDAP_OTHER when the working
 * copy cannot be allocated, or whatever dnValidate() reports.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	/* work on a copy: the UID suffix is cut off in place below */
	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* the length test keeps the bv_len-2 index inside the value */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest possible suffix is #''B.  Anything shorter
		 * cannot hold the separator and both quotes, and would make
		 * the unsigned index arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk backwards over the bits; i stays >= 1 */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

/*
 * Normalize a "Name and Optional UID" value: a DN optionally followed by
 * '#' and a bit string ('...'B).
 *
 * The DN part is normalized with dnNormalize2() (or dnPretty2() when
 * USE_DN_NORMALIZE is not defined) and the UID part, if present, with
 * bitStringNormalize().  The two results are joined with '#' and stored
 * in *normalized.  An empty input yields an empty result.
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX when a part cannot be
 * normalized, or LDAP_OTHER when the working copy cannot be allocated.
 */
static int
nameUIDNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out = { 0, NULL };
	struct berval uidin = { 0, NULL };
	struct berval uidout = { 0, NULL };
	int rc;

	/* work on a copy: the UID suffix is cut off in place below */
	ber_dupbv( &out, val );
	if( out.bv_val == NULL && val->bv_len != 0 ) {
		/* same convention as nameUIDValidate for a failed copy */
		return LDAP_OTHER;
	}

	if( out.bv_len == 0 ) {
		/*
		 * Nothing to normalize.  Hand the empty copy to the caller
		 * so that *normalized is always set on success and the copy
		 * is not leaked.
		 */
		*normalized = out;
		return LDAP_SUCCESS;
	}

	/* the length test keeps the bv_len-2 index inside the value */
	if( out.bv_len >= 2
		&& out.bv_val[out.bv_len-1] == 'B'
		&& out.bv_val[out.bv_len-2] == '\'' )
	{
		ber_len_t dnlen;

		/* assume presence of optional UID */
		uidin.bv_val = strrchr( out.bv_val, '#' );

		if( uidin.bv_val == NULL ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* split at the separator: DN before it, UID after it */
		dnlen = uidin.bv_val - out.bv_val;
		uidin.bv_len = out.bv_len - dnlen - 1;
		out.bv_len = dnlen;

		/* temporarily trim the UID */
		*(uidin.bv_val++) = '\0';

		rc = bitStringNormalize( syntax, &uidin, &uidout );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}
	}

#ifdef USE_DN_NORMALIZE
	rc = dnNormalize2( NULL, &out, normalized );
#else
	rc = dnPretty2( NULL, &out, normalized );
#endif

	if( rc != LDAP_SUCCESS ) {
		free( out.bv_val );
		free( uidout.bv_val );
		return LDAP_INVALID_SYNTAX;
	}

	if( uidout.bv_len ) {
		/* room for the separator, the UID and the terminator */
		normalized->bv_val = ch_realloc( normalized->bv_val,
			normalized->bv_len + uidout.bv_len + sizeof("#") );

		/* insert the separator */
		normalized->bv_val[normalized->bv_len++] = '#';

		/* append the UID */
		AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
			uidout.bv_val, uidout.bv_len );
		normalized->bv_len += uidout.bv_len;

		/* terminate */
		normalized->bv_val[normalized->bv_len] = '\0';
	}

	/* the normalized UID has been copied; release both temporaries */
	free( uidout.bv_val );
	free( out.bv_val );

	return LDAP_SUCCESS;
}

516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */

/*
 * Accept exactly the two spellings "TRUE" and "FALSE" (upper case, no
 * surrounding whitespace).  The validation is deliberately unforgiving so
 * that no normalization is needed before simplistic matching.
 *
 * Returns LDAP_SUCCESS for either spelling, LDAP_INVALID_SYNTAX otherwise.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
547
	slap_mask_t flags,
548
549
550
551
552
553
554
555
556
557
558
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

559
560
561
562
563
564
565
566
567
/*
 * Validate that a value is a non-empty sequence of well-formed UTF-8
 * characters.
 *
 * For every character the length announced by the lead byte is obtained
 * through LDAP_UTF8_CHARLEN2, the character is required to fit inside the
 * remaining bytes of the value, every continuation byte must have the
 * form 10xxxxxx, and the length must agree with LDAP_UTF8_OFFSET.
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( !in->bv_len ) return LDAP_INVALID_SYNTAX;

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * Reject a character that would run past the end of the
		 * value.  Without this test the continuation bytes below
		 * are read beyond bv_len, and the unsigned count wraps
		 * around instead of reaching zero.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks: each case falls through to the next
		 * so that every continuation byte is examined */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* fallthrough */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	if( count != 0 ) return LDAP_INVALID_SYNTAX;

	return LDAP_SUCCESS;
}

/*
 * Normalize the whitespace of a UTF-8 string.
 *
 * Leading spaces are skipped, every run of spaces is collapsed to its
 * first space character, and a trailing space is dropped.  The result is
 * built in a fresh copy made with ber_mem2bv() and stored, NUL-terminated,
 * in *normalized.  Always returns LDAP_SUCCESS.
 */
static int
UTF8StringNormalize(
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	char *p, *q, *s, *e;
	int len = 0;

	p = val->bv_val;

	/* Ignore initial whitespace */
	/* All space is ASCII. All ASCII is 1 byte */
	for ( ; p < val->bv_val + val->bv_len && ASCII_SPACE( p[ 0 ] ); p++ );

	/* copy the remainder; the compaction below is done in place */
	normalized->bv_len = val->bv_len - (p - val->bv_val);
	ber_mem2bv( p, normalized->bv_len, 1, normalized );

	assert( normalized->bv_val );

	e = normalized->bv_val + normalized->bv_len;

	/*
	 * p reads, q writes.  q is advanced lazily: it points at the start
	 * of the character written last and len holds that character's
	 * size.  s is non-NULL only while the character written last is a
	 * space, and then marks the start of the character before it.
	 */
	p = q = normalized->bv_val;
	s = NULL;

	while ( p < e ) {
		q += len;
		if ( ASCII_SPACE( *p ) ) {
			s = q - len;
			len = 1;
			*q = *p++;

			/* Ignore the extra whitespace; stay inside the copy
			 * instead of relying on a terminator to stop the scan */
			while ( p < e && ASCII_SPACE( *p ) ) {
				p++;
			}
		} else {
			len = LDAP_UTF8_COPY(q,p);
			s=NULL;
			p+=len;
		}
	}

	assert( normalized->bv_val <= p );
	assert( q+len <= p );

	/* cannot start with a space */
	assert( !ASCII_SPACE(normalized->bv_val[0]) );

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */

	if ( s != NULL ) {
		len = q - s;
		q = s;
	}

	/* cannot end with a space */
	assert( !ASCII_SPACE( *q ) );

	q += len;

	/* null terminate */
	*q = '\0';

	normalized->bv_len = q - normalized->bv_val;

	return LDAP_SUCCESS;
}

685
/* Returns Unicode canonically normalized copy of a substring assertion
686
 * Skipping attribute description */
687
static SubstringsAssertion *
688
689
UTF8SubstringsassertionNormalize(
	SubstringsAssertion *sa,
Kurt Zeilenga's avatar
Kurt Zeilenga committed
690
	unsigned casefold )
691
692
693
694
695
696
697
698
699
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)ch_calloc( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

700
	if( sa->sa_initial.bv_val != NULL ) {
701
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
702
		if( nsa->sa_initial.bv_val == NULL ) {
703
704
705
706
707
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
708
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
709
710
			/* empty */
		}
711
712
		nsa->sa_any = (struct berval *)ch_malloc( (i + 1) * sizeof(struct berval) );
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
713
714
			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i], 
					casefold );
715
			if( nsa->sa_any[i].bv_val == NULL ) {
716
717
718
				goto err;
			}
		}
719
		nsa->sa_any[i].bv_val = NULL;
720
721
	}

722
	if( sa->sa_final.bv_val != NULL ) {
723
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
724
		if( nsa->sa_final.bv_val == NULL ) {
725
726
727
728
729
730
731
			goto err;
		}
	}

	return nsa;

err:
Howard Chu's avatar
Howard Chu committed
732
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
733
	if ( nsa->sa_any )ber_bvarray_free( nsa->sa_any );
Howard Chu's avatar
Howard Chu committed
734
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
735
736
737
738
	ch_free( nsa );
	return NULL;
}

739
#ifndef SLAPD_APPROX_OLDSINGLESTRING
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Word-based approximate match.
 *
 * Both values are normalized with UTF8bvnormalize( LDAP_UTF8_APPROX ) and
 * split into words at SLAPD_APPROX_DELIMITER characters.  The words of
 * the asserted value are then looked up, in order, among the phonetic
 * codes of the words of the stored value; every lookup resumes after the
 * word matched previously.
 *
 * *matchp is set to 0 when at least one asserted word was looked up and
 * none of them failed, and to 1 otherwise, including when either
 * normalization fails.  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *nval, *assertv;
	char *key, **sounds, **words, *c;
	int i, count, len, nextchunk=0, nextavail=0;

	/* Yes, this is necessary */
	nval = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( nval == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	assertv = UTF8bvnormalize( ((struct berval *)assertedValue), NULL, LDAP_UTF8_APPROX );
	if( assertv == NULL ) {
		ber_bvfree( nval );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Isolate how many words there are; each delimiter is overwritten
	 * with NUL so that every word becomes a string of its own */
	for ( c = nval->bv_val, count = 1; *c; c++ ) {
		c = strpbrk( c, SLAPD_APPROX_DELIMITER );
		if ( c == NULL ) break;
		*c = '\0';
		count++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( count * sizeof(char *) );
	sounds = (char **)ch_malloc( count * sizeof(char *) );
	c = nval->bv_val;
	for ( i = 0; i < count; i++ ) {
		words[i] = c;
		sounds[i] = phonetic(c);
		c += strlen(c) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while ( (ber_len_t) nextchunk < assertv->bv_len ) {
		char *chunk = assertv->bv_val + nextchunk;

		len = strcspn( chunk, SLAPD_APPROX_DELIMITER);
		if( len == 0 ) {
			/* a delimiter: step over it */
			nextchunk++;
			continue;
		}
#if defined(SLAPD_APPROX_INITIALS)
		else if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=nextavail; i<count; i++ )
				if( !strncasecmp( chunk, words[i], 1 )) {
					nextavail=i+1;
					break;
				}
		}
#endif
		else {
			/* Isolate the next word in the asserted value and phonetic it */
			chunk[len] = '\0';
			key = phonetic( chunk );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=nextavail; i<count; i++ ){
				if( !strcmp( key, sounds[i] ) ){
					nextavail = i+1;
					break;
				}
			}
			ch_free( key );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= count ) {
			nextavail=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		nextchunk += len+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( nextavail > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( assertv );
	for( i=0; i<count; i++ ) {
		ch_free( sounds[i] );
	}
	ch_free( sounds );
	ch_free( words );
	ber_bvfree( nval );

	return LDAP_SUCCESS;
}

857
static int 
858
859
860
861
862
863
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
864
865
	BerVarray values,
	BerVarray *keysp )
866
{
867
	char *c;
868
	int i,j, len, wordcount, keycount=0;
869
	struct berval *newkeys;
870
	BerVarray keys=NULL;
871

872
	for( j=0; values[j].bv_val != NULL; j++ ) {
873
		struct berval val = { 0, NULL };
874
		/* Yes, this is necessary */
875
876
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
877

878
		/* Isolate how many words there are. There will be a key for each */
879
		for( wordcount = 0, c = val.bv_val; *c; c++) {
880
881
882
883
884
885
886
887
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
888
889
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
890
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
891
892
893
894
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
895
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
896
897
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
898
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
899
900
901
902
			keycount++;
			i++;
		}

903
		ber_memfree( val.bv_val );
904
	}
905
	keys[keycount].bv_val = NULL;
906
907
908
909
910
	*keysp = keys;

	return LDAP_SUCCESS;
}

911
static int 
912
913
914
915
916
917
918
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
919
	BerVarray *keysp )
920
{
921
	char *c;
922
	int i, count, len;
923
	struct berval *val;
924
	BerVarray keys;
925

926
	/* Yes, this is necessary */
927
928
	val = UTF8bvnormalize( ((struct berval *)assertValue), NULL, LDAP_UTF8_APPROX );
	if( val == NULL || val->bv_val == NULL ) {
929
930
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
931
		*keysp = keys;
932
		ber_bvfree( val );
933
934
935
		return LDAP_SUCCESS;
	}

936
	/* Isolate how many words there are. There will be a key for each */
937
	for( count = 0,c = val->bv_val; *c; c++) {
938
939
940
941
942
943
944
945
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
946
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
947
948

	/* Get a phonetic copy of each word */
949
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
950
951
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
952
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
953
954
955
		i++;
	}

956
	ber_bvfree( val );
957

958
	keys[count].bv_val = NULL;
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * Single-string approximate match (SLAPD_APPROX_OLDSINGLESTRING variant).
 *
 * Both values are normalized with UTF8normalize( UTF8_NOCASEFOLD ),
 * passed through strip8bitChars() and phonetic(), and the two phonetic
 * strings are compared with strcmp(); the comparison result is stored in
 * *matchp.  When the stored value cannot be normalized *matchp is 1, when
 * the asserted value cannot be normalized it is -1.
 *
 * Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *vapprox, *avapprox;
	char *nvalue, *nasserted;

	/* Yes, this is necessary */
	nvalue = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( nvalue == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	nasserted = UTF8normalize( ((struct berval *)assertedValue),
			   UTF8_NOCASEFOLD );
	if( nasserted == NULL ) {
		free( nvalue );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	vapprox = phonetic( strip8bitChars( nvalue ) );
	avapprox = phonetic( strip8bitChars( nasserted ) );

	/* the normalized copies are not needed past this point */
	free( nvalue );
	free( nasserted );

	*matchp = strcmp( vapprox, avapprox );

	ch_free( vapprox );
	ch_free( avapprox );

	return LDAP_SUCCESS;
}

1010
static int 
1011
1012
1013
1014
1015
1016
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1017
1018
	BerVarray values,
	BerVarray *keysp )
1019
1020
{
	int i;
1021
	BerVarray *keys;
1022
	char *s;
1023

1024
	for( i=0; values[i].bv_val != NULL; i++ ) {
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1025
		/* empty - just count them */
1026
	}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1027
1028

	/* we should have at least one value at this point */
1029
1030
	assert( i > 0 );

1031
	keys = (struct berval *)ch_malloc( sizeof( struct berval ) * (i+1) );
1032
1033

	/* Copy each value and run it through phonetic() */
1034
	for( i=0; values[i].bv_val != NULL; i++ ) {
1035
		/* Yes, this is necessary */
1036
		s = UTF8normalize( &values[i], UTF8_NOCASEFOLD );
1037
1038

		/* strip 8-bit chars and run through phonetic() */
1039
		ber_str2bv( phonetic( strip8bitChars( s ) ), 0, 0, &keys[i] );
1040
		free( s );
1041
	}
1042
	keys[i].bv_val = NULL;
1043
1044
1045
1046
1047
1048

	*keysp = keys;
	return LDAP_SUCCESS;
}


1049
static int 
1050
1051
1052
1053
1054
1055
1056
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
	void * assertValue,
1057
	BerVarray *keysp )
1058
{
1059
	BerVarray keys;
1060
	char *s;
1061

1062
	keys = (struct berval *)ch_malloc( sizeof( struct berval * ) * 2 );
1063

1064
	/* Yes, this is necessary */
1065
	s = UTF8normalize( ((struct berval *)assertValue),
1066
1067
1068
1069
1070
1071
1072
1073
1074
			     UTF8_NOCASEFOLD );
	if( s == NULL ) {
		keys[0] = NULL;
	} else {
		/* strip 8-bit chars and run through phonetic() */
		keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) );
		free( s );
		keys[1] = NULL;
	}
1075
1076
1077
1078
1079
1080
1081

	*keysp = keys;
	return LDAP_SUCCESS;
}
#endif


1082
static int
1083
caseExactMatch(
1084
	int *matchp,
1085
	slap_mask_t flags,
1086
1087
1088
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
1089
	void *assertedValue )
1090
{
1091
1092
1093
	*matchp = UTF8bvnormcmp( value,
		(struct berval *) assertedValue,
		LDAP_UTF8_NOCASEFOLD );
1094
	return LDAP_SUCCESS;
1095
1096
}

1097
static int
1098
caseExactIgnoreSubstringsMatch(
1099
	int *matchp,
1100
	slap_mask_t flags,
1101
1102
1103
1104
1105
1106
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = 0;
Pierangelo Masarati's avatar
Pierangelo Masarati committed
1107