schema_init.c 118 KB
Newer Older
1
2
3
/* schema_init.c - init builtin schema */
/* $OpenLDAP$ */
/*
Kurt Zeilenga's avatar
Kurt Zeilenga committed
4
 * Copyright 1998-2003 The OpenLDAP Foundation, All Rights Reserved.
5
6
7
8
9
10
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */

#include "portable.h"

#include <stdio.h>
Kurt Zeilenga's avatar
Kurt Zeilenga committed
11
#include <limits.h>
12
13

#include <ac/ctype.h>
14
#include <ac/errno.h>
15
16
17
18
19
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"
#include "ldap_pvt.h"
Pierangelo Masarati's avatar
Pierangelo Masarati committed
20
#include "lber_pvt.h"
21

22
23
#include "ldap_utf8.h"

24
25
26
27
28
29
#include "lutil_hash.h"
/*
 * Local short names for the lutil hash interface.  The index key
 * generators in this file only use these HASH_* spellings, so the
 * underlying hash can be swapped by editing this one group.
 */
#define HASH_BYTES				LUTIL_HASH_BYTES
#define HASH_CONTEXT			lutil_HASH_CTX
#define HASH_Init(c)			lutil_HASHInit(c)
#define HASH_Update(c,buf,len)	lutil_HASHUpdate(c,buf,len)
#define HASH_Final(d,c)			lutil_HASHFinal(d,c)
30

Kurt Zeilenga's avatar
Kurt Zeilenga committed
31
#ifdef SLAP_NVALUES
32
33
34
/* TO BE DELETED */
#define SLAP_MR_DN_FOLD (0)

35
36
37
/*
 * Non-zero when matching rule `mr' is `with' itself, or when the
 * smr_associated field of `mr' points at `with'.  `mr' is dereferenced,
 * so it must not be NULL unless it equals `with'.
 */
#define SLAP_MR_ASSOCIATED(mr, with) \
	((mr) == (with) || (mr)->smr_associated == (with))

38
39
40
41
42
43
44
45
#define xUTF8StringNormalize NULL
#define xIA5StringNormalize NULL
#define xtelephoneNumberNormalize NULL
#define xgeneralizedTimeNormalize NULL
#define xintegerNormalize NULL
#define xnumericStringNormalize NULL
#define xnameUIDNormalize NULL

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/* (new) normalization routines */
#define caseExactIA5Normalize						IA5StringNormalize
#define caseIgnoreIA5Normalize						IA5StringNormalize
#define caseExactNormalize							UTF8StringNormalize
#define caseIgnoreNormalize							UTF8StringNormalize

#define distinguishedNameNormalize					NULL
#define integerNormalize							NULL
#define integerFirstComponentNormalize				NULL
#define numericStringNormalize						NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL
#define generalizedTimeNormalize					NULL
#define uniqueMemberNormalize						NULL
#define bitStringNormalize							NULL
#define telephoneNumberNormalize					NULL

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#define distinguishedNameMatch  	dnMatch
#define distinguishedNameIndexer	octetStringIndexer
#define distinguishedNameFilter		octetStringFilter

#define uniqueMemberMatch				dnMatch

#define objectIdentifierMatch	octetStringMatch
#define objectIdentifierIndexer	octetStringIndexer
#define objectIdentifierFilter	octetStringFilter

#define bitStringMatch			octetStringMatch
#define bitStringIndexer		octetStringIndexer
#define bitStringFilter			octetStringFilter

#define integerMatch NULL
#define integerOrderingMatch NULL
#define integerIndexer NULL
#define integerFilter NULL

#define generalizedTimeMatch	NULL
#define generalizedTimeOrderingMatch	NULL

#define caseIgnoreMatch		octetStringMatch
#define caseIgnoreOrderingMatch		octetStringOrderingMatch
#define caseIgnoreIndexer	octetStringIndexer
#define caseIgnoreFilter	octetStringFilter

#define caseIgnoreSubstringsMatch		NULL
#define caseIgnoreSubstringsIndexer		NULL
#define caseIgnoreSubstringsFilter		NULL

#define caseExactMatch		octetStringMatch
#define caseExactOrderingMatch		octetStringOrderingMatch
#define caseExactIndexer	octetStringIndexer
#define caseExactFilter		octetStringFilter

#define caseExactSubstringsMatch		NULL
#define caseExactSubstringsIndexer		NULL
#define caseExactSubstringsFilter		NULL

#define caseExactIA5Match		octetStringMatch
#define caseExactIA5Indexer		octetStringIndexer
#define caseExactIA5Filter		octetStringFilter

#define caseExactIA5SubstringsMatch			NULL
#define caseExactIA5SubstringsIndexer		NULL
#define caseExactIA5SubstringsFilter		NULL

#define caseIgnoreIA5Match		octetStringMatch
#define caseIgnoreIA5Indexer	octetStringIndexer
#define caseIgnoreIA5Filter		octetStringFilter

#define caseIgnoreIA5SubstringsMatch		caseExactIA5SubstringsMatch
#define caseIgnoreIA5SubstringsIndexer		caseExactIA5SubstringsIndexer
#define caseIgnoreIA5SubstringsFilter		caseExactIA5SubstringsFilter

#define numericStringMatch		octetStringMatch
#define numericStringIndexer	octetStringIndexer
#define numericStringFilter		octetStringFilter

#define numericStringSubstringsMatch		caseExactIA5SubstringsMatch
#define numericStringSubstringsIndexer		caseExactIA5SubstringsIndexer
#define numericStringSubstringsFilter		caseExactIA5SubstringsFilter

#define telephoneNumberMatch		octetStringMatch
#define telephoneNumberIndexer		octetStringIndexer
#define telephoneNumberFilter		octetStringFilter

#define telephoneNumberSubstringsMatch		caseExactIA5SubstringsMatch
#define telephoneNumberSubstringsIndexer	caseExactIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseExactIA5SubstringsFilter

Kurt Zeilenga's avatar
Kurt Zeilenga committed
135
136
#endif

137
/* validation routines */
138
#define berValidate						blobValidate
139

140
/* approx matching rules */
141
142
143
144
#ifdef SLAP_NVALUES
#define directoryStringApproxMatchOID	NULL
#define IA5StringApproxMatchOID			NULL
#else
145
#define directoryStringApproxMatchOID	"1.3.6.1.4.1.4203.666.4.4"
Gary Williams's avatar
Gary Williams committed
146
147
148
#define directoryStringApproxMatch	approxMatch
#define directoryStringApproxIndexer	approxIndexer
#define directoryStringApproxFilter	approxFilter
149
#define IA5StringApproxMatchOID			"1.3.6.1.4.1.4203.666.4.5"
Gary Williams's avatar
Gary Williams committed
150
#define IA5StringApproxMatch			approxMatch
151
#define IA5StringApproxIndexer			approxIndexer
Gary Williams's avatar
Gary Williams committed
152
#define IA5StringApproxFilter			approxFilter
153
#endif
154

155
156
#ifndef SLAP_NVALUES

157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/* (new) normalization routines */
#define caseExactNormalize							NULL
#define caseExactIA5Normalize						NULL
#define caseIgnoreNormalize							NULL
#define caseIgnoreIA5Normalize						NULL
#define distinguishedNameNormalize					NULL
#define integerNormalize							NULL
#define integerFirstComponentNormalize				NULL
#define numericStringNormalize						NULL
#define objectIdentifierNormalize					NULL
#define objectIdentifierFirstComponentNormalize		NULL
#define generalizedTimeNormalize					NULL
#define uniqueMemberNormalize						NULL
#define bitStringNormalize							NULL
#define telephoneNumberNormalize					NULL


174
175
176
177
/* matching routines */
#define bitStringMatch					octetStringMatch
#define bitStringIndexer				octetStringIndexer
#define bitStringFilter					octetStringFilter
178

179
180
181
182
183
#define numericStringMatch				caseIgnoreIA5Match
#define numericStringIndexer			NULL
#define numericStringFilter				NULL
#define numericStringSubstringsIndexer	NULL
#define numericStringSubstringsFilter	NULL
184

185
186
187
188
189
190
191
192
193
#define objectIdentifierMatch			octetStringMatch
#define objectIdentifierIndexer			caseIgnoreIA5Indexer
#define objectIdentifierFilter			caseIgnoreIA5Filter

#define generalizedTimeMatch			caseIgnoreIA5Match
#define generalizedTimeOrderingMatch	caseIgnoreIA5Match

#define uniqueMemberMatch				dnMatch
#define numericStringSubstringsMatch    NULL
194

195
196
197
198
199
200
201
202
203
204
205
206
#define caseExactIndexer				caseExactIgnoreIndexer
#define caseExactFilter					caseExactIgnoreFilter
#define caseExactOrderingMatch			caseExactMatch
#define caseExactSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseExactSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseExactSubstringsFilter		caseExactIgnoreSubstringsFilter
#define caseIgnoreIndexer				caseExactIgnoreIndexer
#define caseIgnoreFilter				caseExactIgnoreFilter
#define caseIgnoreOrderingMatch			caseIgnoreMatch
#define caseIgnoreSubstringsMatch		caseExactIgnoreSubstringsMatch
#define caseIgnoreSubstringsIndexer		caseExactIgnoreSubstringsIndexer
#define caseIgnoreSubstringsFilter		caseExactIgnoreSubstringsFilter
207

208
209
210
211
212
213
214
215
216
217
218
#define integerOrderingMatch			integerMatch
#define integerFirstComponentMatch		integerMatch

#define distinguishedNameMatch			dnMatch
#define distinguishedNameIndexer		caseExactIgnoreIndexer
#define distinguishedNameFilter			caseExactIgnoreFilter

#define telephoneNumberMatch			caseIgnoreIA5Match
#define telephoneNumberSubstringsMatch	caseIgnoreIA5SubstringsMatch
#define telephoneNumberIndexer				caseIgnoreIA5Indexer
#define telephoneNumberFilter				caseIgnoreIA5Filter
219
220
#define telephoneNumberSubstringsIndexer	caseIgnoreIA5SubstringsIndexer
#define telephoneNumberSubstringsFilter		caseIgnoreIA5SubstringsFilter
221
#endif
222

223

224
/*
 * Case-insensitive search for character `c' within the first
 * bv->bv_len bytes of bv->bv_val.
 *
 * On a hit, the offset of the first matching byte is stored in *len
 * and a pointer to that byte is returned.  Returns NULL (leaving *len
 * untouched) when `c' is NUL or no byte matches.
 */
static char *bvcasechr( struct berval *bv, unsigned char c, ber_len_t *len )
{
	char lc, uc;
	ber_len_t pos = 0;

	/* a NUL needle never matches */
	if( c == 0 ) {
		return NULL;
	}

	lc = TOLOWER( c );
	uc = TOUPPER( c );

	while( pos < bv->bv_len ) {
		char cur = bv->bv_val[pos];

		if( cur == uc || cur == lc ) {
			*len = pos;
			return &bv->bv_val[pos];
		}
		pos++;
	}

	return NULL;
}
241

242
243
244
static int
octetStringMatch(
	int *matchp,
245
	slap_mask_t flags,
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	int match = value->bv_len - ((struct berval *) assertedValue)->bv_len;

	if( match == 0 ) {
		match = memcmp( value->bv_val,
			((struct berval *) assertedValue)->bv_val,
			value->bv_len );
	}

	*matchp = match;
	return LDAP_SUCCESS;
}

263
264
265
266
267
268
269
270
271
272
273
/*
 * Ordering match on raw octets.
 *
 * The common prefix (the shorter of the two lengths) is compared with
 * memcmp(); if that prefix is identical the length difference decides
 * the order.  The result is stored in *matchp and LDAP_SUCCESS is
 * always returned.
 */
static int
octetStringOrderingMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *asserted = (struct berval *) assertedValue;
	ber_len_t v_len = value->bv_len;
	ber_len_t av_len = asserted->bv_len;
	ber_len_t common;
	int order;

	if( v_len < av_len ) {
		common = v_len;
	} else {
		common = av_len;
	}

	order = memcmp( value->bv_val, asserted->bv_val, common );
	if( order == 0 ) {
		/* equal prefixes: the shorter value sorts first */
		order = v_len - av_len;
	}

	*matchp = order;
	return LDAP_SUCCESS;
}

285
/* Index generation function */
286
int octetStringIndexer(
287
288
	slap_mask_t use,
	slap_mask_t flags,
289
290
291
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
292
293
	BerVarray values,
	BerVarray *keysp )
294
295
296
{
	int i;
	size_t slen, mlen;
297
	BerVarray keys;
298
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
299
	unsigned char	HASHdigest[HASH_BYTES];
300
	struct berval digest;
301
302
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
303

304
	for( i=0; values[i].bv_val != NULL; i++ ) {
305
306
307
		/* just count them */
	}

Kurt Zeilenga's avatar
Kurt Zeilenga committed
308
309
310
	/* we should have at least one value at this point */
	assert( i > 0 );

311
	keys = ch_malloc( sizeof( struct berval ) * (i+1) );
312

313
314
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
315

316
	for( i=0; values[i].bv_val != NULL; i++ ) {
317
		HASH_Init( &HASHcontext );
318
		if( prefix != NULL && prefix->bv_len > 0 ) {
319
			HASH_Update( &HASHcontext,
320
321
				prefix->bv_val, prefix->bv_len );
		}
322
		HASH_Update( &HASHcontext,
323
			syntax->ssyn_oid, slen );
324
		HASH_Update( &HASHcontext,
325
			mr->smr_oid, mlen );
326
		HASH_Update( &HASHcontext,
327
			values[i].bv_val, values[i].bv_len );
328
		HASH_Final( HASHdigest, &HASHcontext );
329

330
		ber_dupbv( &keys[i], &digest );
331
332
	}

333
	keys[i].bv_val = NULL;
334
	keys[i].bv_len = 0;
335
336
337
338
339
340
341

	*keysp = keys;

	return LDAP_SUCCESS;
}

/* Index generation function */
342
int octetStringFilter(
343
344
	slap_mask_t use,
	slap_mask_t flags,
345
346
347
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
348
	void * assertedValue,
349
	BerVarray *keysp )
350
351
{
	size_t slen, mlen;
352
	BerVarray keys;
353
	HASH_CONTEXT   HASHcontext;
Gary Williams's avatar
Gary Williams committed
354
	unsigned char	HASHdigest[HASH_BYTES];
355
	struct berval *value = (struct berval *) assertedValue;
356
	struct berval digest;
357
358
	digest.bv_val = HASHdigest;
	digest.bv_len = sizeof(HASHdigest);
359

360
361
	slen = syntax->ssyn_oidlen;
	mlen = mr->smr_oidlen;
362

363
	keys = ch_malloc( sizeof( struct berval ) * 2 );
364

365
	HASH_Init( &HASHcontext );
366
	if( prefix != NULL && prefix->bv_len > 0 ) {
367
		HASH_Update( &HASHcontext,
368
369
			prefix->bv_val, prefix->bv_len );
	}
370
	HASH_Update( &HASHcontext,
371
		syntax->ssyn_oid, slen );
372
	HASH_Update( &HASHcontext,
373
		mr->smr_oid, mlen );
374
	HASH_Update( &HASHcontext,
375
		value->bv_val, value->bv_len );
376
	HASH_Final( HASHdigest, &HASHcontext );
377

378
379
	ber_dupbv( keys, &digest );
	keys[1].bv_val = NULL;
380
	keys[1].bv_len = 0;
381
382
383
384
385

	*keysp = keys;

	return LDAP_SUCCESS;
}
386

387
388
389
390
391
/*
 * Validator that rejects every value: both arguments are ignored and
 * LDAP_INVALID_SYNTAX is returned unconditionally.
 */
static int
inValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc = LDAP_INVALID_SYNTAX;	/* no value allowed */

	return rc;
}

396
static int
397
blobValidate(
398
399
400
401
	Syntax *syntax,
	struct berval *in )
{
	/* any value allowed */
402
	return LDAP_SUCCESS;
403
404
}

405
406
407
408
409
410
411
412
413
414
415
416
417
/*
 * Validate a Bit String value.
 *
 * rfc 2252 section 6.3 Bit String
 *	bitstring = "'" *binary-digit "'"
 *	binary-digit = "0" / "1"
 *	example: '0101111101'B
 *
 * The check is deliberately strict so that no normalization is needed
 * before simplistic matching.  Returns LDAP_SUCCESS for a well formed
 * value and LDAP_INVALID_SYNTAX otherwise.
 */
static int
bitStringValidate(
	Syntax *syntax,
	struct berval *in )
{
	const char *s = in->bv_val;
	ber_len_t n = in->bv_len;
	ber_len_t pos;

	/* the shortest legal value is ''B */
	if( n < 3 ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* must open with a quote and close with a quote followed by B */
	if( !( s[0] == '\'' && s[n-2] == '\'' && s[n-1] == 'B' ) ) {
		return LDAP_INVALID_SYNTAX;
	}

	/* everything between the quotes must be a binary digit */
	for( pos = 1; pos <= n-3; pos++ ) {
		switch( s[pos] ) {
		case '0':
		case '1':
			break;
		default:
			return LDAP_INVALID_SYNTAX;
		}
	}

	return LDAP_SUCCESS;
}

442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
/*
 * Validate a Name And Optional UID value: a DN optionally followed by
 * "#" and a bit string such as '0101'B.
 *
 * An empty value is accepted.  Otherwise the value is duplicated; if
 * it ends in 'B it is taken to carry a UID, which must be introduced
 * by #' and is trimmed off before the rest is handed to dnValidate().
 *
 * Returns LDAP_SUCCESS, LDAP_INVALID_SYNTAX for a malformed UID,
 * LDAP_OTHER when the duplicate cannot be allocated, or whatever
 * dnValidate() returns for the DN part.
 */
static int
nameUIDValidate(
	Syntax *syntax,
	struct berval *in )
{
	int rc;
	struct berval dn;

	if( in->bv_len == 0 ) return LDAP_SUCCESS;

	ber_dupbv( &dn, in );
	if( !dn.bv_val ) return LDAP_OTHER;

	/* bv_len is unsigned: only look at bv_len-2 when it exists */
	if( dn.bv_len >= 2
		&& dn.bv_val[dn.bv_len-1] == 'B'
		&& dn.bv_val[dn.bv_len-2] == '\'' )
	{
		/* assume presence of optional UID */
		ber_len_t i;

		/*
		 * The shortest UID suffix is #''B (four bytes).  Anything
		 * shorter cannot hold the leading #' and would make the
		 * index arithmetic below wrap around.
		 */
		if( dn.bv_len < 4 ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* walk back over the binary digits; i stays >= 1 */
		for(i=dn.bv_len-3; i>1; i--) {
			if( dn.bv_val[i] != '0' &&	dn.bv_val[i] != '1' ) {
				break;
			}
		}
		if( dn.bv_val[i] != '\'' || dn.bv_val[i-1] != '#' ) {
			ber_memfree( dn.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

		/* trim the UID to allow use of dnValidate */
		dn.bv_val[i-1] = '\0';
		dn.bv_len = i-1;
	}

	rc = dnValidate( NULL, &dn );

	ber_memfree( dn.bv_val );
	return rc;
}

482
483
#ifndef SLAP_NVALUES

484
static int
485
xnameUIDNormalize(
486
487
488
489
490
491
492
493
494
	Syntax *syntax,
	struct berval *val,
	struct berval *normalized )
{
	struct berval out;
	int rc;

	ber_dupbv( &out, val );
	if( out.bv_len != 0 ) {
495
		struct berval uid = { 0, NULL };
496
497
498
499
500

		if( out.bv_val[out.bv_len-1] == 'B'
			&& out.bv_val[out.bv_len-2] == '\'' )
		{
			/* assume presence of optional UID */
501
			uid.bv_val = strrchr( out.bv_val, '#' );
502

503
			if( uid.bv_val == NULL ) {
504
505
506
507
				free( out.bv_val );
				return LDAP_INVALID_SYNTAX;
			}

508
509
			uid.bv_len = out.bv_len - (uid.bv_val - out.bv_val);
			out.bv_len -= uid.bv_len--;
510
511

			/* temporarily trim the UID */
512
			*(uid.bv_val++) = '\0';
513
514
515
516
517
518
519
520
521
		}

		rc = dnNormalize2( NULL, &out, normalized );

		if( rc != LDAP_SUCCESS ) {
			free( out.bv_val );
			return LDAP_INVALID_SYNTAX;
		}

522
		if( uid.bv_len ) {
523
			normalized->bv_val = ch_realloc( normalized->bv_val,
524
				normalized->bv_len + uid.bv_len + sizeof("#") );
525
526
527
528
529
530

			/* insert the separator */
			normalized->bv_val[normalized->bv_len++] = '#';

			/* append the UID */
			AC_MEMCPY( &normalized->bv_val[normalized->bv_len],
531
532
				uid.bv_val, uid.bv_len );
			normalized->bv_len += uid.bv_len;
533
534
535
536
537
538
539
540
541
542
543

			/* terminate */
			normalized->bv_val[normalized->bv_len] = '\0';
		}

		free( out.bv_val );
	}

	return LDAP_SUCCESS;
}

544
#endif
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
/*
 * Handling boolean syntax and matching is quite rigid.
 * A more flexible approach would be to allow a variety
 * of strings to be normalized and prettied into TRUE
 * and FALSE.
 */
/*
 * Validate a Boolean value: only the exact strings "TRUE" and "FALSE"
 * (upper case, no surrounding space) are accepted.  The strictness
 * means no normalization is needed before simplistic matching.
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
booleanValidate(
	Syntax *syntax,
	struct berval *in )
{
	switch( in->bv_len ) {
	case 4:
		if( memcmp( in->bv_val, "TRUE", 4 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	case 5:
		if( memcmp( in->bv_val, "FALSE", 5 ) == 0 ) {
			return LDAP_SUCCESS;
		}
		break;

	default:
		break;
	}

	return LDAP_INVALID_SYNTAX;
}

static int
booleanMatch(
	int *matchp,
576
	slap_mask_t flags,
577
578
579
580
581
582
583
584
585
586
587
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	/* simplistic matching allowed by rigid validation */
	struct berval *asserted = (struct berval *) assertedValue;
	*matchp = value->bv_len != asserted->bv_len;
	return LDAP_SUCCESS;
}

588
589
590
591
592
/*-------------------------------------------------------------------
LDAP/X.500 string syntax / matching rules have a few oddities.  This
comment attempts to detail how slapd(8) treats them.

Summary:
593
  StringSyntax		X.500	LDAP	Matching/Comments
  DirectoryString	CHOICE	UTF8	i/e + ignore insignificant spaces
  PrintableString	subset	subset	i/e + ignore insignificant spaces
  NumericString		subset	subset	ignore all spaces
  IA5String			ASCII	ASCII	i/e + ignore insignificant spaces
  TeletexString		T.61	T.61	i/e + ignore insignificant spaces

  TelephoneNumber subset  subset  i + ignore all spaces and "-"

  See draft-ietf-ldapbis-strpro for details (once published).


Directory String -
  In X.500(93), a directory string can be either a PrintableString,
  a bmpString, or a UniversalString (e.g., UCS (a subset of Unicode)).
  In later versions, more CHOICEs were added.  In all cases the string
  must be non-empty.

612
  In LDAPv3, a directory string is a UTF-8 encoded UCS string.
613
  A directory string cannot be zero length.
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659

  For matching, there are both case ignore and exact rules.  Both
  also require that "insignificant" spaces be ignored.
	spaces before the first non-space are ignored;
	spaces after the last non-space are ignored;
	spaces after a space are ignored.
  Note: by these rules (and as clarified in X.520), a string of only
  spaces is to be treated as if held one space, not empty (which
  would be a syntax error).

NumericString
  In ASN.1, numeric string is just a string of digits and spaces
  and could be empty.  However, in X.500, all attribute values of
  numeric string carry a non-empty constraint.  For example:

	internationalISDNNumber ATTRIBUTE ::= {
		WITH SYNTAX InternationalISDNNumber
		EQUALITY MATCHING RULE numericStringMatch
		SUBSTRINGS MATCHING RULE numericStringSubstringsMatch
		ID id-at-internationalISDNNumber }
	InternationalISDNNumber ::=
	    NumericString (SIZE(1..ub-international-isdn-number))

  Unfortunately, some assertion values don't carry the same
  constraint (but it's unclear how such an assertion could ever
  be true). In LDAP, there is one syntax (numericString) not two
  (numericString with constraint, numericString without constraint).
  This should be treated as numericString with non-empty constraint.
  Note that while someone may have no ISDN number, there are no ISDN
  numbers which are zero length.

  In matching, spaces are ignored.

PrintableString
  In ASN.1, Printable string is just a string of printable characters
  and can be empty.  In X.500, semantics much like NumericString (see
  serialNumber for a like example) excepting uses insignificant space
  handling instead of ignore all spaces.  

IA5String
  Basically same as PrintableString.  There are no examples in X.500,
  but same logic applies.  So we require them to be non-empty as
  well.

-------------------------------------------------------------------*/

660
661
662
663
664
665
666
667
668
/*
 * Validate that a value is well formed UTF-8.
 *
 * An empty value is rejected only for the directoryString syntax.
 * For every character the length announced by the lead byte is
 * checked against the bytes actually left in the value, each
 * continuation byte must have the form 10xxxxxx, and the length must
 * agree with LDAP_UTF8_OFFSET().
 *
 * Returns LDAP_SUCCESS or LDAP_INVALID_SYNTAX.
 */
static int
UTF8StringValidate(
	Syntax *syntax,
	struct berval *in )
{
	ber_len_t count;
	int len;
	unsigned char *u = (unsigned char *) in->bv_val;

	if( in->bv_len == 0 && syntax == slap_schema.si_syn_directoryString ) {
		/* directory strings cannot be empty */
		return LDAP_INVALID_SYNTAX;
	}

	for( count = in->bv_len; count > 0; count-=len, u+=len ) {
		/* get the length indicated by the first byte */
		len = LDAP_UTF8_CHARLEN2( u, len );

		/*
		 * A sequence must fit in what is left of the value.
		 * Without this check a truncated trailing sequence makes
		 * the tests below read past the end of the value, and the
		 * unsigned `count -= len' would wrap around.
		 */
		if( len < 1 || (ber_len_t) len > count ) {
			return LDAP_INVALID_SYNTAX;
		}

		/* very basic checks; each case falls through on purpose so
		 * that every continuation byte is examined */
		switch( len ) {
			case 6:
				if( (u[5] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 5:
				if( (u[4] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 4:
				if( (u[3] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 3:
				if( (u[2] & 0xC0 )!= 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 2:
				if( (u[1] & 0xC0) != 0x80 ) {
					return LDAP_INVALID_SYNTAX;
				}
				/* FALLTHROUGH */
			case 1:
				/* CHARLEN already validated it */
				break;
			default:
				return LDAP_INVALID_SYNTAX;
		}

		/* make sure len corresponds with the offset
			to the next character */
		if( LDAP_UTF8_OFFSET( u ) != len ) return LDAP_INVALID_SYNTAX;
	}

	/* the loop only ends once every byte has been consumed */
	return LDAP_SUCCESS;
}

719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
#ifdef SLAP_NVALUES
/*
 * Normalize a UTF-8 string for matching.
 *
 * The value is run through UTF8bvnormalize() (case folding unless the
 * rule is, or is associated with, caseExactMatch; approximate mode
 * when `use' carries all SLAP_MR_EQUALITY_APPROX bits), then ASCII
 * spaces are squeezed in place: leading and trailing spaces are
 * removed and inner runs collapse to one space.  A value consisting
 * only of spaces becomes a single space.
 *
 * On success *normalized owns the buffer produced by
 * UTF8bvnormalize().  A NULL input value yields an empty result.
 * Returns LDAP_SUCCESS, or LDAP_OTHER when UTF8bvnormalize() fails.
 */
static int
UTF8StringNormalize(
	slap_mask_t use,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *val,
	struct berval *normalized )
{
	struct berval tmp, nvalue;
	int flags;
	int wasspace;
	ber_len_t i;

	if( val->bv_val == NULL ) {
		/* assume we're dealing with a syntax (e.g., UTF8String)
		 * which allows empty strings
		 */
		normalized->bv_len = 0;
		normalized->bv_val = NULL;
		return LDAP_SUCCESS;
	}

	/* exact rules must keep case; everything else is folded */
	flags = SLAP_MR_ASSOCIATED(mr, slap_schema.si_mr_caseExactMatch )
		? LDAP_UTF8_NOCASEFOLD : LDAP_UTF8_CASEFOLD;
	/* `==' binds tighter than `&', so the mask test needs its own
	 * parentheses */
	flags |= ( ( use & SLAP_MR_EQUALITY_APPROX ) == SLAP_MR_EQUALITY_APPROX )
		? LDAP_UTF8_APPROX : 0;

	val = UTF8bvnormalize( val, &tmp, flags );
	if( val == NULL ) {
		return LDAP_OTHER;
	}

	/* collapse spaces (in place) */
	nvalue.bv_len = 0;
	nvalue.bv_val = tmp.bv_val;

	wasspace = 1; /* trim leading spaces */
	for( i=0; i<tmp.bv_len; i++ ) {
		if ( ASCII_SPACE( tmp.bv_val[i] )) {
			if( !wasspace ) {
				/* keep only the first space of a run */
				nvalue.bv_val[nvalue.bv_len++] = tmp.bv_val[i];
			}
			wasspace = 1;
		} else {
			wasspace = 0;
			nvalue.bv_val[nvalue.bv_len++] = tmp.bv_val[i];
		}
	}

	if( nvalue.bv_len ) {
		if( wasspace ) {
			/* last character was a space, trim it */
			--nvalue.bv_len;
		}
		nvalue.bv_val[nvalue.bv_len] = '\0';

	} else if( tmp.bv_len ) {
		/* string of all spaces is treated as one space */
		nvalue.bv_val[0] = ' ';
		nvalue.bv_val[1] = '\0';
		nvalue.bv_len = 1;

	} else {
		/* empty input stays empty: its buffer is only assumed to
		 * hold the terminator, so a second byte must not be
		 * written -- NOTE(review): confirm UTF8bvnormalize()
		 * always leaves room for the terminator */
		nvalue.bv_val[0] = '\0';
	}

	/* hand the squeezed buffer to the caller */
	*normalized = nvalue;

	return LDAP_SUCCESS;
}
#else
785

786
static int
787
xUTF8StringNormalize(
788
789
	Syntax *syntax,
	struct berval *val,
790
	struct berval *normalized )
791
{
792
	char *p, *q, *s, *e;
793
	int len = 0;
794

Kurt Zeilenga's avatar
Kurt Zeilenga committed
795
796
797
	/* validator should have refused an empty string */
	assert( val->bv_len );

798
	p = val->bv_val;
799

800
	/* Ignore initial whitespace */
801
	/* All space is ASCII. All ASCII is 1 byte */
802
	for ( ; p < val->bv_val + val->bv_len && ASCII_SPACE( p[ 0 ] ); p++ );
803

804
	normalized->bv_len = val->bv_len - (p - val->bv_val);
Kurt Zeilenga's avatar
Kurt Zeilenga committed
805
806
807
808
809
810

	if( !normalized->bv_len ) {
		ber_mem2bv( " ", 1, 1, normalized );
		return LDAP_SUCCESS;
	}

811
812
	ber_mem2bv( p, normalized->bv_len, 1, normalized );
	e = normalized->bv_val + normalized->bv_len;
813
814
815
816

	assert( normalized->bv_val );

	p = q = normalized->bv_val;
817
	s = NULL;
818

819
	while ( p < e ) {
820
821
822
823
824
		q += len;
		if ( ASCII_SPACE( *p ) ) {
			s = q - len;
			len = 1;
			*q = *p++;
825

826
			/* Ignore the extra whitespace */
827
828
			while ( ASCII_SPACE( *p ) ) {
				p++;
829
			}
Kurt Zeilenga's avatar
Kurt Zeilenga committed
830
		} else {
831
832
833
			len = LDAP_UTF8_COPY(q,p);
			s=NULL;
			p+=len;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
834
		}
835
836
	}

837
	assert( normalized->bv_val <= p );
838
	assert( q+len <= p );
839

840
	/* cannot start with a space */
841
	assert( !ASCII_SPACE( normalized->bv_val[0] ) );
842
843
844
845
846
847
848
849

	/*
	 * If the string ended in space, backup the pointer one
	 * position.  One is enough because the above loop collapsed
	 * all whitespace to a single space.
	 */

	if ( s != NULL ) {
Howard Chu's avatar
Howard Chu committed
850
		len = q - s;
851
		q = s;
Kurt Zeilenga's avatar
Kurt Zeilenga committed
852
	}
853

854
	/* cannot end with a space */
855
856
857
	assert( !ASCII_SPACE( *q ) );

	q += len;
858
859
860
861

	/* null terminate */
	*q = '\0';

862
	normalized->bv_len = q - normalized->bv_val;
863

864
	return LDAP_SUCCESS;
865
866
}

867
/* Returns Unicode canonically normalized copy of a substring assertion
868
 * Skipping attribute description */
869
/*
 * Build a normalized copy of a substrings assertion.
 *
 * The initial, any[] and final components that are present in `sa'
 * are each passed through UTF8bvnormalize() with the given `casefold'
 * flags and stored in a newly allocated SubstringsAssertion.
 *
 * Returns the new assertion, or NULL when an allocation or a
 * normalization fails; in that case everything built so far is
 * released.
 */
static SubstringsAssertion *
UTF8SubstringsAssertionNormalize(
	SubstringsAssertion *sa,
	unsigned casefold )
{
	SubstringsAssertion *nsa;
	int i;

	nsa = (SubstringsAssertion *)SLAP_CALLOC( 1, sizeof(SubstringsAssertion) );
	if( nsa == NULL ) {
		return NULL;
	}

	if( sa->sa_initial.bv_val != NULL ) {
		UTF8bvnormalize( &sa->sa_initial, &nsa->sa_initial, casefold );
		if( nsa->sa_initial.bv_val == NULL ) {
			goto err;
		}
	}

	if( sa->sa_any != NULL ) {
		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
			/* just count them */
		}
		nsa->sa_any = (struct berval *)
			SLAP_MALLOC( (i + 1) * sizeof(struct berval) );
		if( nsa->sa_any == NULL ) {
			goto err;
		}

		for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) {
			/*
			 * Clear the slot first.  The array is not zeroed
			 * here, so this makes the failure test below well
			 * defined and keeps the array terminated at index i
			 * for ber_bvarray_free() on the error path.
			 */
			nsa->sa_any[i].bv_len = 0;
			nsa->sa_any[i].bv_val = NULL;

			UTF8bvnormalize( &sa->sa_any[i], &nsa->sa_any[i],
				casefold );
			if( nsa->sa_any[i].bv_val == NULL ) {
				goto err;
			}
		}
		nsa->sa_any[i].bv_len = 0;
		nsa->sa_any[i].bv_val = NULL;
	}

	if( sa->sa_final.bv_val != NULL ) {
		UTF8bvnormalize( &sa->sa_final, &nsa->sa_final, casefold );
		if( nsa->sa_final.bv_val == NULL ) {
			goto err;
		}
	}

	return nsa;

err:
	/* nsa came from SLAP_CALLOC, so untouched members are still zero */
	if ( nsa->sa_final.bv_val ) free( nsa->sa_final.bv_val );
	if ( nsa->sa_any ) ber_bvarray_free( nsa->sa_any );
	if ( nsa->sa_initial.bv_val ) free( nsa->sa_initial.bv_val );
	ch_free( nsa );
	return NULL;
}

926
#ifndef SLAPD_APPROX_OLDSINGLESTRING
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944

#if defined(SLAPD_APPROX_INITIALS)
#define SLAPD_APPROX_DELIMITER "._ "
#define SLAPD_APPROX_WORDLEN 2
#else
#define SLAPD_APPROX_DELIMITER " "
#define SLAPD_APPROX_WORDLEN 1
#endif

/*
 * Approximate ("sounds like") match.
 *
 * Both values are normalized with LDAP_UTF8_APPROX.  The stored value
 * is split into words at SLAPD_APPROX_DELIMITER and a phonetic code
 * is computed for each word.  The asserted value is then walked word
 * by word: each of its words must match, phonetically, a word of the
 * stored value located after the previously matched one.
 *
 * *matchp is set to 0 on a match and 1 otherwise (also when either
 * normalization fails).  Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	struct berval *stored, *wanted;
	char *code, **codes, **words, *c;
	int i, count, len;
	int nextchunk = 0;	/* offset of the next asserted word */
	int nextavail = 0;	/* first stored word still available */

	/* Yes, this is necessary */
	stored = UTF8bvnormalize( value, NULL, LDAP_UTF8_APPROX );
	if( stored == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	wanted = UTF8bvnormalize( ((struct berval *)assertedValue),
		NULL, LDAP_UTF8_APPROX );
	if( wanted == NULL ) {
		ber_bvfree( stored );
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Split the stored value at delimiters, counting the words */
	count = 1;
	c = stored->bv_val;
	while ( *c ) {
		c = strpbrk( c, SLAPD_APPROX_DELIMITER );
		if ( c == NULL ) break;
		*c++ = '\0';
		count++;
	}

	/* Get a phonetic copy of each word */
	words = (char **)ch_malloc( count * sizeof(char *) );
	codes = (char **)ch_malloc( count * sizeof(char *) );
	c = stored->bv_val;
	for ( i = 0; i < count; i++ ) {
		words[i] = c;
		codes[i] = phonetic(c);
		c += strlen(c) + 1;
	}

	/* Work through the asserted value's words, to see if at least some
	   of the words are there, in the same order. */
	len = 0;
	while ( (ber_len_t) nextchunk < wanted->bv_len ) {
		len = strcspn( wanted->bv_val + nextchunk, SLAPD_APPROX_DELIMITER);

		if( len == 0 ) {
			/* sitting on a delimiter: step over it */
			nextchunk++;
			continue;
		}

#if defined(SLAPD_APPROX_INITIALS)
		if( len == 1 ) {
			/* Single letter words need to at least match one word's initial */
			for( i=nextavail; i<count; i++ ) {
				if( !strncasecmp( wanted->bv_val + nextchunk, words[i], 1 )) {
					nextavail=i+1;
					break;
				}
			}
		} else
#endif
		{
			/* Isolate the next word in the asserted value and phonetic it */
			wanted->bv_val[nextchunk+len] = '\0';
			code = phonetic( wanted->bv_val + nextchunk );

			/* See if this phonetic chunk is in the remaining words of *value */
			for( i=nextavail; i<count; i++ ){
				if( !strcmp( code, codes[i] ) ){
					nextavail = i+1;
					break;
				}
			}
			ch_free( code );
		}

		/* This chunk in the asserted value was NOT within the *value. */
		if( i >= count ) {
			nextavail=-1;
			break;
		}

		/* Go on to the next word in the asserted value */
		nextchunk += len+1;
	}

	/* If some of the words were seen, call it a match */
	*matchp = ( nextavail > 0 ) ? 0 : 1;

	/* Cleanup allocs */
	ber_bvfree( wanted );
	for( i=0; i<count; i++ ) {
		ch_free( codes[i] );
	}
	ch_free( codes );
	ch_free( words );
	ber_bvfree( stored );

	return LDAP_SUCCESS;
}

1045
static int 
1046
1047
1048
1049
1050
1051
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1052
1053
	BerVarray values,
	BerVarray *keysp )
1054
{
1055
	char *c;
1056
	int i,j, len, wordcount, keycount=0;
1057
	struct berval *newkeys;
1058
	BerVarray keys=NULL;
1059

1060
	for( j=0; values[j].bv_val != NULL; j++ ) {
1061
		struct berval val = { 0, NULL };
1062
		/* Yes, this is necessary */
1063
1064
		UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX );
		assert( val.bv_val != NULL );
1065

1066
		/* Isolate how many words there are. There will be a key for each */
1067
		for( wordcount = 0, c = val.bv_val; *c; c++) {
1068
1069
1070
1071
1072
1073
1074
1075
			len = strcspn(c, SLAPD_APPROX_DELIMITER);
			if( len >= SLAPD_APPROX_WORDLEN ) wordcount++;
			c+= len;
			if (*c == '\0') break;
			*c = '\0';
		}

		/* Allocate/increase storage to account for new keys */
1076
1077
		newkeys = (struct berval *)ch_malloc( (keycount + wordcount + 1) 
			* sizeof(struct berval) );
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1078
		AC_MEMCPY( newkeys, keys, keycount * sizeof(struct berval) );
1079
1080
1081
1082
		if( keys ) ch_free( keys );
		keys = newkeys;

		/* Get a phonetic copy of each word */
1083
		for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
1084
1085
			len = strlen( c );
			if( len < SLAPD_APPROX_WORDLEN ) continue;
1086
			ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
1087
1088
1089
1090
			keycount++;
			i++;
		}

1091
		ber_memfree( val.bv_val );
1092
	}
1093
	keys[keycount].bv_val = NULL;
1094
1095
1096
1097
1098
	*keysp = keys;

	return LDAP_SUCCESS;
}

1099
static int 
1100
1101
1102
1103
1104
1105
approxFilter(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1106
	void * assertedValue,
1107
	BerVarray *keysp )
1108
{
1109
	char *c;
1110
	int i, count, len;
1111
	struct berval *val;
1112
	BerVarray keys;
1113

1114
	/* Yes, this is necessary */
1115
	val = UTF8bvnormalize( ((struct berval *)assertedValue),
Kurt Zeilenga's avatar
Kurt Zeilenga committed
1116
		NULL, LDAP_UTF8_APPROX );
1117
	if( val == NULL || val->bv_val == NULL ) {
1118
1119
		keys = (struct berval *)ch_malloc( sizeof(struct berval) );
		keys[0].bv_val = NULL;
1120
		*keysp = keys;
1121
		ber_bvfree( val );
1122
1123
1124
		return LDAP_SUCCESS;
	}

1125
	/* Isolate how many words there are. There will be a key for each */
1126
	for( count = 0,c = val->bv_val; *c; c++) {
1127
1128
1129
1130
1131
1132
1133
1134
		len = strcspn(c, SLAPD_APPROX_DELIMITER);
		if( len >= SLAPD_APPROX_WORDLEN ) count++;
		c+= len;
		if (*c == '\0') break;
		*c = '\0';
	}

	/* Allocate storage for new keys */
1135
	keys = (struct berval *)ch_malloc( (count + 1) * sizeof(struct berval) );
1136
1137

	/* Get a phonetic copy of each word */
1138
	for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
1139
1140
		len = strlen(c);
		if( len < SLAPD_APPROX_WORDLEN ) continue;
1141
		ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
1142
1143
1144
		i++;
	}

1145
	ber_bvfree( val );
1146

1147
	keys[count].bv_val = NULL;
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
	*keysp = keys;

	return LDAP_SUCCESS;
}


#else
/* No other form of Approximate Matching is defined */

/*
 * approxMatch - approximate match by comparing whole-value phonetic strings.
 *
 * Both the stored value and the asserted value are normalized with
 * UTF8normalize( ..., UTF8_NOCASEFOLD ), passed through strip8bitChars()
 * and then phonetic(); the two resulting strings are compared with strcmp().
 *
 *   matchp         receives the comparison result: the strcmp() outcome
 *                  (0 on a match), 1 if the stored value could not be
 *                  normalized, -1 if the asserted value could not be
 *   value          stored attribute value
 *   assertedValue  pointer to the struct berval being asserted
 *
 * flags, syntax and mr are not referenced by this routine.
 *
 * Always returns LDAP_SUCCESS.
 */
static int
approxMatch(
	int *matchp,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *value,
	void *assertedValue )
{
	char *norm_value;
	char *norm_assert;
	char *value_code;
	char *assert_code;

	/* Yes, this is necessary */
	norm_value = UTF8normalize( value, UTF8_NOCASEFOLD );
	if( norm_value == NULL ) {
		*matchp = 1;
		return LDAP_SUCCESS;
	}

	/* Yes, this is necessary */
	norm_assert = UTF8normalize( (struct berval *)assertedValue,
		UTF8_NOCASEFOLD );
	if( norm_assert == NULL ) {
		/* The stored value's normalized copy is no longer needed */
		free( norm_value );
		*matchp = -1;
		return LDAP_SUCCESS;
	}

	/* Reduce each normalized string to its phonetic form */
	value_code = phonetic( strip8bitChars( norm_value ) );
	assert_code = phonetic( strip8bitChars( norm_assert ) );

	/* The normalized copies are done with once the codes exist */
	free( norm_value );
	free( norm_assert );

	*matchp = strcmp( value_code, assert_code );

	ch_free( value_code );
	ch_free( assert_code );

	return LDAP_SUCCESS;
}

1199
static int 
1200
1201
1202
1203
1204
1205
approxIndexer(
	slap_mask_t use,
	slap_mask_t flags,
	Syntax *syntax,
	MatchingRule *mr,
	struct berval *prefix,
1206
1207
	BerVarray values,
	BerVarray *keysp )
1208
1209
{
	int i;
1210