cache.c 37 KB
Newer Older
1
2
/* cache.c - routines to maintain an in-core cache of entries */
/* $OpenLDAP$ */
Kurt Zeilenga's avatar
Notices    
Kurt Zeilenga committed
3
4
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
Kurt Zeilenga's avatar
Kurt Zeilenga committed
5
 * Copyright 2000-2008 The OpenLDAP Foundation.
Kurt Zeilenga's avatar
Notices    
Kurt Zeilenga committed
6
7
8
9
10
11
12
13
14
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 */

#include "portable.h"

#include <stdio.h>

#include <ac/errno.h>
#include <ac/string.h>
#include <ac/socket.h>

#include "slap.h"

#include "back-bdb.h"

29
30
#include "ldap_rq.h"

Howard Chu's avatar
Howard Chu committed
31
#ifdef BDB_HIER
32
#define bdb_cache_lru_purge	hdb_cache_lru_purge
Howard Chu's avatar
Howard Chu committed
33
#endif
34
static void bdb_cache_lru_purge( struct bdb_info *bdb );
Howard Chu's avatar
Howard Chu committed
35
36

static int	bdb_cache_delete_internal(Cache *cache, EntryInfo *e, int decr);
37
#ifdef LDAP_DEBUG
38
#define SLAPD_UNUSED
Pierangelo Masarati's avatar
Pierangelo Masarati committed
39
#ifdef SLAPD_UNUSED
40
static void	bdb_lru_print(Cache *cache);
41
#endif
Pierangelo Masarati's avatar
Pierangelo Masarati committed
42
#endif
43

44
45
46
47
48
49
50
51
52
53
54
55
/* For concurrency experiments only! */
#if 0
#define	ldap_pvt_thread_rdwr_wlock(a)	0
#define	ldap_pvt_thread_rdwr_wunlock(a)	0
#define	ldap_pvt_thread_rdwr_rlock(a)	0
#define	ldap_pvt_thread_rdwr_runlock(a)	0
#endif

#if 0
#define ldap_pvt_thread_mutex_trylock(a) 0
#endif

Howard Chu's avatar
Howard Chu committed
56
static EntryInfo *
Howard Chu's avatar
Howard Chu committed
57
bdb_cache_entryinfo_new( Cache *cache )
58
{
Howard Chu's avatar
Howard Chu committed
59
	EntryInfo *ei = NULL;
60

Howard Chu's avatar
Howard Chu committed
61
	if ( cache->c_eifree ) {
62
		ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex );
Howard Chu's avatar
Howard Chu committed
63
64
65
		if ( cache->c_eifree ) {
			ei = cache->c_eifree;
			cache->c_eifree = ei->bei_lrunext;
66
			ei->bei_finders = 0;
Howard Chu's avatar
Howard Chu committed
67
		}
68
		ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex );
Howard Chu's avatar
Howard Chu committed
69
	}
70
71
	if ( !ei ) {
		ei = ch_calloc(1, sizeof(EntryInfo));
Howard Chu's avatar
Howard Chu committed
72
73
		ldap_pvt_thread_mutex_init( &ei->bei_kids_mutex );
	}
74

75
76
	ei->bei_state = CACHE_ENTRY_REFERENCED;

Howard Chu's avatar
Howard Chu committed
77
	return ei;
78
79
}

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
static void
bdb_cache_entryinfo_free( Cache *cache, EntryInfo *ei )
{
	free( ei->bei_nrdn.bv_val );
	ei->bei_nrdn.bv_val = NULL;
#ifdef BDB_HIER
	free( ei->bei_rdn.bv_val );
	ei->bei_rdn.bv_val = NULL;
	ei->bei_modrdns = 0;
	ei->bei_ckids = 0;
	ei->bei_dkids = 0;
#endif
	ei->bei_parent = NULL;
	ei->bei_kids = NULL;
	ei->bei_lruprev = NULL;

	ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex );
	ei->bei_lrunext = cache->c_eifree;
	cache->c_eifree = ei;
	ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex );
}

102
103
104
105
106
107
108
109
#define LRU_DEL( c, e ) do { \
	if ( e == (c)->c_lruhead ) (c)->c_lruhead = e->bei_lruprev; \
	if ( e == (c)->c_lrutail ) (c)->c_lrutail = e->bei_lruprev; \
	e->bei_lrunext->bei_lruprev = e->bei_lruprev; \
	e->bei_lruprev->bei_lrunext = e->bei_lrunext; \
	e->bei_lruprev = NULL; \
} while ( 0 )

110
111
112
113
114
115
116
117
118
/* Note - we now use a Second-Chance / Clock algorithm instead of
 * Least-Recently-Used. This tremendously improves concurrency
 * because we no longer need to manipulate the lists every time an
 * entry is touched. We only need to lock the lists when adding
 * or deleting an entry. It's now a circular doubly-linked list.
 * We always append to the tail, but the head traverses the circle
 * during a purge operation.
 */
static void
Howard Chu's avatar
Howard Chu committed
119
bdb_cache_lru_link( struct bdb_info *bdb, EntryInfo *ei )
120
{
121

122
123
124
125
	/* Already linked, ignore */
	if ( ei->bei_lruprev )
		return;

126
	/* Insert into circular LRU list */
Howard Chu's avatar
Howard Chu committed
127
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );
128

Howard Chu's avatar
Howard Chu committed
129
130
131
132
	ei->bei_lruprev = bdb->bi_cache.c_lrutail;
	if ( bdb->bi_cache.c_lrutail ) {
		ei->bei_lrunext = bdb->bi_cache.c_lrutail->bei_lrunext;
		bdb->bi_cache.c_lrutail->bei_lrunext = ei;
133
134
135
136
		if ( ei->bei_lrunext )
			ei->bei_lrunext->bei_lruprev = ei;
	} else {
		ei->bei_lrunext = ei->bei_lruprev = ei;
Howard Chu's avatar
Howard Chu committed
137
		bdb->bi_cache.c_lruhead = ei;
138
	}
Howard Chu's avatar
Howard Chu committed
139
140
	bdb->bi_cache.c_lrutail = ei;
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
}

#ifdef NO_THREADS
#define NO_DB_LOCK
#endif

/* #define NO_DB_LOCK 1 */
/* Note: The BerkeleyDB locks are much slower than regular
 * mutexes or rdwr locks. But the BDB implementation has the
 * advantage of using a fixed size lock table, instead of
 * allocating a lock object per entry in the DB. That's a
 * key benefit for scaling. It also frees us from worrying
 * about undetectable deadlocks between BDB activity and our
 * own cache activity. It's still worth exploring faster
 * alternatives though.
 */

Howard Chu's avatar
Howard Chu committed
158
/* Atomically release and reacquire a lock */
Jong Hyuk Choi's avatar
Jong Hyuk Choi committed
159
int
Howard Chu's avatar
Howard Chu committed
160
bdb_cache_entry_db_relock(
Howard Chu's avatar
Howard Chu committed
161
	struct bdb_info *bdb,
162
	BDB_LOCKER locker,
Howard Chu's avatar
Howard Chu committed
163
164
165
166
	EntryInfo *ei,
	int rw,
	int tryOnly,
	DB_LOCK *lock )
167
{
168
#ifdef NO_DB_LOCK
Howard Chu's avatar
Howard Chu committed
169
	return 0;
170
#else
Howard Chu's avatar
Howard Chu committed
171
172
173
	int	rc;
	DBT	lockobj;
	DB_LOCKREQ list[2];
174

Howard Chu's avatar
Howard Chu committed
175
176
	if ( !lock ) return 0;

177
	lockobj.data = &ei->bei_id;
178
	lockobj.size = sizeof(ei->bei_id) + 1;
179

Howard Chu's avatar
Howard Chu committed
180
181
182
183
184
185
	list[0].op = DB_LOCK_PUT;
	list[0].lock = *lock;
	list[1].op = DB_LOCK_GET;
	list[1].lock = *lock;
	list[1].mode = rw ? DB_LOCK_WRITE : DB_LOCK_READ;
	list[1].obj = &lockobj;
186
	rc = bdb->bi_dbenv->lock_vec(bdb->bi_dbenv, BDB_LOCKID(locker), tryOnly ? DB_LOCK_NOWAIT : 0,
Howard Chu's avatar
Howard Chu committed
187
		list, 2, NULL );
188

189
	if (rc && !tryOnly) {
Howard Chu's avatar
Howard Chu committed
190
		Debug( LDAP_DEBUG_TRACE,
191
			"bdb_cache_entry_db_relock: entry %ld, rw %d, rc %d\n",
Howard Chu's avatar
Howard Chu committed
192
193
194
			ei->bei_id, rw, rc );
	} else {
		*lock = list[1].lock;
195
	}
Howard Chu's avatar
Howard Chu committed
196
	return rc;
Howard Chu's avatar
Howard Chu committed
197
#endif
198
}
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
199

200
static int
201
bdb_cache_entry_db_lock( struct bdb_info *bdb, BDB_LOCKER locker, EntryInfo *ei,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
202
	int rw, int tryOnly, DB_LOCK *lock )
203
{
204
#ifdef NO_DB_LOCK
Howard Chu's avatar
Howard Chu committed
205
206
	return 0;
#else
207
208
209
210
	int       rc;
	DBT       lockobj;
	int       db_rw;

Howard Chu's avatar
Howard Chu committed
211
212
	if ( !lock ) return 0;

213
214
215
216
217
	if (rw)
		db_rw = DB_LOCK_WRITE;
	else
		db_rw = DB_LOCK_READ;

218
	lockobj.data = &ei->bei_id;
219
	lockobj.size = sizeof(ei->bei_id) + 1;
Howard Chu's avatar
Howard Chu committed
220

221
	rc = LOCK_GET(bdb->bi_dbenv, BDB_LOCKID(locker), tryOnly ? DB_LOCK_NOWAIT : 0,
222
					&lockobj, db_rw, lock);
223
	if (rc && !tryOnly) {
224
		Debug( LDAP_DEBUG_TRACE,
225
			"bdb_cache_entry_db_lock: entry %ld, rw %d, rc %d\n",
Howard Chu's avatar
Howard Chu committed
226
			ei->bei_id, rw, rc );
227
	}
228
	return rc;
229
#endif /* NO_DB_LOCK */
230
231
232
}

int
Howard Chu's avatar
Howard Chu committed
233
bdb_cache_entry_db_unlock ( struct bdb_info *bdb, DB_LOCK *lock )
234
{
235
#ifdef NO_DB_LOCK
Howard Chu's avatar
Howard Chu committed
236
237
	return 0;
#else
238
239
	int rc;

240
	if ( !lock || lock->mode == DB_LOCK_NG ) return 0;
241

Howard Chu's avatar
Howard Chu committed
242
	rc = LOCK_PUT ( bdb->bi_dbenv, lock );
243
	return rc;
Howard Chu's avatar
Howard Chu committed
244
#endif
245
246
}

247
248
249
250
251
252
253
254
void
bdb_cache_return_entry_rw( struct bdb_info *bdb, Entry *e,
	int rw, DB_LOCK *lock )
{
	EntryInfo *ei;
	int free = 0;

	ei = e->e_private;
Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
255
256
257
	if ( ei &&
		( ei->bei_state & CACHE_ENTRY_NOT_CACHED ) &&
		( bdb_cache_entryinfo_trylock( ei ) == 0 )) {
Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
258
		if ( ei->bei_state & CACHE_ENTRY_NOT_CACHED ) {
Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
259
260
261
262
263
264
265
266
267
268
			/* Releasing the entry can only be done when
			 * we know that nobody else is using it, i.e we
			 * should have an entry_db writelock.  But the
			 * flag is only set by the thread that loads the
			 * entry, and only if no other threads has found
			 * it while it was working.  All other threads
			 * clear the flag, which mean that we should be
			 * the only thread using the entry if the flag
			 * is set here.
			 */
Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
269
270
271
272
273
			ei->bei_e = NULL;
			ei->bei_state ^= CACHE_ENTRY_NOT_CACHED;
			free = 1;
		}
		bdb_cache_entryinfo_unlock( ei );
274
	}
Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
275
	bdb_cache_entry_db_unlock( bdb, lock );
276
277
278
279
280
281
	if ( free ) {
		e->e_private = NULL;
		bdb_entry_return( e );
	}
}

282
static int
Howard Chu's avatar
Howard Chu committed
283
bdb_cache_entryinfo_destroy( EntryInfo *e )
284
{
Howard Chu's avatar
Howard Chu committed
285
286
	ldap_pvt_thread_mutex_destroy( &e->bei_kids_mutex );
	free( e->bei_nrdn.bv_val );
Howard Chu's avatar
Howard Chu committed
287
288
289
#ifdef BDB_HIER
	free( e->bei_rdn.bv_val );
#endif
Howard Chu's avatar
Howard Chu committed
290
	free( e );
291
292
293
	return 0;
}

294
/* Do a length-ordered sort on normalized RDNs */
Howard Chu's avatar
Howard Chu committed
295
296
static int
bdb_rdn_cmp( const void *v_e1, const void *v_e2 )
297
{
Howard Chu's avatar
Howard Chu committed
298
	const EntryInfo *e1 = v_e1, *e2 = v_e2;
299
	int rc = e1->bei_nrdn.bv_len - e2->bei_nrdn.bv_len;
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
300
301
302
303
	if (rc == 0) {
		rc = strncmp( e1->bei_nrdn.bv_val, e2->bei_nrdn.bv_val,
			e1->bei_nrdn.bv_len );
	}
Howard Chu's avatar
Howard Chu committed
304
305
	return rc;
}
306

Howard Chu's avatar
Howard Chu committed
307
308
309
310
311
static int
bdb_id_cmp( const void *v_e1, const void *v_e2 )
{
	const EntryInfo *e1 = v_e1, *e2 = v_e2;
	return e1->bei_id - e2->bei_id;
312
313
}

314
315
316
317
static int
bdb_id_dup_err( void *v1, void *v2 )
{
	EntryInfo *e2 = v2;
Howard Chu's avatar
Howard Chu committed
318
	e2->bei_lrunext = v1;
319
320
321
	return -1;
}

Howard Chu's avatar
Howard Chu committed
322
/* Create an entryinfo in the cache. Caller must release the locks later.
323
 */
324
static int
Howard Chu's avatar
Howard Chu committed
325
326
bdb_entryinfo_add_internal(
	struct bdb_info *bdb,
Howard Chu's avatar
Howard Chu committed
327
	EntryInfo *ei,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
328
	EntryInfo **res )
329
{
Howard Chu's avatar
Howard Chu committed
330
	EntryInfo *ei2 = NULL;
331

Howard Chu's avatar
Howard Chu committed
332
	*res = NULL;
333

Howard Chu's avatar
Howard Chu committed
334
	ei2 = bdb_cache_entryinfo_new( &bdb->bi_cache );
335

Howard Chu's avatar
Howard Chu committed
336
	ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
337
	bdb_cache_entryinfo_lock( ei->bei_parent );
338

Howard Chu's avatar
Howard Chu committed
339
340
	ei2->bei_id = ei->bei_id;
	ei2->bei_parent = ei->bei_parent;
Howard Chu's avatar
Howard Chu committed
341
#ifdef BDB_HIER
Howard Chu's avatar
Howard Chu committed
342
	ei2->bei_rdn = ei->bei_rdn;
Howard Chu's avatar
Howard Chu committed
343
#endif
344
345
346
#ifdef SLAP_ZONE_ALLOC
	ei2->bei_bdb = bdb;
#endif
Howard Chu's avatar
Howard Chu committed
347
348

	/* Add to cache ID tree */
349
350
	if (avl_insert( &bdb->bi_cache.c_idtree, ei2, bdb_id_cmp,
		bdb_id_dup_err )) {
Howard Chu's avatar
Howard Chu committed
351
		EntryInfo *eix = ei2->bei_lrunext;
352
		bdb_cache_entryinfo_free( &bdb->bi_cache, ei2 );
Howard Chu's avatar
Howard Chu committed
353
		ei2 = eix;
Howard Chu's avatar
Howard Chu committed
354
#ifdef BDB_HIER
Howard Chu's avatar
Howard Chu committed
355
356
357
358
		/* It got freed above because its value was
		 * assigned to ei2.
		 */
		ei->bei_rdn.bv_val = NULL;
Howard Chu's avatar
Howard Chu committed
359
#endif
Howard Chu's avatar
Howard Chu committed
360
	} else {
Howard Chu's avatar
Howard Chu committed
361
362
		int rc;

Howard Chu's avatar
Howard Chu committed
363
		bdb->bi_cache.c_eiused++;
Howard Chu's avatar
Howard Chu committed
364
		ber_dupbv( &ei2->bei_nrdn, &ei->bei_nrdn );
365
366
367
368
369
370
371

		/* This is a new leaf node. But if parent had no kids, then it was
		 * a leaf and we would be decrementing that. So, only increment if
		 * the parent already has kids.
		 */
		if ( ei->bei_parent->bei_kids || !ei->bei_parent->bei_id )
			bdb->bi_cache.c_leaves++;
Howard Chu's avatar
Howard Chu committed
372
		rc = avl_insert( &ei->bei_parent->bei_kids, ei2, bdb_rdn_cmp,
Howard Chu's avatar
Howard Chu committed
373
			avl_dup_error );
Howard Chu's avatar
Howard Chu committed
374
375
376
377
378
		if ( rc ) {
			/* This should never happen; entry cache is corrupt */
			bdb->bi_dbenv->log_flush( bdb->bi_dbenv, NULL );
			assert( !rc );
		}
379
380
381
#ifdef BDB_HIER
		ei->bei_parent->bei_ckids++;
#endif
382
383
	}

Howard Chu's avatar
Howard Chu committed
384
385
	*res = ei2;
	return 0;
386
387
}

Howard Chu's avatar
Howard Chu committed
388
389
390
391
392
393
394
/* Find the EntryInfo for the requested DN. If the DN cannot be found, return
 * the info for its closest ancestor. *res should be NULL to process a
 * complete DN starting from the tree root. Otherwise *res must be the
 * immediate parent of the requested DN, and only the RDN will be searched.
 * The EntryInfo is locked upon return and must be unlocked by the caller.
 */
int
395
bdb_cache_find_ndn(
396
	Operation	*op,
Howard Chu's avatar
Howard Chu committed
397
	BDB_LOCKER		locker,
Howard Chu's avatar
Howard Chu committed
398
	struct berval	*ndn,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
399
	EntryInfo	**res )
400
{
401
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
Howard Chu's avatar
Howard Chu committed
402
403
404
	EntryInfo	ei, *eip, *ei2;
	int rc = 0;
	char *ptr;
405
406

	/* this function is always called with normalized DN */
Howard Chu's avatar
Howard Chu committed
407
408
409
	if ( *res ) {
		/* we're doing a onelevel search for an RDN */
		ei.bei_nrdn.bv_val = ndn->bv_val;
410
		ei.bei_nrdn.bv_len = dn_rdnlen( op->o_bd, ndn );
Howard Chu's avatar
Howard Chu committed
411
412
413
		eip = *res;
	} else {
		/* we're searching a full DN from the root */
414
		ptr = ndn->bv_val + ndn->bv_len - op->o_bd->be_nsuffix[0].bv_len;
Howard Chu's avatar
Howard Chu committed
415
		ei.bei_nrdn.bv_val = ptr;
416
		ei.bei_nrdn.bv_len = op->o_bd->be_nsuffix[0].bv_len;
Howard Chu's avatar
Howard Chu committed
417
418
419
420
421
422
423
424
425
426
		/* Skip to next rdn if suffix is empty */
		if ( ei.bei_nrdn.bv_len == 0 ) {
			for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val
				&& !DN_SEPARATOR(*ptr); ptr--) /* empty */;
			if ( ptr >= ndn->bv_val ) {
				if (DN_SEPARATOR(*ptr)) ptr++;
				ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr;
				ei.bei_nrdn.bv_val = ptr;
			}
		}
Howard Chu's avatar
Howard Chu committed
427
428
429
430
		eip = &bdb->bi_cache.c_dntree;
	}
	
	for ( bdb_cache_entryinfo_lock( eip ); eip; ) {
431
		eip->bei_state |= CACHE_ENTRY_REFERENCED;
Howard Chu's avatar
Howard Chu committed
432
		ei.bei_parent = eip;
Howard Chu's avatar
Howard Chu committed
433
434
		ei2 = (EntryInfo *)avl_find( eip->bei_kids, &ei, bdb_rdn_cmp );
		if ( !ei2 ) {
Howard Chu's avatar
Howard Chu committed
435
			DB_LOCK lock;
Howard Chu's avatar
Howard Chu committed
436
437
			int len = ei.bei_nrdn.bv_len;
				
438
439
440
441
442
			if ( BER_BVISEMPTY( ndn )) {
				*res = eip;
				return LDAP_SUCCESS;
			}

Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
443
444
			ei.bei_nrdn.bv_len = ndn->bv_len -
				(ei.bei_nrdn.bv_val - ndn->bv_val);
Howard Chu's avatar
Howard Chu committed
445
446
			bdb_cache_entryinfo_unlock( eip );

Howard Chu's avatar
Howard Chu committed
447
448
449
450
451
			BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Reading %s",
				ei.bei_nrdn.bv_val );

			lock.mode = DB_LOCK_NG;
			rc = bdb_dn2id( op, &ei.bei_nrdn, &ei, locker, &lock );
Howard Chu's avatar
Howard Chu committed
452
453
			if (rc) {
				bdb_cache_entryinfo_lock( eip );
Howard Chu's avatar
Howard Chu committed
454
				bdb_cache_entry_db_unlock( bdb, &lock );
Howard Chu's avatar
Howard Chu committed
455
456
457
				*res = eip;
				return rc;
			}
458

Howard Chu's avatar
Howard Chu committed
459
460
461
			BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Read got %s(%d)",
				ei.bei_nrdn.bv_val, ei.bei_id );

Howard Chu's avatar
Howard Chu committed
462
463
			/* DN exists but needs to be added to cache */
			ei.bei_nrdn.bv_len = len;
464
			rc = bdb_entryinfo_add_internal( bdb, &ei, &ei2 );
Howard Chu's avatar
Howard Chu committed
465
466
			/* add_internal left eip and c_rwlock locked */
			ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
467
			bdb_cache_entry_db_unlock( bdb, &lock );
Howard Chu's avatar
Howard Chu committed
468
469
470
471
			if ( rc ) {
				*res = eip;
				return rc;
			}
472
		} else if ( ei2->bei_state & CACHE_ENTRY_DELETED ) {
Howard Chu's avatar
Howard Chu committed
473
474
475
476
			/* In the midst of deleting? Give it a chance to
			 * complete.
			 */
			bdb_cache_entryinfo_unlock( eip );
477
			ldap_pvt_thread_yield();
Howard Chu's avatar
Howard Chu committed
478
479
480
481
482
483
484
485
486
487
488
			bdb_cache_entryinfo_lock( eip );
			*res = eip;
			return DB_NOTFOUND;
		}
		bdb_cache_entryinfo_unlock( eip );
		bdb_cache_entryinfo_lock( ei2 );

		eip = ei2;

		/* Advance to next lower RDN */
		for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val
489
			&& !DN_SEPARATOR(*ptr); ptr--) /* empty */;
Howard Chu's avatar
Howard Chu committed
490
		if ( ptr >= ndn->bv_val ) {
491
			if (DN_SEPARATOR(*ptr)) ptr++;
Howard Chu's avatar
Howard Chu committed
492
493
494
495
496
497
			ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr - 1;
			ei.bei_nrdn.bv_val = ptr;
		}
		if ( ptr < ndn->bv_val ) {
			*res = eip;
			break;
498
499
500
		}
	}

Howard Chu's avatar
Howard Chu committed
501
	return rc;
502
503
}

Howard Chu's avatar
Howard Chu committed
504
505
506
507
#ifdef BDB_HIER
/* Walk up the tree from a child node, looking for an ID that's already
 * been linked into the cache.
 */
Howard Chu's avatar
Howard Chu committed
508
int
509
510
hdb_cache_find_parent(
	Operation *op,
511
	BDB_LOCKER	locker,
Howard Chu's avatar
Howard Chu committed
512
	ID id,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
513
	EntryInfo **res )
Howard Chu's avatar
Howard Chu committed
514
{
515
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
Howard Chu's avatar
Howard Chu committed
516
517
518
519
520
	EntryInfo ei, eip, *ei2 = NULL, *ein = NULL, *eir = NULL;
	int rc;

	ei.bei_id = id;
	ei.bei_kids = NULL;
Howard Chu's avatar
Howard Chu committed
521
	ei.bei_ckids = 0;
Howard Chu's avatar
Howard Chu committed
522
523

	for (;;) {
Howard Chu's avatar
Howard Chu committed
524
		rc = hdb_dn2id_parent( op, locker, &ei, &eip.bei_id );
Howard Chu's avatar
Howard Chu committed
525
526
527
528
529
530
		if ( rc ) break;

		/* Save the previous node, if any */
		ei2 = ein;

		/* Create a new node for the current ID */
Howard Chu's avatar
Howard Chu committed
531
		ein = bdb_cache_entryinfo_new( &bdb->bi_cache );
Howard Chu's avatar
Howard Chu committed
532
		ein->bei_id = ei.bei_id;
Howard Chu's avatar
Howard Chu committed
533
		ein->bei_kids = ei.bei_kids;
Howard Chu's avatar
Howard Chu committed
534
535
		ein->bei_nrdn = ei.bei_nrdn;
		ein->bei_rdn = ei.bei_rdn;
Howard Chu's avatar
Howard Chu committed
536
		ein->bei_ckids = ei.bei_ckids;
537
538
539
#ifdef SLAP_ZONE_ALLOC
		ein->bei_bdb = bdb;
#endif
Howard Chu's avatar
Howard Chu committed
540
		ei.bei_ckids = 0;
Howard Chu's avatar
Howard Chu committed
541
542
		
		/* This node is not fully connected yet */
543
		ein->bei_state |= CACHE_ENTRY_NOT_LINKED;
Howard Chu's avatar
Howard Chu committed
544
545

		/* Insert this node into the ID tree */
Howard Chu's avatar
Howard Chu committed
546
		ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
547
		if ( avl_insert( &bdb->bi_cache.c_idtree, (caddr_t)ein,
548
			bdb_id_cmp, bdb_id_dup_err ) ) {
Howard Chu's avatar
Howard Chu committed
549
			EntryInfo *eix = ein->bei_lrunext;
Howard Chu's avatar
Howard Chu committed
550

Howard Chu's avatar
Howard Chu committed
551
552
553
			/* Someone else created this node just before us.
			 * Free our new copy and use the existing one.
			 */
554
555
			bdb_cache_entryinfo_free( &bdb->bi_cache, ein );
			ein = eix;
Howard Chu's avatar
Howard Chu committed
556
557
			
			/* Link in any kids we've already processed */
Howard Chu's avatar
Howard Chu committed
558
559
560
561
			if ( ei2 ) {
				bdb_cache_entryinfo_lock( ein );
				avl_insert( &ein->bei_kids, (caddr_t)ei2,
					bdb_rdn_cmp, avl_dup_error );
Howard Chu's avatar
Howard Chu committed
562
				ein->bei_ckids++;
Howard Chu's avatar
Howard Chu committed
563
564
				bdb_cache_entryinfo_unlock( ein );
			}
Howard Chu's avatar
Howard Chu committed
565
566
		}

567
568
569
570
571
		/* If this is the first time, save this node
		 * to be returned later.
		 */
		if ( eir == NULL ) eir = ein;

Howard Chu's avatar
Howard Chu committed
572
573
574
		/* If there was a previous node, link it to this one */
		if ( ei2 ) ei2->bei_parent = ein;

Howard Chu's avatar
Howard Chu committed
575
		/* Look for this node's parent */
Howard Chu's avatar
Howard Chu committed
576
577
578
579
580
581
		if ( eip.bei_id ) {
			ei2 = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
					(caddr_t) &eip, bdb_id_cmp );
		} else {
			ei2 = &bdb->bi_cache.c_dntree;
		}
Howard Chu's avatar
Howard Chu committed
582
		bdb->bi_cache.c_eiused++;
583
584
		if ( ei2 && ( ei2->bei_kids || !ei2->bei_id ))
				bdb->bi_cache.c_leaves++;
Howard Chu's avatar
Howard Chu committed
585
		ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
586

Howard Chu's avatar
Howard Chu committed
587
		/* Got the parent, link in and we're done. */
Howard Chu's avatar
Howard Chu committed
588
		if ( ei2 ) {
589
			bdb_cache_entryinfo_lock( eir );
Howard Chu's avatar
Howard Chu committed
590
			bdb_cache_entryinfo_lock( ei2 );
Howard Chu's avatar
Howard Chu committed
591
			ein->bei_parent = ei2;
592

593
594
595
596
			avl_insert( &ei2->bei_kids, (caddr_t)ein, bdb_rdn_cmp,
				avl_dup_error);
			ei2->bei_ckids++;

597
598
599
600
			/* Reset all the state info */
			for (ein = eir; ein != ei2; ein=ein->bei_parent)
				ein->bei_state &= ~CACHE_ENTRY_NOT_LINKED;

Howard Chu's avatar
Howard Chu committed
601
			bdb_cache_entryinfo_unlock( ei2 );
Howard Chu's avatar
Howard Chu committed
602
603

			*res = eir;
Howard Chu's avatar
Howard Chu committed
604
605
606
607
			break;
		}
		ei.bei_kids = NULL;
		ei.bei_id = eip.bei_id;
Howard Chu's avatar
Howard Chu committed
608
		ei.bei_ckids = 1;
Howard Chu's avatar
Howard Chu committed
609
610
611
612
613
		avl_insert( &ei.bei_kids, (caddr_t)ein, bdb_rdn_cmp,
			avl_dup_error );
	}
	return rc;
}
614

Howard Chu's avatar
Howard Chu committed
615
616
617
618
/* Used by hdb_dn2idl when loading the EntryInfo for all the children
 * of a given node
 */
int hdb_cache_load(
619
620
621
622
623
624
625
	struct bdb_info *bdb,
	EntryInfo *ei,
	EntryInfo **res )
{
	EntryInfo *ei2;
	int rc;

Howard Chu's avatar
Howard Chu committed
626
	/* See if we already have this one */
627
628
629
	bdb_cache_entryinfo_lock( ei->bei_parent );
	ei2 = (EntryInfo *)avl_find( ei->bei_parent->bei_kids, ei, bdb_rdn_cmp );
	bdb_cache_entryinfo_unlock( ei->bei_parent );
Howard Chu's avatar
Howard Chu committed
630

631
	if ( !ei2 ) {
Howard Chu's avatar
Howard Chu committed
632
633
634
635
636
637
638
		/* Not found, add it */
		struct berval bv;

		/* bei_rdn was not malloc'd before, do it now */
		ber_dupbv( &bv, &ei->bei_rdn );
		ei->bei_rdn = bv;

639
640
641
642
		rc = bdb_entryinfo_add_internal( bdb, ei, res );
		bdb_cache_entryinfo_unlock( ei->bei_parent );
		ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
	} else {
Howard Chu's avatar
Howard Chu committed
643
		/* Found, return it */
644
645
646
647
648
		*res = ei2;
		return 0;
	}
	return rc;
}
Howard Chu's avatar
Howard Chu committed
649
650
#endif

651
652
653
654
/* This is best-effort only. If all entries in the cache are
 * busy, they will all be kept. This is unlikely to happen
 * unless the cache is very much smaller than the working set.
 */
655
static void
656
bdb_cache_lru_purge( struct bdb_info *bdb )
657
{
658
	DB_LOCK		lock, *lockp;
659
660
	EntryInfo *elru, *elnext = NULL;
	int count, islocked, eimax;
661

662
663
	/* Wait for the mutex; we're the only one trying to purge. */
	ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex );
664

665
	if ( bdb->bi_cache.c_cursize <= bdb->bi_cache.c_maxsize ) {
666
		ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
Howard Chu's avatar
Howard Chu committed
667
		bdb->bi_cache.c_purging = 0;
668
		return;
669
	}
670

671
672
673
674
675
676
	if ( bdb->bi_cache.c_locker ) {
		lockp = &lock;
	} else {
		lockp = NULL;
	}

677
	count = 0;
678

679
680
681
682
683
684
	/* maximum number of EntryInfo leaves to cache. In slapcat
	 * we always free all leaf nodes.
	 */
	if ( slapMode & SLAP_TOOL_READONLY )
		eimax = 0;
	else
685
		eimax = bdb->bi_cache.c_eimax;
686

687
	/* Look for an unused entry to remove */
688
	for ( elru = bdb->bi_cache.c_lruhead; elru; elru = elnext ) {
689
690
		elnext = elru->bei_lrunext;

691
		if ( bdb_cache_entryinfo_trylock( elru ))
692
			goto bottom;
693

694
695
696
697
		/* This flag implements the clock replacement behavior */
		if ( elru->bei_state & ( CACHE_ENTRY_REFERENCED )) {
			elru->bei_state &= ~CACHE_ENTRY_REFERENCED;
			bdb_cache_entryinfo_unlock( elru );
698
			goto bottom;
699
700
701
702
		}

		/* If this node is in the process of linking into the cache,
		 * or this node is being deleted, skip it.
703
		 */
Howard Chu's avatar
Howard Chu committed
704
705
		if (( elru->bei_state & ( CACHE_ENTRY_NOT_LINKED |
			CACHE_ENTRY_DELETED | CACHE_ENTRY_LOADING )) ||
706
			elru->bei_finders > 0 ) {
707
			bdb_cache_entryinfo_unlock( elru );
708
			goto bottom;
709
		}
710

711
		/* entryinfo is locked */
712
		islocked = 1;
713

714
715
716
		/* If we can successfully writelock it, then
		 * the object is idle.
		 */
717
718
		if ( bdb_cache_entry_db_lock( bdb,
			bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) {
719
720
721
722

			/* Free entry for this node if it's present */
			if ( elru->bei_e ) {
				elru->bei_e->e_private = NULL;
723
#ifdef SLAP_ZONE_ALLOC
724
				bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
725
#else
726
				bdb_entry_return( elru->bei_e );
727
#endif
728
729
730
				elru->bei_e = NULL;
				count++;
			}
731
			bdb_cache_entry_db_unlock( bdb, lockp );
732

733
734
			/* 
			 * If it is a leaf node, and we're over the limit, free it.
735
			 */
736
737
			if ( elru->bei_kids ) {
				/* Drop from list, we ignore it... */
Howard Chu's avatar
Howard Chu committed
738
				LRU_DEL( &bdb->bi_cache, elru );
739
740
741
742
743
744
			} else if ( bdb->bi_cache.c_leaves > eimax ) {
				/* Too many leaf nodes, free this one */
				bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
				bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
				islocked = 0;
			}	/* Leave on list until we need to free it */
745
		}
746

747
748
749
750
751
752
753
754
		if ( islocked )
			bdb_cache_entryinfo_unlock( elru );

		if ( count >= bdb->bi_cache.c_minfree ) {
			ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex );
			bdb->bi_cache.c_cursize -= count;
			ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex );
			break;
755
		}
756
757
758
bottom:
		if ( elnext == bdb->bi_cache.c_lruhead )
			break;
759
760
	}

Howard Chu's avatar
Howard Chu committed
761
	bdb->bi_cache.c_lruhead = elnext;
762
	ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex );
Howard Chu's avatar
Howard Chu committed
763
	bdb->bi_cache.c_purging = 0;
764
765
}

Howard Chu's avatar
Howard Chu committed
766
767
768
EntryInfo *
bdb_cache_find_info(
	struct bdb_info *bdb,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
769
	ID id )
Howard Chu's avatar
Howard Chu committed
770
{
Pierangelo Masarati's avatar
Pierangelo Masarati committed
771
772
	EntryInfo	ei = { 0 },
			*ei2;
Howard Chu's avatar
Howard Chu committed
773
774
775
776
777
778
779
780
781
782

	ei.bei_id = id;

	ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock );
	ei2 = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
					(caddr_t) &ei, bdb_id_cmp );
	ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
	return ei2;
}

783
/*
784
 * cache_find_id - find an entry in the cache, given id.
785
 * The entry is locked for Read upon return. Call with flag ID_LOCKED if
Howard Chu's avatar
Howard Chu committed
786
 * the supplied *eip was already locked.
787
788
 */

Howard Chu's avatar
Howard Chu committed
789
int
790
bdb_cache_find_id(
791
	Operation *op,
Howard Chu's avatar
Howard Chu committed
792
	DB_TXN	*tid,
793
	ID				id,
Howard Chu's avatar
Howard Chu committed
794
	EntryInfo	**eip,
795
	int		flag,
796
	BDB_LOCKER	locker,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
797
	DB_LOCK		*lock )
798
{
799
	struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
Howard Chu's avatar
Howard Chu committed
800
	Entry	*ep = NULL;
801
	int	rc = 0, load = 0;
Pierangelo Masarati's avatar
Pierangelo Masarati committed
802
	EntryInfo ei = { 0 };
Howard Chu's avatar
Howard Chu committed
803
804
805

	ei.bei_id = id;

806
807
808
#ifdef SLAP_ZONE_ALLOC
	slap_zh_rlock(bdb->bi_cache.c_zctx);
#endif
Howard Chu's avatar
Howard Chu committed
809
810
	/* If we weren't given any info, see if we have it already cached */
	if ( !*eip ) {
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
811
again:	ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
812
		*eip = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree,
Kurt Zeilenga's avatar
cleanup    
Kurt Zeilenga committed
813
			(caddr_t) &ei, bdb_id_cmp );
Howard Chu's avatar
Howard Chu committed
814
		if ( *eip ) {
Howard Chu's avatar
Howard Chu committed
815
			/* If the lock attempt fails, the info is in use */
816
			if ( bdb_cache_entryinfo_trylock( *eip )) {
Howard Chu's avatar
Howard Chu committed
817
				ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
818
819
820
				/* If this node is being deleted, treat
				 * as if the delete has already finished
				 */
821
822
823
				if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
					return DB_NOTFOUND;
				}
Howard Chu's avatar
Howard Chu committed
824
825
826
827
828
829
830
831
832
833
				/* otherwise, wait for the info to free up */
				ldap_pvt_thread_yield();
				goto again;
			}
			/* If this info isn't hooked up to its parent yet,
			 * unlock and wait for it to be fully initialized
			 */
			if ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) {
				bdb_cache_entryinfo_unlock( *eip );
				ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
Howard Chu's avatar
Howard Chu committed
834
835
836
				ldap_pvt_thread_yield();
				goto again;
			}
837
			flag |= ID_LOCKED;
Howard Chu's avatar
Howard Chu committed
838
839
840
		}
		ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock );
	}
841

Howard Chu's avatar
Howard Chu committed
842
843
	/* See if the ID exists in the database; add it to the cache if so */
	if ( !*eip ) {
Howard Chu's avatar
Howard Chu committed
844
#ifndef BDB_HIER
845
		rc = bdb_id2entry( op->o_bd, tid, locker, id, &ep );
Howard Chu's avatar
Howard Chu committed
846
		if ( rc == 0 ) {
Howard Chu's avatar
Howard Chu committed
847
			rc = bdb_cache_find_ndn( op, locker,
848
				&ep->e_nname, eip );
849
			if ( *eip ) flag |= ID_LOCKED;
Howard Chu's avatar
Howard Chu committed
850
			if ( rc ) {
851
				ep->e_private = NULL;
852
853
854
#ifdef SLAP_ZONE_ALLOC
				bdb_entry_return( bdb, ep, (*eip)->bei_zseq );
#else
Howard Chu's avatar
Howard Chu committed
855
				bdb_entry_return( ep );
856
#endif
Howard Chu's avatar
Howard Chu committed
857
858
859
				ep = NULL;
			}
		}
Howard Chu's avatar
Howard Chu committed
860
#else
Howard Chu's avatar
Howard Chu committed
861
		rc = hdb_cache_find_parent(op, locker, id, eip );
862
		if ( rc == 0 ) flag |= ID_LOCKED;
Howard Chu's avatar
Howard Chu committed
863
#endif
Howard Chu's avatar
Howard Chu committed
864
	}
865

Howard Chu's avatar
Howard Chu committed
866
	/* Ok, we found the info, do we have the entry? */
Howard Chu's avatar
cleanup    
Howard Chu committed
867
	if ( rc == 0 ) {
Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
868
869
870
871
872
		if ( !( flag & ID_LOCKED )) {
			bdb_cache_entryinfo_lock( *eip );
			flag |= ID_LOCKED;
		}

873
		if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
Howard Chu's avatar
Howard Chu committed
874
			rc = DB_NOTFOUND;
875
		} else {
876
			(*eip)->bei_finders++;
Howard Chu's avatar
Howard Chu committed
877
			(*eip)->bei_state |= CACHE_ENTRY_REFERENCED;
878
			/* Make sure only one thread tries to load the entry */
879
880
881
882
883
884
885
886
887
load1:
#ifdef SLAP_ZONE_ALLOC
			if ((*eip)->bei_e && !slap_zn_validate(
					bdb->bi_cache.c_zctx, (*eip)->bei_e, (*eip)->bei_zseq)) {
				(*eip)->bei_e = NULL;
				(*eip)->bei_zseq = 0;
			}
#endif
			if ( !(*eip)->bei_e && !((*eip)->bei_state & CACHE_ENTRY_LOADING)) {
888
889
890
				load = 1;
				(*eip)->bei_state |= CACHE_ENTRY_LOADING;
			}
891

Quanah Gibson-Mount's avatar
Quanah Gibson-Mount committed
892
893
894
895
896
897
898
			if ( !load ) {
				/* Clear the uncached state if we are not
				 * loading it, i.e it is already cached or
				 * another thread is currently loading it.
				 */
				(*eip)->bei_state &= ~CACHE_ENTRY_NOT_CACHED;
				flag &= ~ID_NOCACHE;
899
900
901
			}

			if ( flag & ID_LOCKED ) {
902
				bdb_cache_entryinfo_unlock( *eip );
903
				flag ^= ID_LOCKED;
904
			}
Howard Chu's avatar
Howard Chu committed
905
			rc = bdb_cache_entry_db_lock( bdb, locker, *eip, load, 0, lock );
906
907
			if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) {
				rc = DB_NOTFOUND;
Howard Chu's avatar
Howard Chu committed
908
				bdb_cache_entry_db_unlock( bdb, lock );
909
			} else if ( rc == 0 ) {
910
				if ( load ) {
Howard Chu's avatar
Howard Chu committed
911
					if ( !ep) {
912
						rc = bdb_id2entry( op->o_bd, tid, locker, id, &ep );
913
914
					}
					if ( rc == 0 ) {
915
						ep->e_private = *eip;
Howard Chu's avatar
Howard Chu committed
916
#ifdef BDB_HIER
917
						bdb_fix_dn( ep, 0 );
Howard Chu's avatar
Howard Chu committed
918
#endif
919
						(*eip)->bei_e = ep;
920
921
922
#ifdef SLAP_ZONE_ALLOC
						(*eip)->bei_zseq = *((ber_len_t *)ep - 2);
#endif
923
						ep = NULL;
Howard Chu's avatar
Howard Chu committed
924
						bdb_cache_lru_link( bdb, *eip );