Commit ab90bfd9 authored by Ondřej Kuzník's avatar Ondřej Kuzník
Browse files

ITS#9599 Implement tiered load balancing

parent e98374fa
...@@ -644,7 +644,13 @@ only valid when using GnuTLS and Mozilla NSS. ...@@ -644,7 +644,13 @@ only valid when using GnuTLS and Mozilla NSS.
.SH BACKEND CONFIGURATION .SH BACKEND CONFIGURATION
Options in this section describe how the Options in this section describe how the
.B lloadd .B lloadd
connects and authenticates to the backend servers. connects and authenticates to the backend servers. Backends are organised in groups
.RB ( tiers ).
Backends in the first tier are tried first; if none of them are reachable, the
following tier is tried in the same way. If there is a backend in the tier that
has suitable connections, but they are busy, no further tier is consulted. This
is useful in high availability scenarios where a group of servers (e.g. the
local environment) should be contacted if possible.
It is assumed all backend servers serve the same data. On startup, the It is assumed all backend servers serve the same data. On startup, the
configured connections are set up and those not dedicated to handle bind configured connections are set up and those not dedicated to handle bind
...@@ -730,6 +736,33 @@ set on the upstream connections, overriding the operating system setting. ...@@ -730,6 +736,33 @@ set on the upstream connections, overriding the operating system setting.
Only some systems support the customization of this parameter, it is Only some systems support the customization of this parameter, it is
ignored otherwise and system-wide settings are used. ignored otherwise and system-wide settings are used.
.SH TIER OPTIONS
.TP
.B tier
.B <tier type>
Groups servers which should be considered in the same try. If a viable
connection is found, even if it is busy, the load balancer does not proceed to
the next tier. The process of selecting a connection within a tier depends on
the tier's type.
.RE
Available types are:
.TP
.B roundrobin
Servers are tried in order and, if one is selected successfully, the following
search starts from the next one on the list.
.TP
.B weighted
Backend servers accept a new option
.B weight=<int>
which indicates how often the backend should be selected. If unspecified, weight
defaults to 0 and such backends have a slight chance of being selected even
when a non-zero weight backend is configured in the tier. The selection process
is along the lines of
.BR RFC2782 .
.SH BACKEND OPTIONS .SH BACKEND OPTIONS
.TP .TP
...@@ -879,6 +912,7 @@ bindconf ...@@ -879,6 +912,7 @@ bindconf
binddn=cn=test binddn=cn=test
credentials=pass credentials=pass
tier weighted
backend-server backend-server
uri=ldap://ldap1.example.com uri=ldap://ldap1.example.com
numconns=3 numconns=3
...@@ -886,6 +920,7 @@ backend-server ...@@ -886,6 +920,7 @@ backend-server
retry=5000 retry=5000
max-pending-ops=5 max-pending-ops=5
conn-max-pending=3 conn-max-pending=3
weight=5
backend-server backend-server
uri=ldap://ldap2.example.com uri=ldap://ldap2.example.com
...@@ -894,6 +929,7 @@ backend-server ...@@ -894,6 +929,7 @@ backend-server
retry=5000 retry=5000
max-pending-ops=5 max-pending-ops=5
conn-max-pending=3 conn-max-pending=3
weight=10
.fi .fi
.RE .RE
.LP .LP
......
...@@ -21,6 +21,7 @@ NT_OBJS = nt_svc.o ../../libraries/liblutil/slapdmsg.res ...@@ -21,6 +21,7 @@ NT_OBJS = nt_svc.o ../../libraries/liblutil/slapdmsg.res
SRCS = backend.c bind.c config.c connection.c client.c \ SRCS = backend.c bind.c config.c connection.c client.c \
daemon.c epoch.c extended.c init.c operation.c \ daemon.c epoch.c extended.c init.c operation.c \
tier.c tier_roundrobin.c tier_weighted.c \
upstream.c libevent_support.c \ upstream.c libevent_support.c \
$(@PLAT@_SRCS) $(@PLAT@_SRCS)
......
...@@ -392,44 +392,17 @@ upstream_select( ...@@ -392,44 +392,17 @@ upstream_select(
int *res, int *res,
char **message ) char **message )
{ {
LloadBackend *b, *first, *next; LloadTier *tier;
int rc = 0; int finished = 0;
checked_lock( &backend_mutex ); LDAP_STAILQ_FOREACH( tier, &tiers, t_next ) {
first = b = current_backend; if ( (finished = tier->t_type.tier_select(
checked_unlock( &backend_mutex ); tier, op, cp, res, message )) ) {
break;
*res = LDAP_UNAVAILABLE;
if ( !first ) {
return NULL;
}
/* TODO: Two runs, one with trylock, then one actually locked if we don't
* find anything? */
do {
checked_lock( &b->b_mutex );
next = LDAP_CIRCLEQ_LOOP_NEXT( &backend, b, b_next );
rc = backend_select( b, op, cp, res, message );
checked_unlock( &b->b_mutex );
if ( rc && *cp ) {
/*
* Round-robin step:
* Rotate the queue to put this backend at the end. The race here
* is acceptable.
*/
checked_lock( &backend_mutex );
current_backend = next;
checked_unlock( &backend_mutex );
return rc;
} }
}
b = next; return finished;
} while ( b != first );
return rc;
} }
/* /*
...@@ -726,26 +699,41 @@ backend_reset( LloadBackend *b, int gentle ) ...@@ -726,26 +699,41 @@ backend_reset( LloadBackend *b, int gentle )
assert_locked( &b->b_mutex ); assert_locked( &b->b_mutex );
} }
LloadBackend *
lload_backend_new( void )
{
LloadBackend *b;
b = ch_calloc( 1, sizeof(LloadBackend) );
LDAP_CIRCLEQ_INIT( &b->b_conns );
LDAP_CIRCLEQ_INIT( &b->b_bindconns );
LDAP_CIRCLEQ_INIT( &b->b_preparing );
LDAP_CIRCLEQ_ENTRY_INIT( b, b_next );
b->b_numconns = 1;
b->b_numbindconns = 1;
b->b_weight = 1;
b->b_retry_timeout = 5000;
ldap_pvt_thread_mutex_init( &b->b_mutex );
return b;
}
void void
lload_backend_destroy( LloadBackend *b ) lload_backend_destroy( LloadBackend *b )
{ {
LloadBackend *next = LDAP_CIRCLEQ_LOOP_NEXT( &backend, b, b_next );
Debug( LDAP_DEBUG_CONNS, "lload_backend_destroy: " Debug( LDAP_DEBUG_CONNS, "lload_backend_destroy: "
"destroying backend uri='%s', numconns=%d, numbindconns=%d\n", "destroying backend uri='%s', numconns=%d, numbindconns=%d\n",
b->b_uri.bv_val, b->b_numconns, b->b_numbindconns ); b->b_uri.bv_val, b->b_numconns, b->b_numbindconns );
checked_lock( &b->b_mutex ); checked_lock( &b->b_mutex );
b->b_tier->t_type.tier_remove_backend( b->b_tier, b );
b->b_numconns = b->b_numbindconns = 0; b->b_numconns = b->b_numbindconns = 0;
backend_reset( b, 0 ); backend_reset( b, 0 );
LDAP_CIRCLEQ_REMOVE( &backend, b, b_next );
if ( b == next ) {
current_backend = NULL;
} else {
current_backend = next;
}
#ifdef BALANCER_MODULE #ifdef BALANCER_MODULE
if ( b->b_monitor ) { if ( b->b_monitor ) {
BackendDB *be; BackendDB *be;
...@@ -760,6 +748,7 @@ lload_backend_destroy( LloadBackend *b ) ...@@ -760,6 +748,7 @@ lload_backend_destroy( LloadBackend *b )
assert( rc == LDAP_SUCCESS ); assert( rc == LDAP_SUCCESS );
} }
#endif /* BALANCER_MODULE */ #endif /* BALANCER_MODULE */
checked_unlock( &b->b_mutex ); checked_unlock( &b->b_mutex );
ldap_pvt_thread_mutex_destroy( &b->b_mutex ); ldap_pvt_thread_mutex_destroy( &b->b_mutex );
...@@ -774,13 +763,3 @@ lload_backend_destroy( LloadBackend *b ) ...@@ -774,13 +763,3 @@ lload_backend_destroy( LloadBackend *b )
ch_free( b->b_name.bv_val ); ch_free( b->b_name.bv_val );
ch_free( b ); ch_free( b );
} }
/*
 * Destroy every backend on the global `backend` queue.
 *
 * The head of the queue is handed to lload_backend_destroy() repeatedly
 * until the queue reports empty, so this relies on the destroy call taking
 * the backend off the queue.
 */
void
lload_backends_destroy( void )
{
    for ( ;; ) {
        if ( LDAP_CIRCLEQ_EMPTY( &backend ) ) {
            break;
        }
        lload_backend_destroy( LDAP_CIRCLEQ_FIRST( &backend ) );
    }
}
...@@ -113,6 +113,7 @@ static ConfigFile *cfn; ...@@ -113,6 +113,7 @@ static ConfigFile *cfn;
static ConfigDriver config_fname; static ConfigDriver config_fname;
static ConfigDriver config_generic; static ConfigDriver config_generic;
static ConfigDriver config_tier;
static ConfigDriver config_backend; static ConfigDriver config_backend;
static ConfigDriver config_bindconf; static ConfigDriver config_bindconf;
static ConfigDriver config_restrict_oid; static ConfigDriver config_restrict_oid;
...@@ -132,10 +133,6 @@ static ConfigDriver config_share_tls_ctx; ...@@ -132,10 +133,6 @@ static ConfigDriver config_share_tls_ctx;
static ConfigDriver backend_cf_gen; static ConfigDriver backend_cf_gen;
#endif /* BALANCER_MODULE */ #endif /* BALANCER_MODULE */
lload_b_head backend = LDAP_CIRCLEQ_HEAD_INITIALIZER(backend);
ldap_pvt_thread_mutex_t backend_mutex;
LloadBackend *current_backend = NULL;
struct slap_bindconf bindconf = {}; struct slap_bindconf bindconf = {};
struct berval lloadd_identity = BER_BVNULL; struct berval lloadd_identity = BER_BVNULL;
...@@ -182,6 +179,8 @@ enum { ...@@ -182,6 +179,8 @@ enum {
CFG_CLIENT_PENDING, CFG_CLIENT_PENDING,
CFG_RESTRICT_EXOP, CFG_RESTRICT_EXOP,
CFG_RESTRICT_CONTROL, CFG_RESTRICT_CONTROL,
CFG_TIER,
CFG_WEIGHT,
CFG_LAST CFG_LAST
}; };
...@@ -205,6 +204,17 @@ static ConfigTable config_back_cf_table[] = { ...@@ -205,6 +204,17 @@ static ConfigTable config_back_cf_table[] = {
&config_generic, &config_generic,
NULL, NULL, NULL NULL, NULL, NULL
}, },
{ "tier", "name", 2, 2, 0,
ARG_MAGIC|ARG_STRING|CFG_TIER,
&config_tier,
"( OLcfgBkAt:13.39 "
"NAME 'olcBkLloadTierType' "
"DESC 'Tier type' "
"EQUALITY caseIgnoreMatch "
"SYNTAX OMsDirectoryString "
"SINGLE-VALUE )",
NULL, NULL
},
/* conf-file only option */ /* conf-file only option */
{ "backend-server", "backend options", 2, 0, 0, { "backend-server", "backend options", 2, 0, 0,
ARG_MAGIC|CFG_BACKEND, ARG_MAGIC|CFG_BACKEND,
...@@ -747,6 +757,17 @@ static ConfigTable config_back_cf_table[] = { ...@@ -747,6 +757,17 @@ static ConfigTable config_back_cf_table[] = {
"SINGLE-VALUE )", "SINGLE-VALUE )",
NULL, NULL NULL, NULL
}, },
{ "", NULL, 2, 2, 0,
ARG_MAGIC|ARG_UINT|CFG_WEIGHT,
&backend_cf_gen,
"( OLcfgBkAt:13.40 "
"NAME 'olcBkLloadWeight' "
"DESC 'Backend weight' "
"SYNTAX OMsInteger "
"SINGLE-VALUE )",
NULL,
{ .v_uint = 0 },
},
#endif /* BALANCER_MODULE */ #endif /* BALANCER_MODULE */
{ NULL, NULL, 0, 0, 0, ARG_IGNORED, NULL } { NULL, NULL, 0, 0, 0, ARG_IGNORED, NULL }
...@@ -754,9 +775,13 @@ static ConfigTable config_back_cf_table[] = { ...@@ -754,9 +775,13 @@ static ConfigTable config_back_cf_table[] = {
#ifdef BALANCER_MODULE #ifdef BALANCER_MODULE
static ConfigCfAdd lload_cfadd; static ConfigCfAdd lload_cfadd;
static ConfigLDAPadd lload_backend_ldadd; static ConfigLDAPadd lload_backend_ldadd;
static ConfigLDAPadd lload_tier_ldadd;
#ifdef SLAP_CONFIG_DELETE #ifdef SLAP_CONFIG_DELETE
static ConfigLDAPdel lload_backend_lddel; static ConfigLDAPdel lload_backend_lddel;
static ConfigLDAPdel lload_tier_lddel;
#endif /* SLAP_CONFIG_DELETE */ #endif /* SLAP_CONFIG_DELETE */
static ConfigOCs lloadocs[] = { static ConfigOCs lloadocs[] = {
...@@ -807,12 +832,27 @@ static ConfigOCs lloadocs[] = { ...@@ -807,12 +832,27 @@ static ConfigOCs lloadocs[] = {
"$ olcBkLloadMaxPendingOps " "$ olcBkLloadMaxPendingOps "
"$ olcBkLloadMaxPendingConns ) " "$ olcBkLloadMaxPendingConns ) "
"MAY ( olcBkLloadStartTLS " "MAY ( olcBkLloadStartTLS "
"$ olcBkLloadWeight ) "
") )", ") )",
Cft_Misc, config_back_cf_table, Cft_Misc, config_back_cf_table,
lload_backend_ldadd, lload_backend_ldadd,
NULL, NULL,
#ifdef SLAP_CONFIG_DELETE #ifdef SLAP_CONFIG_DELETE
lload_backend_lddel, lload_backend_lddel,
#endif /* SLAP_CONFIG_DELETE */
},
{ "( OLcfgBkOc:13.3 "
"NAME 'olcBkLloadTierConfig' "
"DESC 'Lload tier configuration' "
"SUP olcConfig STRUCTURAL "
"MUST ( cn "
"$ olcBkLloadTierType "
") )",
Cft_Misc, config_back_cf_table,
lload_tier_ldadd,
NULL,
#ifdef SLAP_CONFIG_DELETE
lload_tier_lddel,
#endif /* SLAP_CONFIG_DELETE */ #endif /* SLAP_CONFIG_DELETE */
}, },
{ NULL, 0, NULL } { NULL, 0, NULL }
...@@ -1073,6 +1113,26 @@ lload_backend_finish( ConfigArgs *ca ) ...@@ -1073,6 +1113,26 @@ lload_backend_finish( ConfigArgs *ca )
b->b_retry_event = event; b->b_retry_event = event;
} }
if ( BER_BVISEMPTY( &b->b_name ) ) {
struct berval bv;
LloadBackend *b2;
int i = 1;
LDAP_CIRCLEQ_FOREACH ( b2, &b->b_tier->t_backends, b_next ) {
i++;
}
bv.bv_val = ca->cr_msg;
bv.bv_len =
snprintf( ca->cr_msg, sizeof(ca->cr_msg), "server %d", i );
ber_dupbv( &b->b_name, &bv );
}
if ( b->b_tier->t_type.tier_add_backend( b->b_tier, b ) ) {
goto fail;
}
return LDAP_SUCCESS; return LDAP_SUCCESS;
fail: fail:
...@@ -1085,28 +1145,6 @@ fail: ...@@ -1085,28 +1145,6 @@ fail:
return -1; return -1;
} }
/*
 * Allocate a backend with ch_calloc(), set its defaults and append it to the
 * global `backend` queue.
 *
 * Defaults: b_numconns and b_numbindconns are 1, b_retry_timeout is 5000;
 * the three connection queues and b_mutex are initialised.
 *
 * Returns the new backend, already linked at the tail of the queue.
 */
static LloadBackend *
backend_alloc( void )
{
    LloadBackend *new_b = ch_calloc( 1, sizeof(LloadBackend) );

    /* Scalar defaults */
    new_b->b_retry_timeout = 5000;
    new_b->b_numbindconns = 1;
    new_b->b_numconns = 1;

    /* Per-backend connection queues */
    LDAP_CIRCLEQ_INIT( &new_b->b_preparing );
    LDAP_CIRCLEQ_INIT( &new_b->b_bindconns );
    LDAP_CIRCLEQ_INIT( &new_b->b_conns );

    ldap_pvt_thread_mutex_init( &new_b->b_mutex );

    /* Link it in only once it is fully set up */
    LDAP_CIRCLEQ_INSERT_TAIL( &backend, new_b, b_next );

    return new_b;
}
static int static int
backend_config_url( LloadBackend *b, struct berval *uri ) backend_config_url( LloadBackend *b, struct berval *uri )
{ {
...@@ -1183,16 +1221,29 @@ static int ...@@ -1183,16 +1221,29 @@ static int
config_backend( ConfigArgs *c ) config_backend( ConfigArgs *c )
{ {
LloadBackend *b; LloadBackend *b;
LloadTier *tier;
int i, rc = 0; int i, rc = 0;
b = backend_alloc(); tier = LDAP_STAILQ_LAST( &tiers, LloadTier, t_next );
if ( !tier ) {
Debug( LDAP_DEBUG_ANY, "config_backend: "
"no tier configured yet\n" );
return -1;
}
/* FIXME: maybe tier_add_backend could allocate it? */
b = lload_backend_new();
b->b_tier = tier;
for ( i = 1; i < c->argc; i++ ) { for ( i = 1; i < c->argc; i++ ) {
if ( lload_backend_parse( c->argv[i], b ) ) { if ( lload_backend_parse( c->argv[i], b ) ) {
Debug( LDAP_DEBUG_ANY, "config_backend: " if ( !tier->t_type.tier_backend_config ||
"error parsing backend configuration item '%s'\n", tier->t_type.tier_backend_config( tier, b, c->argv[i] ) ) {
c->argv[i] ); Debug( LDAP_DEBUG_ANY, "config_backend: "
return -1; "error parsing backend configuration item '%s'\n",
c->argv[i] );
return -1;
}
} }
} }
...@@ -1463,6 +1514,80 @@ done: ...@@ -1463,6 +1514,80 @@ done:
return rc; return rc;
} }
/*
 * Configuration handler for the "tier" keyword (CFG_TIER).
 *
 * - SLAP_CONFIG_EMIT: hands back a copy of the tier type's name in
 *   c->value_string; any type other than CFG_TIER is a failure.
 * - LDAP_MOD_DELETE: only accepted while the pending change is
 *   LLOAD_CHANGE_DEL.
 * - Online add: the tier is expected in c->ca_private already and is only
 *   recorded as the target of the pending change.
 * - Otherwise: looks up the tier implementation named by c->value_string,
 *   creates a tier through its tier_init(), appends it to `tiers` and names
 *   it "tier N", N being its 1-based position on the list.
 *
 * Returns LDAP_SUCCESS on success and 1 on failure. On failure a pending
 * LLOAD_CHANGE_ADD is turned into LLOAD_CHANGE_DEL to abort it.
 */
static int
config_tier( ConfigArgs *c )
{
    LloadTier *tier = c->ca_private;
    LloadTier *cursor;
    struct lload_tier_type *impl;
    struct berval name;
    int position = 1;

    if ( c->op == SLAP_CONFIG_EMIT ) {
        if ( c->type != CFG_TIER ) {
            goto fail;
        }
        c->value_string = ch_strdup( tier->t_type.tier_name );
        return LDAP_SUCCESS;
    }

    if ( c->op == LDAP_MOD_DELETE ) {
        /*
         * TODO: A delete outside of removing the whole entry shouldn't
         * really happen while this attribute is in the RDN, but that is not
         * enforced yet.
         *
         * How would we go about changing the tier type if that were ever
         * supported?
         */
        if ( lload_change.type != LLOAD_CHANGE_DEL ) {
            goto fail;
        }
        return LDAP_SUCCESS;
    }

    if ( CONFIG_ONLINE_ADD( c ) ) {
        assert( tier );
        lload_change.target = tier;
        return LDAP_SUCCESS;
    }

    impl = lload_tier_find( c->value_string );
    if ( !impl ) {
        goto fail;
    }

    tier = impl->tier_init();
    if ( !tier ) {
        goto fail;
    }
    lload_change.target = tier;

    /*
     * Work out the 1-based position the new tier will take; on an empty
     * list the loop does not run and position stays 1.
     */
    LDAP_STAILQ_FOREACH ( cursor, &tiers, t_next ) {
        position++;
    }
    /* A tail insert also covers the empty list, no special case needed */
    LDAP_STAILQ_INSERT_TAIL( &tiers, tier, t_next );

    /* c->cr_msg doubles as scratch space to format the name */
    name.bv_val = c->cr_msg;
    name.bv_len =
            snprintf( c->cr_msg, sizeof(c->cr_msg), "tier %d", position );
    ber_dupbv( &tier->t_name, &name );

    return LDAP_SUCCESS;

fail:
    if ( lload_change.type == LLOAD_CHANGE_ADD ) {
        /* Abort the ADD */
        lload_change.type = LLOAD_CHANGE_DEL;
    }
    return 1;
}
static int static int
config_fname( ConfigArgs *c ) config_fname( ConfigArgs *c )
{ {
...@@ -2957,6 +3082,9 @@ static slap_cf_aux_table backendkey[] = { ...@@ -2957,6 +3082,9 @@ static slap_cf_aux_table backendkey[] = {
{ BER_BVC("max-pending-ops="), offsetof(LloadBackend, b_max_pending), 'i', 0, NULL }, { BER_BVC("max-pending-ops="), offsetof(LloadBackend, b_max_pending), 'i', 0, NULL },
{ BER_BVC("conn-max-pending="), offsetof(LloadBackend, b_max_conn_pending), 'i', 0, NULL }, { BER_BVC("conn-max-pending="), offsetof(LloadBackend, b_max_conn_pending), 'i', 0, NULL },
{ BER_BVC("starttls="), offsetof(LloadBackend, b_tls_conf), 'i', 0, tlskey }, { BER_BVC("starttls="), offsetof(LloadBackend, b_tls_conf), 'i', 0, tlskey },
{ BER_BVC("weight="), offsetof(LloadBackend, b_weight), 'i', 0, NULL },
{ BER_BVNULL, 0, 0, 0, NULL } { BER_BVNULL, 0, 0, 0, NULL }
}; };
...@@ -3803,6 +3931,9 @@ backend_cf_gen( ConfigArgs *c ) ...@@ -3803,6 +3931,9 @@ backend_cf_gen( ConfigArgs *c )
case CFG_STARTTLS: case CFG_STARTTLS:
enum_to_verb( tlskey, b->b_tls_conf, &c->value_bv ); enum_to_verb( tlskey, b->b_tls_conf, &c->value_bv );
break; break;
case CFG_WEIGHT:
c->value_uint = b->b_weight;
break;
default: default:
rc = 1; rc = 1;
break; break;
...@@ -3884,6 +4015,9 @@ backend_cf_gen( ConfigArgs *c ) ...@@ -3884,6 +4015,9 @@ backend_cf_gen( ConfigArgs *c )
#endif /* ! HAVE_TLS */ #endif /* ! HAVE_TLS */
b->b_tls_conf = tlskey[i].mask; b->b_tls_conf = tlskey[i].mask;