diff --git a/.gitignore b/.gitignore index bb6d0346e..ffecc6771 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ .cproject .project .settings/ + +# Separate build directory +build/* diff --git a/configlexer.lex b/configlexer.lex index 1b892fe3b..a1d34ed3d 100644 --- a/configlexer.lex +++ b/configlexer.lex @@ -225,6 +225,7 @@ tcp-count{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_TCP_COUNT;} tcp-reject-overflow{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_TCP_REJECT_OVERFLOW;} tcp-query-count{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_TCP_QUERY_COUNT;} tcp-timeout{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_TCP_TIMEOUT;} +tcp-idle-timeout{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_TCP_IDLE_TIMEOUT;} tcp-mss{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_TCP_MSS;} outgoing-tcp-mss{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_OUTGOING_TCP_MSS;} ipv4-edns-size{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_IPV4_EDNS_SIZE;} @@ -241,6 +242,7 @@ difffile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_DIFFFILE;} xfrdfile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRDFILE;} xfrdir{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRDIR;} xfrd-reload-timeout{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRD_RELOAD_TIMEOUT;} +xfrd-conn-reuse{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_XFRD_CONN_REUSE;} verbosity{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_VERBOSITY;} zone{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONE;} zonefile{COLON} { LEXOUT(("v(%s) ", yytext)); return VAR_ZONEFILE;} diff --git a/configparser.y b/configparser.y index f72d4a3f6..8a46324a8 100644 --- a/configparser.y +++ b/configparser.y @@ -91,12 +91,14 @@ static int parse_range(const char *str, long long *low, long long *high); %token VAR_TCP_REJECT_OVERFLOW %token VAR_TCP_QUERY_COUNT %token VAR_TCP_TIMEOUT +%token VAR_TCP_IDLE_TIMEOUT %token VAR_TCP_MSS %token VAR_OUTGOING_TCP_MSS %token VAR_IPV4_EDNS_SIZE %token VAR_IPV6_EDNS_SIZE %token 
VAR_STATISTICS %token VAR_XFRD_RELOAD_TIMEOUT +%token VAR_XFRD_CONN_REUSE %token VAR_LOG_TIME_ASCII %token VAR_ROUND_ROBIN %token VAR_MINIMAL_RESPONSES @@ -310,6 +312,8 @@ server_option: { cfg_parser->opt->tcp_query_count = (int)$2; } | VAR_TCP_TIMEOUT number { cfg_parser->opt->tcp_timeout = (int)$2; } + | VAR_TCP_IDLE_TIMEOUT number + { cfg_parser->opt->tcp_idle_timeout = (int)$2; } | VAR_TCP_MSS number { cfg_parser->opt->tcp_mss = (int)$2; } | VAR_OUTGOING_TCP_MSS number @@ -347,6 +351,8 @@ server_option: { cfg_parser->opt->xfrdir = region_strdup(cfg_parser->opt->region, $2); } | VAR_XFRD_RELOAD_TIMEOUT number { cfg_parser->opt->xfrd_reload_timeout = (int)$2; } + | VAR_XFRD_CONN_REUSE boolean + { cfg_parser->opt->xfrd_conn_reuse = $2; } | VAR_VERBOSITY number { cfg_parser->opt->verbosity = (int)$2; } | VAR_RRL_SIZE number diff --git a/configure.ac b/configure.ac index 6c6bb2249..442c8a341 100644 --- a/configure.ac +++ b/configure.ac @@ -899,6 +899,16 @@ AC_ARG_WITH([tcp_timeout], [tcp_timeout=$withval]) AC_DEFINE_UNQUOTED([TCP_TIMEOUT], $tcp_timeout, [Define to the default tcp timeout.]) +dnl +dnl Determine the default tcp idle timeout (used when closing outgoing XFR TCP connections) +dnl +tcp_idle_timeout=10 +AC_ARG_WITH([tcp_idle_timeout], + AC_HELP_STRING([--with-tcp-idle-timeout=number], [Limit the default tcp idle timeout, used when closing outgoing XFR TCP connections when xfrd-conn-reuse option is enabled]), + [tcp_idle_timeout=$withval]) +AC_DEFINE_UNQUOTED([TCP_IDLE_TIMEOUT], $tcp_idle_timeout, [Define to the default tcp idle timeout.]) + + dnl dnl Features dnl diff --git a/nsd-checkconf.c b/nsd-checkconf.c index b392c7111..d8795508e 100644 --- a/nsd-checkconf.c +++ b/nsd-checkconf.c @@ -401,12 +401,14 @@ config_print_zone(nsd_options_type* opt, const char* k, int s, const char *o, SERV_GET_INT(tcp_count, o); SERV_GET_INT(tcp_query_count, o); SERV_GET_INT(tcp_timeout, o); + SERV_GET_INT(tcp_idle_timeout, o); SERV_GET_INT(tcp_mss, o);
SERV_GET_INT(outgoing_tcp_mss, o); SERV_GET_INT(ipv4_edns_size, o); SERV_GET_INT(ipv6_edns_size, o); SERV_GET_INT(statistics, o); SERV_GET_INT(xfrd_reload_timeout, o); + SERV_GET_BIN(xfrd_conn_reuse, o); SERV_GET_INT(verbosity, o); SERV_GET_INT(send_buffer_size, o); SERV_GET_INT(receive_buffer_size, o); @@ -547,6 +549,7 @@ config_test_print_server(nsd_options_type* opt) printf("\ttcp-count: %d\n", opt->tcp_count); printf("\ttcp-query-count: %d\n", opt->tcp_query_count); printf("\ttcp-timeout: %d\n", opt->tcp_timeout); + printf("\ttcp-idle-timeout: %d\n", opt->tcp_idle_timeout); printf("\ttcp-mss: %d\n", opt->tcp_mss); printf("\toutgoing-tcp-mss: %d\n", opt->outgoing_tcp_mss); printf("\tipv4-edns-size: %d\n", (int) opt->ipv4_edns_size); @@ -561,6 +564,7 @@ config_test_print_server(nsd_options_type* opt) print_string_var("zonelistfile:", opt->zonelistfile); print_string_var("xfrdir:", opt->xfrdir); printf("\txfrd-reload-timeout: %d\n", opt->xfrd_reload_timeout); + printf("\txfrd-conn-reuse: %s\n", opt->xfrd_conn_reuse?"yes":"no"); printf("\tlog-time-ascii: %s\n", opt->log_time_ascii?"yes":"no"); printf("\tround-robin: %s\n", opt->round_robin?"yes":"no"); printf("\tminimal-responses: %s\n", opt->minimal_responses?"yes":"no"); diff --git a/nsd.c b/nsd.c index be43cae57..183458e99 100644 --- a/nsd.c +++ b/nsd.c @@ -1069,6 +1069,8 @@ main(int argc, char *argv[]) nsd.maximum_tcp_count = nsd.options->tcp_count; } nsd.tcp_timeout = nsd.options->tcp_timeout; + nsd.tcp_idle_timeout = nsd.options->tcp_idle_timeout; + nsd.xfrd_conn_reuse = nsd.options->xfrd_conn_reuse; nsd.tcp_query_count = nsd.options->tcp_query_count; nsd.tcp_mss = nsd.options->tcp_mss; nsd.outgoing_tcp_mss = nsd.options->outgoing_tcp_mss; diff --git a/nsd.conf.5.in b/nsd.conf.5.in index 851c6de83..41712961f 100644 --- a/nsd.conf.5.in +++ b/nsd.conf.5.in @@ -280,6 +280,11 @@ Default is 0, meaning there is no maximum. Overrides the default TCP timeout. This also affects zone transfers over TCP. 
The default is 120 seconds. .TP +.B tcp\-idle\-timeout:\fR +Overrides the default TCP idle timeout. This is used when closing outgoing +TCP connections used for zone transfers when the xfrd\-conn\-reuse option is enabled. +The default is 10 seconds. +.TP .B tcp-mss:\fR Maximum segment size (MSS) of TCP socket on which the server responds to queries. Value lower than common MSS on Ethernet @@ -370,6 +375,12 @@ transfer, then it will wait for the number of seconds before it will trigger a new reload. Setting this value throttles the reloads to once per the number of seconds. The default is 1 second. .TP +.B xfrd\-conn\-reuse:\fR +When making outgoing XFR requests to the same master an open TCP connection +will be used in preference to opening a new connection for each request. +After all transfers complete, connections will be left open for +tcp\-idle\-timeout seconds to increase the chance of reuse. Default is no. +.TP .B verbosity:\fR This value specifies the verbosity level for (non\-debug) logging. Default is 0. 1 gives more information about incoming notifies and diff --git a/nsd.conf.sample.in b/nsd.conf.sample.in index 8fb0ad88c..62b1872e8 100644 --- a/nsd.conf.sample.in +++ b/nsd.conf.sample.in @@ -166,6 +166,10 @@ server: # Override the default (120 seconds) TCP timeout. # tcp-timeout: 120 + # Override the default (10 seconds) TCP idle timeout, used when + # closing outgoing XFR TCP connections when xfrd-conn-reuse is enabled. + # tcp-idle-timeout: 10 + # Maximum segment size (MSS) of TCP socket on which the server # responds to queries. Default is 0, system default MSS. # tcp-mss: 0 @@ -187,6 +191,10 @@ server: # Number of seconds between reloads triggered by xfrd. # xfrd-reload-timeout: 1 + # Prefer to reuse open connections to a master instead of opening + # a new connection for each transfer request to that master. + # xfrd-conn-reuse: no + # log timestamp in ascii (y-m-d h:m:s.msec), yes is default. 
# log-time-ascii: yes diff --git a/nsd.h b/nsd.h index 86b8ad66e..1b6b9350d 100644 --- a/nsd.h +++ b/nsd.h @@ -267,6 +267,9 @@ struct nsd int current_tcp_count; int tcp_query_count; int tcp_timeout; + int tcp_idle_timeout; + int xfrd_conn_reuse; + int tcp_mss; int outgoing_tcp_mss; size_t ipv4_edns_size; diff --git a/options.c b/options.c index 7e7d823c1..c8dca8153 100644 --- a/options.c +++ b/options.c @@ -79,6 +79,7 @@ nsd_options_create(region_type* region) opt->tcp_reject_overflow = 0; opt->tcp_query_count = 0; opt->tcp_timeout = TCP_TIMEOUT; + opt->tcp_idle_timeout = TCP_IDLE_TIMEOUT; opt->tcp_mss = 0; opt->outgoing_tcp_mss = 0; opt->ipv4_edns_size = EDNS_MAX_MESSAGE_LEN; @@ -122,6 +123,7 @@ nsd_options_create(region_type* region) opt->zonefiles_write = ZONEFILES_WRITE_INTERVAL; else opt->zonefiles_write = 0; opt->xfrd_reload_timeout = 1; + opt->xfrd_conn_reuse = 0; opt->tls_service_key = NULL; opt->tls_service_ocsp = NULL; opt->tls_service_pem = NULL; diff --git a/options.h b/options.h index 943620f3b..09b11f951 100644 --- a/options.h +++ b/options.h @@ -87,6 +87,7 @@ struct nsd_options { int confine_to_zone; int tcp_query_count; int tcp_timeout; + int tcp_idle_timeout; int tcp_mss; int outgoing_tcp_mss; size_t ipv4_edns_size; @@ -102,6 +103,7 @@ struct nsd_options { const char* zonelistfile; const char* nsid; int xfrd_reload_timeout; + int xfrd_conn_reuse; int zonefiles_check; int zonefiles_write; int log_time_ascii; diff --git a/tpkg/checkconf.tdir/checkconf.check b/tpkg/checkconf.tdir/checkconf.check index 6565eaf79..cbcf32aa7 100644 --- a/tpkg/checkconf.tdir/checkconf.check +++ b/tpkg/checkconf.tdir/checkconf.check @@ -24,6 +24,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -38,6 +39,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no @@ -135,6 
+137,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -149,6 +152,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no @@ -199,6 +203,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -213,6 +218,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: no round-robin: no minimal-responses: no @@ -272,6 +278,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -286,6 +293,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no @@ -389,6 +397,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -403,6 +412,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no diff --git a/tpkg/checkconf.tdir/checkconf.check2 b/tpkg/checkconf.tdir/checkconf.check2 index 6565eaf79..cbcf32aa7 100644 --- a/tpkg/checkconf.tdir/checkconf.check2 +++ b/tpkg/checkconf.tdir/checkconf.check2 @@ -24,6 +24,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -38,6 +39,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no @@ -135,6 +137,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 
outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -149,6 +152,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no @@ -199,6 +203,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -213,6 +218,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: no round-robin: no minimal-responses: no @@ -272,6 +278,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -286,6 +293,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no @@ -389,6 +397,7 @@ server: tcp-count: 100 tcp-query-count: 0 tcp-timeout: 120 + tcp-idle-timeout: 10 tcp-mss: 0 outgoing-tcp-mss: 0 ipv4-edns-size: 4096 @@ -403,6 +412,7 @@ server: zonelistfile: "/var/db/nsd/zone.list" xfrdir: "/tmp" xfrd-reload-timeout: 1 + xfrd-conn-reuse: no log-time-ascii: yes round-robin: no minimal-responses: no diff --git a/xfrd-tcp.c b/xfrd-tcp.c index d00c13b75..262f8aa04 100644 --- a/xfrd-tcp.c +++ b/xfrd-tcp.c @@ -296,27 +296,45 @@ tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone) (void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->node); } +/* determine if pipeline is actually in use*/ +static int +tcp_pipe_in_use(struct xfrd_tcp_pipeline* tp) { + /* Check there are no active transfers (outstanding query ids) but we + need to ignore any outstanding responses for skipped transactions + - in other words, the pipe is in use only if at least one 'nonskip' ID is taken */ + /* NOTE: There is a minor question if a tcp connection still with skipped + transactions is truly 'idle', but for our purposes we are going to treat + it as if it is...*/ + return
(ID_PIPE_NUM - tp->num_unused > tp->num_skip)?1:0; +} + /* stop the tcp pipe (and all its zones need to retry) */ static void xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp) { int i, conn = -1; - assert(tp->num_unused < ID_PIPE_NUM); /* at least one 'in-use' */ - assert(ID_PIPE_NUM - tp->num_unused > tp->num_skip); /* at least one 'nonskip' */ - /* need to retry for all the zones connected to it */ - /* these could use different lists and go to a different nextmaster*/ - for(i=0; i<ID_PIPE_NUM; i++) { - if(tp->id[i] && tp->id[i] != TCP_NULL_SKIP) { - xfrd_zone_type* zone = tp->id[i]; - conn = zone->tcp_conn; - zone->tcp_conn = -1; - zone->tcp_waiting = 0; - tcp_pipe_sendlist_remove(tp, zone); - tcp_pipe_id_remove(tp, zone); - xfrd_set_refresh_now(zone); - } - } - assert(conn != -1); + /* With connections now left open when (effectively) idle, it is possible to + arrive here because the far end shuts the idle connection (causing a read + event with no data). So just warn if we get here while the pipe is still + in use; an idle pipe is released below with conn left at -1 */ + if (tcp_pipe_in_use(tp)) { + log_msg(LOG_WARNING, "xfrd: an in use TCP connection was closed by the " + "far end, retrying all zones"); + /* need to retry for all the zones connected to it */ + /* these could use different lists and go to a different nextmaster*/ + for(i=0; i<ID_PIPE_NUM; i++) { + if(tp->id[i] && tp->id[i] != TCP_NULL_SKIP) { + xfrd_zone_type* zone = tp->id[i]; + conn = zone->tcp_conn; + zone->tcp_conn = -1; + zone->tcp_waiting = 0; + tcp_pipe_sendlist_remove(tp, zone); + tcp_pipe_id_remove(tp, zone); + xfrd_set_refresh_now(zone); + } + } + assert(conn != -1); + } /* now release the entire tcp pipe */ xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn); } @@ -326,7 +344,12 @@ tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp) { int fd = tp->handler.ev_fd; struct timeval tv; - tv.tv_sec = xfrd->tcp_set->tcp_timeout; + /* pipe is unused - for now set a fixed idle timeout until EDNS0 + Keepalive is implemented */ + if(!tcp_pipe_in_use(tp)) + tv.tv_sec = xfrd->tcp_set->tcp_idle_timeout; + else +
tv.tv_sec = xfrd->tcp_set->tcp_timeout; tv.tv_usec = 0; if(tp->handler_added) event_del(&tp->handler); @@ -360,8 +383,16 @@ xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg) } if((event & EV_TIMEOUT) && tp->handler_added) { /* tcp connection timed out */ - DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout")); - xfrd_tcp_pipe_stop(tp); + /* pipe is unused (timeout triggered while idle), just release it but + note that we don't have access to the conn for the pipe from here, so pass -1 */ + if(!tcp_pipe_in_use(tp)){ + DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: idle pipeline had tcp timeout " + "event")); + xfrd_tcp_pipe_release(xfrd->tcp_set, tp, -1); + } + else + /* timed out while in use, do cleanup before releasing */ + xfrd_tcp_pipe_stop(tp); } } @@ -400,61 +431,12 @@ pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp, } } -void -xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone) -{ - struct xfrd_tcp_pipeline* tp; - assert(zone->tcp_conn == -1); - assert(zone->tcp_waiting == 0); - if(set->tcp_count < XFRD_MAX_TCP) { - int i; - assert(!set->tcp_waiting_first); - set->tcp_count ++; - /* find a free tcp_buffer */ - for(i=0; i<XFRD_MAX_TCP; i++) { - if(set->tcp_state[i]->tcp_r->fd == -1) { - zone->tcp_conn = i; - break; - } - } - /** What if there is no free tcp_buffer?
return; */ - if (zone->tcp_conn < 0) { - return; - } - - tp = set->tcp_state[zone->tcp_conn]; - zone->tcp_waiting = 0; - - /* stop udp use (if any) */ - if(zone->zone_handler.ev_fd != -1) - xfrd_udp_release(zone); - - if(!xfrd_tcp_open(set, tp, zone)) { - zone->tcp_conn = -1; - set->tcp_count --; - xfrd_set_refresh_now(zone); - return; - } - /* ip and ip_len set by tcp_open */ - tp->node.key = tp; - tp->num_unused = ID_PIPE_NUM; - tp->num_skip = 0; - tp->tcp_send_first = NULL; - tp->tcp_send_last = NULL; - memset(tp->id, 0, sizeof(tp->id)); - for(i=0; i<ID_PIPE_NUM; i++) { - tp->unused[i] = i; - } +static struct xfrd_tcp_pipeline* +xfrd_tcp_find_pipeline(struct xfrd_tcp_set* set, xfrd_zone_type* zone) { - /* insert into tree */ - (void)rbtree_insert(set->pipetree, &tp->node); - xfrd_deactivate_zone(zone); - xfrd_unset_timer(zone); - pipeline_setup_new_zone(set, tp, zone); - return; - } - /* check for a pipeline to the same master with unused ID */ + struct xfrd_tcp_pipeline* tp; + /* check for a pipeline to the same master with unused ID */ if((tp = pipeline_find(set, zone))!= NULL) { int i; if(zone->zone_handler.ev_fd != -1) @@ -466,9 +448,92 @@ xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone) xfrd_deactivate_zone(zone); xfrd_unset_timer(zone); pipeline_setup_new_zone(set, tp, zone); - return; + DEBUG(DEBUG_XFRD,1, (LOG_INFO, + "xfrd: zone %s is re-using tcp conn pipeline %d", + zone->apex_str, zone->tcp_conn)); + return tp; } + return NULL; +} + +static struct xfrd_tcp_pipeline* +xfrd_tcp_new_pipeline(struct xfrd_tcp_set* set, xfrd_zone_type* zone) { + struct xfrd_tcp_pipeline* tp; + /* Open a new pipeline if one is available*/ + if(set->tcp_count < XFRD_MAX_TCP) { + int i; + assert(!set->tcp_waiting_first); + set->tcp_count ++; + /* find a free tcp_buffer */ + for(i=0; i<XFRD_MAX_TCP; i++) { + if(set->tcp_state[i]->tcp_r->fd == -1) { + zone->tcp_conn = i; + break; + } + } + /** What if there is no free tcp_buffer?
return; */ + if (zone->tcp_conn < 0) { + zone->tcp_conn = -1; + set->tcp_count --; + xfrd_set_refresh_now(zone); + return NULL; + } + + tp = set->tcp_state[zone->tcp_conn]; + zone->tcp_waiting = 0; + + /* stop udp use (if any) */ + if(zone->zone_handler.ev_fd != -1) + xfrd_udp_release(zone); + + if(!xfrd_tcp_open(set, tp, zone)) { + zone->tcp_conn = -1; + set->tcp_count --; + xfrd_set_refresh_now(zone); + return NULL; + } + /* ip and ip_len set by tcp_open */ + tp->node.key = tp; + tp->num_unused = ID_PIPE_NUM; + tp->num_skip = 0; + tp->tcp_send_first = NULL; + tp->tcp_send_last = NULL; + memset(tp->id, 0, sizeof(tp->id)); + for(i=0; i<ID_PIPE_NUM; i++) { + tp->unused[i] = i; + } + + /* insert into tree */ + (void)rbtree_insert(set->pipetree, &tp->node); + xfrd_deactivate_zone(zone); + xfrd_unset_timer(zone); + pipeline_setup_new_zone(set, tp, zone); + DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s opened a new tcp conn num %d with fd %d", + zone->apex_str, zone->tcp_conn, set->tcp_state[zone->tcp_conn]->tcp_r->fd)); + return tp; + } + return NULL; +} + +void +xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone) +{ + struct xfrd_tcp_pipeline* tp = NULL; + assert(zone->tcp_conn == -1); + assert(zone->tcp_waiting == 0); + + DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: connection reuse is set to %s", + set->xfrd_conn_reuse?"yes":"no")); + + /* prefer an open pipeline to this master when reuse is enabled */ + if(set->xfrd_conn_reuse) tp = xfrd_tcp_find_pipeline(set, zone); + if(tp != NULL) return; + /* slot free: new_pipeline opens it, or fails and calls xfrd_set_refresh_now itself */ + if(set->tcp_count < XFRD_MAX_TCP) { (void)xfrd_tcp_new_pipeline(set, zone); return; } + /* all slots busy: share a pipeline if that was not tried yet, else wait below */ + if(!set->xfrd_conn_reuse && xfrd_tcp_find_pipeline(set, zone)) return; + /* wait, at end of line */ DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp " "connections (%d) reached.", XFRD_MAX_TCP)); @@ -908,7 +973,7 @@ xfrd_tcp_read(struct xfrd_tcp_pipeline* tp) xfrd_make_request(zone); break; } - xfrd_tcp_release(xfrd->tcp_set, zone); + xfrd_tcp_release(xfrd->tcp_set, zone); assert(zone->round_num == -1);
break; case xfrd_packet_notimpl: @@ -935,8 +1000,9 @@ xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone) { int conn = zone->tcp_conn; struct xfrd_tcp_pipeline* tp = set->tcp_state[conn]; - DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s", - zone->apex_str, zone->master->ip_address_spec)); + DEBUG(DEBUG_XFRD,1, (LOG_INFO, + "xfrd: zone %s mapping to tcp conn %d (%s) is released", + zone->apex_str, zone->tcp_conn, zone->master->ip_address_spec)); assert(zone->tcp_conn != -1); assert(zone->tcp_waiting == 0); zone->tcp_conn = -1; @@ -947,8 +1013,9 @@ xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone) /* remove it from the ID list */ if(tp->id[zone->query_id] != TCP_NULL_SKIP) tcp_pipe_id_remove(tp, zone); - DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused", - tp->num_unused)); + DEBUG(DEBUG_XFRD,1, (LOG_INFO, + "xfrd: zone %s query id removed from pipeline, %d unused ids", + zone->apex_str, tp->num_unused)); /* if pipe was full, but no more, then see if waiting element is * for the same master, and can fill the unused ID */ if(tp->num_unused == 1 && set->tcp_waiting_first) { @@ -974,16 +1041,22 @@ xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone) /* waiting zone did not go to same server */ } - /* if all unused, or only skipped leftover, close the pipeline */ - if(tp->num_unused >= ID_PIPE_NUM || tp->num_skip >= ID_PIPE_NUM - tp->num_unused) - xfrd_tcp_pipe_release(set, tp, conn); + if (!set->xfrd_conn_reuse) { + /* if all unused, or only skipped leftover, close the pipeline */ + if(!tcp_pipe_in_use(tp)) + xfrd_tcp_pipe_release(set, tp, conn); + } else { + tcp_pipe_reset_timeout(tp); + } } void xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp, int conn) { - DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released")); + + DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipeline %d being released - has fd %d", + conn, tp->tcp_r->fd)); /* one handler per tcp pipe */ 
if(tp->handler_added) event_del(&tp->handler); @@ -998,6 +1071,20 @@ xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp, /* remove from pipetree */ (void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->node); + /* conn == -2 signals the pipe should not be reused e.g., if we are shutting down */ + if (conn == -2) + return; + + /* conn == -1: the calling function didn't know the conn (e.g. timeout) + find the connection for this pipeline so it can be re-used*/ + if (conn == -1) { + for(int i=0; i<XFRD_MAX_TCP; i++) { + if (set->tcp_state[i] == tp) + conn = i; + } + } + assert(conn >= 0); + /* a waiting zone can use the free tcp slot (to another server) */ /* if that zone fails to set-up or connect, we try to start the next * waiting zone in the list */ @@ -1043,6 +1130,8 @@ xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp, /* no task to start, cleanup */ assert(!set->tcp_waiting_first); set->tcp_count --; + DEBUG(DEBUG_XFRD,1, (LOG_INFO, + "xfrd: %d tcp pipelines in use", set->tcp_count)); assert(set->tcp_count >= 0); } diff --git a/xfrd-tcp.h b/xfrd-tcp.h index fd6918768..870e7e306 100644 --- a/xfrd-tcp.h +++ b/xfrd-tcp.h @@ -33,6 +33,10 @@ struct xfrd_tcp_set { int tcp_count; /* TCP timeout. */ int tcp_timeout; + /* TCP idle timeout. */ + int tcp_idle_timeout; + /* TCP connection reuse.
*/ + int xfrd_conn_reuse; /* rbtree with pipelines sorted by master */ rbtree_type* pipetree; /* double linked list of zones waiting for a TCP connection */ diff --git a/xfrd.c b/xfrd.c index 65d6d955a..56969ef68 100644 --- a/xfrd.c +++ b/xfrd.c @@ -198,6 +198,8 @@ xfrd_init(int socket, struct nsd* nsd, int shortsoa, int reload_active, xfrd->tcp_set = xfrd_tcp_set_create(xfrd->region); xfrd->tcp_set->tcp_timeout = nsd->tcp_timeout; + xfrd->tcp_set->tcp_idle_timeout = nsd->tcp_idle_timeout; + xfrd->tcp_set->xfrd_conn_reuse = nsd->xfrd_conn_reuse; #if !defined(HAVE_ARC4RANDOM) && !defined(HAVE_GETRANDOM) srandom((unsigned long) getpid() * (unsigned long) time(NULL)); #endif @@ -369,6 +371,13 @@ xfrd_shutdown() } } close_notify_fds(xfrd->notify_zones); + /* and any open TCP connections so the far end knows we are gone; -2 = do not reuse the slot */ + struct xfrd_tcp_pipeline* tp; + for(int i=0; i<XFRD_MAX_TCP; i++) { + tp = xfrd->tcp_set->tcp_state[i]; + if (tp->tcp_r->fd != -1) + xfrd_tcp_pipe_release(xfrd->tcp_set, tp, -2); + } /* wait for server parent (if necessary) */ if(xfrd->reload_pid != -1) {