Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[21.05] zebra restart tuning and keepalived integration #1097

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions nixos/roles/router/keepalived/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ let
'';
};

# Script used as a keepalived health check: its exit status is that of
# `systemctl -q is-active zebra`, i.e. non-zero whenever the zebra unit
# is not in the "active" state.
checkZebraLiveness = fclib.writeShellApplication {
  name = "check-zebra-liveness";
  runtimeInputs = [ pkgs.systemd ];
  text = ''
    systemctl -q is-active zebra
  '';
};

keepalivedConf = pkgs.writeText "keepalived.conf" ''
global_defs {
enable_script_security
Expand Down Expand Up @@ -54,6 +62,7 @@ lib.mkIf role.enable {

environment.etc."keepalived/check-default-route-v4".source = "${checkDefaultRoute4}/bin/check-default-route-v4";
environment.etc."keepalived/check-default-route-v6".source = "${checkDefaultRoute6}/bin/check-default-route-v6";
environment.etc."keepalived/check-zebra-liveness".source = "${checkZebraLiveness}/bin/check-zebra-liveness";
environment.etc."keepalived/fc-keepalived".source = "${pkgs.fc.agent}/bin/fc-keepalived";
environment.etc."keepalived/keepalived.conf".source = keepalivedConf;

Expand Down
8 changes: 8 additions & 0 deletions nixos/roles/router/keepalived/dev.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ vrrp_script check_default_route_v6 {
rise 2
}

# Liveness check for the zebra routing daemon. Runs the
# check-zebra-liveness helper installed under /etc/keepalived.
vrrp_script check_zebra_liveness {
script "/etc/keepalived/check-zebra-liveness"
# Timing, as described in keepalived.conf(5): invoke the script every
# `interval` seconds; `fall` consecutive failures mark the check as
# failed, `rise` consecutive successes mark it as healthy again.
interval 1
fall 2
rise 2
}

track_file check_stop_file {
# Allow admins to locally send keepalive into FAIL state by adding
# a non "0" entry
Expand Down Expand Up @@ -39,6 +46,7 @@ vrrp_sync_group router {
track_script {
check_default_route_v4 weight 10
check_default_route_v6 weight 5
check_zebra_liveness
}

track_file {
Expand Down
8 changes: 8 additions & 0 deletions nixos/roles/router/keepalived/rzob.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ vrrp_script check_default_route_v6 {
rise 2
}

# Liveness check for the zebra routing daemon. Runs the
# check-zebra-liveness helper installed under /etc/keepalived.
vrrp_script check_zebra_liveness {
script "/etc/keepalived/check-zebra-liveness"
# Timing, as described in keepalived.conf(5): invoke the script every
# `interval` seconds; `fall` consecutive failures mark the check as
# failed, `rise` consecutive successes mark it as healthy again.
interval 1
fall 2
rise 2
}

track_file check_stop_file {
# Allow admins to locally send keepalive into FAIL state by adding
# a non "0" entry
Expand Down Expand Up @@ -39,6 +46,7 @@ vrrp_sync_group router {
track_script {
check_default_route_v4 weight 10
check_default_route_v6 weight 5
check_zebra_liveness
}

track_file {
Expand Down
127 changes: 0 additions & 127 deletions nixos/roles/router/keepalived/rzrl1.conf

This file was deleted.

8 changes: 8 additions & 0 deletions nixos/roles/router/keepalived/whq.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ vrrp_script check_dev_route_v6 {
rise 2
}

# Liveness check for the zebra routing daemon. Runs the
# check-zebra-liveness helper installed under /etc/keepalived.
vrrp_script check_zebra_liveness {
script "/etc/keepalived/check-zebra-liveness"
# Timing, as described in keepalived.conf(5): invoke the script every
# `interval` seconds; `fall` consecutive failures mark the check as
# failed, `rise` consecutive successes mark it as healthy again.
interval 1
fall 2
rise 2
}

track_file check_stop_file {
# Allow admins to locally send keepalive into FAIL state by adding
# a non "0" entry
Expand Down Expand Up @@ -39,6 +46,7 @@ vrrp_sync_group router {
track_script {
check_dev_route_v4 weight 10
check_dev_route_v6 weight 5
check_zebra_liveness
}

track_file {
Expand Down
17 changes: 15 additions & 2 deletions nixos/services/frr.nix
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,19 @@ in
description = if service == "zebra" then "FRR Zebra routing manager"
else "FRR ${toUpper service} routing daemon";

unitConfig.Documentation = if service == "zebra" then "man:zebra(8)"
else "man:${daemon}(8) man:zebra(8)";
unitConfig = {
  Documentation = if service == "zebra" then "man:zebra(8)"
                  else "man:${daemon}(8) man:zebra(8)";
} // optionalAttrs (service == "zebra") {
  # More generous start rate limiting for zebra so that it can better
  # recover from crash situations. The restart delay (RestartSec) for
  # zebra is set to 10 seconds in serviceConfig below. Allow up to 20
  # starts within a 320 s (5m20s) window, i.e. up to 16 seconds per
  # start attempt. If zebra restarts in a tight loop (one attempt every
  # 10 s), the start limit is hit after 20 * 10 s = 3m20s.
  #
  # StartLimitIntervalSec is the name documented in systemd.unit(5);
  # the shorter StartLimitInterval is only a legacy alias.
  StartLimitIntervalSec = 320;
  StartLimitBurst = 20;
};

# We want to refactor this into reloadTriggers when upgrading to
# 24.05
Expand All @@ -218,6 +229,8 @@ in
+ " " + (concatStringsSep " " scfg.extraOptions);
ExecReload = "${pkgs.python3.interpreter} ${pkgs.frr}/libexec/frr/frr-reload.py --reload --daemon ${daemonName service} --bindir ${pkgs.frr}/bin --rundir /run/frr /etc/frr/${service}.conf";
Restart = "always";
} // optionalAttrs (service == "zebra") {
RestartSec = 10;
};
});
in
Expand Down
Loading