Skip to content

Commit

Permalink
Expose kernel_info_timeout configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
lresende committed Feb 19, 2024
1 parent 7ab3ebe commit 99578cf
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 1 deletion.
4 changes: 4 additions & 0 deletions docs/source/operators/config-add-env.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ Besides those environment variables associated with configurable options, the fo
startup attempt will take place. If a second timeout occurs, Enterprise
Gateway will report a failure to the client.
EG_KERNEL_INFO_TIMEOUT=60
The time (in seconds) Enterprise Gateway will wait for a kernel info response
before deeming the request a failure.
EG_SENSITIVE_ENV_KEYS=""
A comma separated list (e.g. "secret,pwd,auth") of sensitive environment
variables. Any environment variables that contain any of the words from this
Expand Down
4 changes: 4 additions & 0 deletions docs/source/users/kernel-envs.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ There are several supported `KERNEL_` variables that the Enterprise Gateway serv
be submitted in the kernel startup if that particular kernel's startup time is
expected to exceed that of the EG_KERNEL_LAUNCH_TIMEOUT set when Enterprise
Gateway starts.
KERNEL_INFO_TIMEOUT=<from user> or EG_KERNEL_INFO_TIMEOUT=60
The time (in seconds) Enterprise Gateway will wait for a kernel info response
before deeming the request a failure.
KERNEL_NAMESPACE=<from user> or KERNEL_POD_NAME or EG_NAMESPACE
Kubernetes only. This indicates the name of the namespace to use or create on
Expand Down
6 changes: 6 additions & 0 deletions enterprise_gateway/services/kernels/remotemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ..sessions.kernelsessionmanager import KernelSessionManager

default_kernel_launch_timeout = float(os.getenv("EG_KERNEL_LAUNCH_TIMEOUT", "30"))
default_kernel_info_timeout = float(os.getenv("EG_KERNEL_INFO_TIMEOUT", "60"))
kernel_restart_status_poll_interval = float(os.getenv("EG_RESTART_STATUS_POLL_INTERVAL", 1.0))


Expand Down Expand Up @@ -437,6 +438,7 @@ def __init__(self, **kwargs: dict[str, Any] | None):
self.kernel_id = None
self.user_overrides = {}
self.kernel_launch_timeout = default_kernel_launch_timeout
self.kernel_info_timeout = default_kernel_info_timeout
self.restarting = False # need to track whether we're in a restart situation or not
self._activity_stream = None

Expand Down Expand Up @@ -513,6 +515,10 @@ def _capture_user_overrides(self, **kwargs: dict[str, Any] | None) -> None:
self.kernel_launch_timeout = float(
env.get("KERNEL_LAUNCH_TIMEOUT", default_kernel_launch_timeout)
)
# if KERNEL_INFO_TIMEOUT is passed in the payload, override it.
self.kernel_info_timeout = float(
env.get("KERNEL_INFO_TIMEOUT", default_kernel_info_timeout)
)
self.user_overrides.update(
{
key: value
Expand Down
2 changes: 2 additions & 0 deletions enterprise_gateway/services/processproxies/processproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
redaction_mask = os.getenv("EG_REDACTION_MASK", "********")

default_kernel_launch_timeout = float(os.getenv("EG_KERNEL_LAUNCH_TIMEOUT", "30"))
default_kernel_info_timeout = float(os.getenv("EG_KERNEL_INFO_TIMEOUT", "60"))
max_poll_attempts = int(os.getenv("EG_MAX_POLL_ATTEMPTS", "10"))
poll_interval = float(os.getenv("EG_POLL_INTERVAL", "0.5"))
socket_timeout = float(os.getenv("EG_SOCKET_TIMEOUT", "0.005"))
Expand Down Expand Up @@ -430,6 +431,7 @@ def __init__(self, kernel_manager: RemoteKernelManager, proxy_config: dict): #

self.kernel_id = self.kernel_manager.kernel_id
self.kernel_launch_timeout = default_kernel_launch_timeout
self.kernel_info_timeout = default_kernel_info_timeout
self.lower_port = 0
self.upper_port = 0
self._validate_port_range()
Expand Down
1 change: 1 addition & 0 deletions etc/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ services:
environment:
- "EG_DOCKER_NETWORK=${EG_DOCKER_NETWORK:-enterprise-gateway_enterprise-gateway}"
- "EG_KERNEL_LAUNCH_TIMEOUT=${EG_KERNEL_LAUNCH_TIMEOUT:-60}"
- "EG_KERNEL_INFO_TIMEOUT=${EG_KERNEL_INFO_TIMEOUT:-60}"
- "EG_CULL_IDLE_TIMEOUT=${EG_CULL_IDLE_TIMEOUT:-3600}"
# Use double-defaulting for B/C. Support for EG_KERNEL_WHITELIST will be removed in a future release
- "EG_ALLOWED_KERNELS=${EG_ALLOWED_KERNELS:-${EG_KERNEL_WHITELIST:-'r_docker','python_docker','python_tf_docker','python_tf_gpu_docker','scala_docker'}}"
Expand Down
4 changes: 3 additions & 1 deletion etc/docker/enterprise-gateway/start-enterprise-gateway.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export EG_CULL_CONNECTED=${EG_CULL_CONNECTED:-False}
EG_ALLOWED_KERNELS=${EG_ALLOWED_KERNELS:-${EG_KERNEL_WHITELIST:-"null"}}
export EG_ALLOWED_KERNELS=`echo ${EG_ALLOWED_KERNELS} | sed 's/[][]//g'` # sed is used to strip off surrounding brackets as they should no longer be included.
export EG_DEFAULT_KERNEL_NAME=${EG_DEFAULT_KERNEL_NAME:-python_docker}
export EG_KERNEL_INFO_TIMEOUT=${EG_KERNEL_INFO_TIMEOUT:-60}

# Determine whether the kernels-allowed list should be added to the start command.
# This is conveyed via a 'null' value for the env - which indicates no kernel names
Expand All @@ -46,4 +47,5 @@ exec jupyter enterprisegateway \
--RemoteMappingKernelManager.cull_idle_timeout=${EG_CULL_IDLE_TIMEOUT} \
--RemoteMappingKernelManager.cull_interval=${EG_CULL_INTERVAL} \
--RemoteMappingKernelManager.cull_connected=${EG_CULL_CONNECTED} \
--RemoteMappingKernelManager.default_kernel_name=${EG_DEFAULT_KERNEL_NAME}
--RemoteMappingKernelManager.default_kernel_name=${EG_DEFAULT_KERNEL_NAME} \
--RemoteMappingKernelManager.kernel_info_timeout=${EG_KERNEL_INFO_TIMEOUT}
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ spec:
value: {{ .Values.logLevel }}
- name: EG_KERNEL_LAUNCH_TIMEOUT
value: !!str {{ .Values.kernel.launchTimeout }}
- name: EG_KERNEL_INFO_TIMEOUT
value: !!str {{ .Values.kernel.infoTimeout }}
- name: EG_ALLOWED_KERNELS
value: {{ toJson .Values.kernel.allowedKernels | squote }}
- name: EG_DEFAULT_KERNEL_NAME
Expand Down

0 comments on commit 99578cf

Please sign in to comment.