Skip to content

Commit

Permalink
fix: waiting for ofed-driver to be ready
Browse files Browse the repository at this point in the history
Signed-off-by: ty-dc <[email protected]>
  • Loading branch information
ty-dc committed Sep 5, 2024
1 parent 74c40a4 commit f5bc309
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 14 deletions.
4 changes: 2 additions & 2 deletions ofed-driver/chart/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ spec:
startupProbe:
exec:
command:
[sh, -c, 'ls /.driver-ready']
[sh, -c, 'ls ./entrypoint.sh']
initialDelaySeconds: {{ .Values.startupProbe.initialDelaySeconds }}
failureThreshold: 60
successThreshold: 1
Expand All @@ -115,7 +115,7 @@ spec:
command:
[sh, -c, 'lsmod | grep mlx5_core']
initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
failureThreshold: 1
failureThreshold: 3
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
volumes:
- name: run-mlnx-ofed
Expand Down
4 changes: 2 additions & 2 deletions ofed-driver/chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ resources:

startupProbe:
# it may take a long time to build the driver
initialDelaySeconds: 20
initialDelaySeconds: 60
periodSeconds: 20

livenessProbe:
initialDelaySeconds: 30
initialDelaySeconds: 60
periodSeconds: 30

readinessProbe:
Expand Down
21 changes: 12 additions & 9 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,19 @@ deploy: checkBin
done ; \
set -x ; \
helm upgrade --install $(PROJECT) $(ROOT_DIR)/$(PROJECT)/chart --wait --debug --timeout 10m0s -n $(PROJECT) --create-namespace --kubeconfig $(KIND_KUBECONFIG) ; \
for ((N=0;N<=30;N++)); do \
if kubectl get pod -n $(PROJECT) -o wide --kubeconfig $(KIND_KUBECONFIG) | grep $(PROJECT) | sed '1 d' | grep -qv -i -E "Running|Completed"; then \
echo "Waiting for Pods to be ready..."; \
sleep 10; \
else \
echo "All Pods are in Running or Completed state."; \
exit 0 ; \
kubectl --kubeconfig=$(KIND_KUBECONFIG) wait --for=condition=Ready pods --all -n $(PROJECT) --timeout=300s ; \
if [ $$? -eq 0 ]; then \
echo "all pods are ready"; \
exit 0; \
else \
echo "timeout or some pods are not ready"; \
if [ "$(PROJECT)" = "ofed-driver" ]; then \
if kubectl --kubeconfig=$(KIND_KUBECONFIG) get pods -n $(PROJECT) --field-selector=status.phase=Running ; then \
exit 0 ; \
fi ; \
fi ; \
done; \
exit 1 ; \
exit 1; \
fi

.PHONY: checkBin
checkBin:
Expand Down
6 changes: 5 additions & 1 deletion test/scripts/clean.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ echo "KIND_KUBECONFIG: $KIND_KUBECONFIG"
echo "---------- show pods -----------"
kubectl get pod -A -o wide --kubeconfig ${KIND_KUBECONFIG}

FAILED_POD=` kubectl get pod -A -o wide --kubeconfig ${KIND_KUBECONFIG} | sed '1 d' | grep -v -i -E "Running|completed" | awk '{print $1,$2}' | tr ' ' ',' `
FAILED_POD=` kubectl get pod -A -o wide --kubeconfig ${KIND_KUBECONFIG} | sed '1 d' | grep -v -i -E "Running|completed" | awk '{print $1,$2}' | tr ' ' ',' `
# Dump diagnostics for every entry in FAILED_POD.
# Each ITEM is a "namespace,pod" pair; it is turned back into "namespace pod"
# and deliberately left unquoted below so it expands to `-n <namespace> <pod>`.
for ITEM in ${FAILED_POD} ; do
    POD_INFO=`echo ${ITEM} | tr ',' ' '`
    echo "------ describe pod $POD_INFO "
    kubectl describe pod -n ${POD_INFO} --kubeconfig ${KIND_KUBECONFIG}
    echo "------ logs pod $POD_INFO "
    kubectl logs -n ${POD_INFO} --kubeconfig ${KIND_KUBECONFIG}
    # -p prints the logs of the previous container instance; label it
    # separately so it is not confused with the current container's logs.
    echo "------ previous logs pod $POD_INFO "
    kubectl logs -n ${POD_INFO} --kubeconfig ${KIND_KUBECONFIG} -p
done

ALL_HELM=` helm list -A --kubeconfig ${KIND_KUBECONFIG} | sed '1 d' | awk '{print $2,$1}' | tr ' ' ',' `
Expand Down

0 comments on commit f5bc309

Please sign in to comment.