run_operator_locally.sh: more retrying, debuggability (#2218)
actually retry kubectl port-forward and better messages
This commit is contained in:
parent
1e64ae788e
commit
ec701f6d30
|
|
@ -30,8 +30,8 @@ function retry(){
|
||||||
local -r retry_cmd="$1"
|
local -r retry_cmd="$1"
|
||||||
local -r retry_msg="$2"
|
local -r retry_msg="$2"
|
||||||
|
|
||||||
# times out after 1 minute
|
# Time out after three minutes.
|
||||||
for i in {1..20}; do
|
for i in {1..60}; do
|
||||||
if eval "$retry_cmd"; then
|
if eval "$retry_cmd"; then
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
@ -165,11 +165,63 @@ function forward_ports(){
|
||||||
local operator_pod
|
local operator_pod
|
||||||
operator_pod=$(kubectl get pod -l name=postgres-operator -o jsonpath={.items..metadata.name})
|
operator_pod=$(kubectl get pod -l name=postgres-operator -o jsonpath={.items..metadata.name})
|
||||||
|
|
||||||
# runs in the background to keep current terminal responsive
|
# Spawn `kubectl port-forward` in the background to keep current terminal
|
||||||
# stdout redirect removes the info message about forwarded ports; the message sometimes garbles the cli prompt
|
# responsive. Hide stdout because otherwise there is a note about each TCP
|
||||||
kubectl port-forward "$operator_pod" "$LOCAL_PORT":"$OPERATOR_PORT" &> /dev/null &
|
# connection. Do not hide stderr so port-forward setup errors can be
|
||||||
|
# debugged. Sometimes the port-forward setup fails because expected k8s
|
||||||
|
# state isn't achieved yet. Try to detect that case and then run the
|
||||||
|
# command again (in a finite loop).
|
||||||
|
for _attempt in {1..20}; do
|
||||||
|
# Delay between retry attempts. First attempt should already be
|
||||||
|
# delayed.
|
||||||
|
echo "soon: invoke kubectl port-forward command (attempt $_attempt)"
|
||||||
|
sleep 5
|
||||||
|
|
||||||
echo $! > "$PATH_TO_PORT_FORWARED_KUBECTL_PID"
|
# With the --pod-running-timeout=4s argument the process is expected
|
||||||
|
# to terminate within about that time if the pod isn't ready yet.
|
||||||
|
kubectl port-forward --pod-running-timeout=4s "$operator_pod" "$LOCAL_PORT":"$OPERATOR_PORT" 1> /dev/null &
|
||||||
|
_kubectl_pid=$!
|
||||||
|
_pf_success=true
|
||||||
|
|
||||||
|
# A successful `kubectl port-forward` setup can pragmatically be
|
||||||
|
# detected with a time-based criterion: it is a long-running process if
|
||||||
|
# successfully set up. If it does not terminate within deadline then
|
||||||
|
# consider the setup successful. Overall, observe the process for
|
||||||
|
# roughly 7 seconds. If it terminates before that it's certainly an
|
||||||
|
# error. If it did not terminate within that time frame then consider
|
||||||
|
# setup successful.
|
||||||
|
for ib in {1..7}; do
|
||||||
|
sleep 1
|
||||||
|
# Portable and non-blocking test: is process still running?
|
||||||
|
if kill -s 0 -- "${_kubectl_pid}" >/dev/null 2>&1; then
|
||||||
|
echo "port-forward process is still running"
|
||||||
|
else
|
||||||
|
# port-forward process seems to have terminated, reap zombie
|
||||||
|
set +e
|
||||||
|
# `wait` is now expected to be non-blocking, and exits with the
|
||||||
|
# exit code of pid (first arg).
|
||||||
|
wait $_kubectl_pid
|
||||||
|
_kubectl_rc=$?
|
||||||
|
set -e
|
||||||
|
echo "port-forward process terminated with exit code ${_kubectl_rc}"
|
||||||
|
_pf_success=false
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ ${_pf_success} = true ]; then
|
||||||
|
echo "port-forward setup seems successful. leave retry loop."
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "${_pf_success}" = false ]; then
|
||||||
|
echo "port-forward setup failed after retrying. exit."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "${_kubectl_pid}" > "$PATH_TO_PORT_FORWARED_KUBECTL_PID"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue