From 2032c67be002f00638d5af1693d247c97c1aa6dd Mon Sep 17 00:00:00 2001 From: Travis Glenn Hansen Date: Sat, 7 May 2022 08:59:57 -0600 Subject: [PATCH] implement generic retry function, use it to make tests more robust Signed-off-by: Travis Glenn Hansen --- ci/bin/run.ps1 | 13 ++++-- src/driver/controller-zfs-generic/index.js | 43 ++++++++++++++++--- src/driver/controller-zfs/index.js | 36 +++++++--------- src/driver/freenas/api.js | 44 ++++++++++++++++++- src/driver/freenas/ssh.js | 50 +++++++++++++++++++--- src/driver/index.js | 23 ++++++---- src/utils/general.js | 41 ++++++++++++++++++ 7 files changed, 203 insertions(+), 47 deletions(-) diff --git a/ci/bin/run.ps1 b/ci/bin/run.ps1 index 3e7be00..e3d30aa 100644 --- a/ci/bin/run.ps1 +++ b/ci/bin/run.ps1 @@ -58,7 +58,9 @@ while (!(Test-Path "${env:CSI_ENDPOINT}")) { $iter++ Write-Output "Waiting for ${env:CSI_ENDPOINT} to appear" Start-Sleep 1 - Get-Job | Receive-Job + try { + Get-Job | Receive-Job + } catch {} if ($iter -gt $max_iter) { Write-Output "${env:CSI_ENDPOINT} failed to appear" $started = 0 @@ -80,13 +82,18 @@ while ($csi_sanity_job -and ($csi_sanity_job.State -eq "Running" -or $csi_sanity if (($job -eq $csi_grpc_proxy_job) -and ($iter -gt 20)) { continue } + if (!$job.HasMoreData) { + continue + } try { $job | Receive-Job } catch { if ($job.State -ne "Failed") { - Write-Output "failure receiving job data: " + $_ - $job | fl + Write-Output "failure receiving job data: ${_}" + # just swallow the errors as it seems there are various reasons errors + # may show up (perhaps no data currently, etc) + #$job | fl #throw $_ } } diff --git a/src/driver/controller-zfs-generic/index.js b/src/driver/controller-zfs-generic/index.js index cd91b71..4d522b8 100644 --- a/src/driver/controller-zfs-generic/index.js +++ b/src/driver/controller-zfs-generic/index.js @@ -1,9 +1,9 @@ const _ = require("lodash"); const { ControllerZfsBaseDriver } = require("../controller-zfs"); const { GrpcError, grpc } = require("../../utils/grpc"); +const GeneralUtils = require("../../utils/general"); const registry = require("../../utils/registry"); const SshClient = require("../../utils/ssh").SshClient; -const sleep = require("../../utils/general").sleep; const { Zetabyte, ZfsSshProcessManager } = require("../../utils/zfs"); const Handlebars = require("handlebars"); @@ -231,8 +231,12 @@ class ControllerZfsGenericDriver extends ControllerZfsBaseDriver { } } - response = await this.targetCliCommand( - ` + await GeneralUtils.retry( + 3, + 2000, + async () => { + await this.targetCliCommand( + ` # create target cd /iscsi create ${basename}:${iscsiName} @@ -250,6 +254,16 @@ create ${iscsiName} /dev/${extentDiskName} cd /iscsi/${basename}:${iscsiName}/tpg1/luns create /backstores/block/${iscsiName} ` + ); + }, + { + retryCondition: (err) => { + if (err.stdout && err.stdout.includes("Ran out of input")) { + return true; + } + return false; + }, + } ); break; default: @@ -313,7 +327,7 @@ create /backstores/block/${iscsiName} } } } - await sleep(2000); // let things settle + await GeneralUtils.sleep(2000); // let things settle break; default: throw new GrpcError( @@ -343,7 +357,7 @@ create /backstores/block/${iscsiName} } } } - await sleep(2000); // let things settle + await GeneralUtils.sleep(2000); // let things settle break; default: throw new GrpcError( @@ -392,8 +406,12 @@ create /backstores/block/${iscsiName} switch (this.options.iscsi.shareStrategy) { case "targetCli": basename = this.options.iscsi.shareStrategyTargetCli.basename; - response = await this.targetCliCommand( - ` + await GeneralUtils.retry( + 3, + 2000, + async () => { + await this.targetCliCommand( + ` # delete target cd /iscsi delete ${basename}:${iscsiName} @@ -402,7 +420,18 @@ delete ${basename}:${iscsiName} cd /backstores/block delete ${iscsiName} ` + ); + }, + { + retryCondition: (err) => { + if (err.stdout && err.stdout.includes("Ran out of input")) { + return true; + } + return false; + }, + } ); + break; default: break; diff --git a/src/driver/controller-zfs/index.js b/src/driver/controller-zfs/index.js index 6adfa90..ccc5b01 100644 --- a/src/driver/controller-zfs/index.js +++ b/src/driver/controller-zfs/index.js @@ -1318,30 +1318,24 @@ class ControllerZfsBaseDriver extends CsiBaseDriver { // NOTE: -R will recursively delete items + dependent filesets // delete dataset try { - let max_tries = 12; - let sleep_time = 5000; - let current_try = 1; - let success = false; - while (!success && current_try <= max_tries) { - try { + await GeneralUtils.retry( + 12, + 5000, + async () => { await zb.zfs.destroy(datasetName, { recurse: true, force: true }); - success = true; - } catch (err) { - if ( - err.toString().includes("dataset is busy") || - err.toString().includes("target is busy") - ) { - current_try++; - if (current_try > max_tries) { - throw err; - } else { - await GeneralUtils.sleep(sleep_time); + }, + { + retryCondition: (err) => { + if ( + err.toString().includes("dataset is busy") || + err.toString().includes("target is busy") + ) { + return true; } - } else { - throw err; - } + return false; + }, } - } + ); } catch (err) { if (err.toString().includes("filesystem has dependent clones")) { throw new GrpcError( diff --git a/src/driver/freenas/api.js b/src/driver/freenas/api.js index 241ea7e..8788d6a 100644 --- a/src/driver/freenas/api.js +++ b/src/driver/freenas/api.js @@ -261,7 +261,27 @@ class FreeNASApiDriver extends CsiBaseDriver { break; } - response = await httpClient.post("/sharing/nfs", share); + response = await GeneralUtils.retry( + 3, + 1000, + async () => { + return await httpClient.post("/sharing/nfs", share); + }, + { + retryCondition: (err) => { + if (err.code == "ECONNRESET") { + return true; + } + if (err.code == "ECONNABORTED") { + return true; + } + if (err.response && err.response.statusCode == 504) { + return true; + } + return false; + }, + } + ); /** * v1 = 201 @@ -482,7 +502,27 @@ class FreeNASApiDriver extends CsiBaseDriver { break; } - response = await httpClient.post(endpoint, share); + response = await GeneralUtils.retry( + 3, + 1000, + async () => { + return await httpClient.post(endpoint, share); + }, + { + retryCondition: (err) => { + if (err.code == "ECONNRESET") { + return true; + } + if (err.code == "ECONNABORTED") { + return true; + } + if (err.response && err.response.statusCode == 504) { + return true; + } + return false; + }, + } + ); /** * v1 = 201 diff --git a/src/driver/freenas/ssh.js b/src/driver/freenas/ssh.js index 0ce6d1a..249f05d 100644 --- a/src/driver/freenas/ssh.js +++ b/src/driver/freenas/ssh.js @@ -6,7 +6,7 @@ const SshClient = require("../../utils/ssh").SshClient; const HttpClient = require("./http").Client; const TrueNASApiClient = require("./http/api").Api; const { Zetabyte, ZfsSshProcessManager } = require("../../utils/zfs"); -const { sleep, stringify } = require("../../utils/general"); +const GeneralUtils = require("../../utils/general"); const Handlebars = require("handlebars"); @@ -308,7 +308,27 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { break; } - response = await httpClient.post("/sharing/nfs", share); + response = await GeneralUtils.retry( + 3, + 1000, + async () => { + return await httpClient.post("/sharing/nfs", share); + }, + { + retryCondition: (err) => { + if (err.code == "ECONNRESET") { + return true; + } + if (err.code == "ECONNABORTED") { + return true; + } + if (err.response && err.response.statusCode == 504) { + return true; + } + return false; + }, + } + ); /** * v1 = 201 @@ -529,7 +549,27 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { break; } - response = await httpClient.post(endpoint, share); + response = await GeneralUtils.retry( + 3, + 1000, + async () => { + return await httpClient.post(endpoint, share); + }, + { + retryCondition: (err) => { + if (err.code == "ECONNRESET") { + return true; + } + if (err.code == "ECONNABORTED") { + return true; + } + if (err.response && err.response.statusCode == 504) { + return true; + } + return false; + }, + } + ); /** * v1 = 201 @@ -1614,7 +1654,7 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { targetId, retries ); - await sleep(retryWait); + await GeneralUtils.sleep(retryWait); response = await httpClient.delete(endpoint); } @@ -2134,7 +2174,7 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { // likely bad creds/url throw new GrpcError( grpc.status.UNKNOWN, - `FreeNAS error getting system version info: ${stringify({ + `FreeNAS error getting system version info: ${GeneralUtils.stringify({ errors: versionErrors, responses: versionResponses, })}` diff --git a/src/driver/index.js b/src/driver/index.js index 8225ecf..2f82745 100644 --- a/src/driver/index.js +++ b/src/driver/index.js @@ -759,16 +759,21 @@ class CsiBaseDriver { } // create 'DB' entry - await iscsi.iscsiadm.createNodeDBEntry( - iscsiConnection.iqn, - iscsiConnection.portal, - nodeDB - ); + await GeneralUtils.retry(5, 2000, async () => { + await iscsi.iscsiadm.createNodeDBEntry( + iscsiConnection.iqn, + iscsiConnection.portal, + nodeDB + ); + }); + // login - await iscsi.iscsiadm.login( - iscsiConnection.iqn, - iscsiConnection.portal - ); + await GeneralUtils.retry(15, 2000, async () => { + await iscsi.iscsiadm.login( + iscsiConnection.iqn, + iscsiConnection.portal + ); + }); // get associated session let session = await iscsi.iscsiadm.getSession( diff --git a/src/utils/general.js b/src/utils/general.js index 6077802..928d613 100644 --- a/src/utils/general.js +++ b/src/utils/general.js @@ -1,3 +1,4 @@ +const _ = require("lodash"); const axios = require("axios"); const crypto = require("crypto"); @@ -177,6 +178,45 @@ function default_supported_file_filesystems() { return ["nfs", "cifs"]; } +async function retry(retries, retriesDelay, code, options = {}) { + let current_try = 0; + let maxwait = _.get(options, "maxwait"); + let logerrors = _.get(options, "logerrors", false); + let retryCondition = options.retryCondition; + do { + current_try++; + try { + return await code(); + } catch (err) { + if (current_try >= retries) { + throw err; + } + if (retryCondition) { + let retry = retryCondition(err); + if (!retry) { + console.log(`retry - failed condition, not trying again`); + throw err; + } + } + if (logerrors === true) { + console.log(`retry - err:`, err); + } + } + let sleep_time = retriesDelay; + if (_.get(options, "exponential", false) === true) { + sleep_time = retriesDelay * current_try; + } + + if (maxwait) { + if (sleep_time > maxwait) { + sleep_time = maxwait; + } + } + console.log(`retry - waiting ${sleep_time}ms before trying again`); + await sleep(sleep_time); + } while (true); +} + module.exports.sleep = sleep; module.exports.md5 = md5; module.exports.crc32 = crc32; @@ -189,3 +229,4 @@ module.exports.default_supported_block_filesystems = default_supported_block_filesystems; module.exports.default_supported_file_filesystems = default_supported_file_filesystems; +module.exports.retry = retry;