From 01113c827039ee8820df432caf11900ec3b9be9a Mon Sep 17 00:00:00 2001 From: Travis Glenn Hansen Date: Tue, 1 Dec 2020 16:52:59 -0700 Subject: [PATCH] attempt to fix missing group behavior for iscsi target creation --- src/driver/freenas/index.js | 139 ++++++++++++++++++++++++++++++++++++ src/driver/index.js | 20 +++++- 2 files changed, 158 insertions(+), 1 deletion(-) diff --git a/src/driver/freenas/index.js b/src/driver/freenas/index.js index 813a3c4..80b7e27 100644 --- a/src/driver/freenas/index.js +++ b/src/driver/freenas/index.js @@ -1,6 +1,7 @@ const { ControllerZfsSshBaseDriver } = require("../controller-zfs-ssh"); const { GrpcError, grpc } = require("../../utils/grpc"); const HttpClient = require("./http").Client; +const sleep = require("../../utils/general").sleep; const Handlebars = require("handlebars"); @@ -955,6 +956,47 @@ class FreeNASDriver extends ControllerZfsSshBaseDriver { ); } + // handle situations/race conditions where groups failed to be added/created on the target + // groups":[{"portal":1,"initiator":1,"auth":null,"authmethod":"NONE"},{"portal":2,"initiator":1,"auth":null,"authmethod":"NONE"}] + // TODO: this logic could be more intelligent but this should do for now as it appears in the failure scenario no groups are added + // in other words, I have never seen them invalid, only omitted so this should be enough + if (target.groups.length != targetGroups.length) { + response = await httpClient.put(`/iscsi/target/id/${target.id}`, { + groups: targetGroups, + }); + + if (response.statusCode != 200) { + throw new GrpcError( + grpc.status.UNKNOWN, + `failed setting target groups` + ); + } else { + target = response.body; + + // re-run sanity checks + if (!target) { + throw new GrpcError( + grpc.status.UNKNOWN, + `unknown error creating iscsi target` + ); + } + + if (target.name != iscsiName) { + throw new GrpcError( + grpc.status.UNKNOWN, + `mismatch name error creating iscsi target` + ); + } + + if (target.groups.length != 
targetGroups.length) { + throw new GrpcError( + grpc.status.UNKNOWN, + `failed setting target groups` + ); + } + } + } + this.ctx.logger.verbose("FreeNAS ISCSI TARGET: %j", target); // set target.id on zvol @@ -1524,6 +1566,103 @@ class FreeNASDriver extends ControllerZfsSshBaseDriver { } } + async failedAttachHelper(call, err) { + const driverShareType = this.getDriverShareType(); + const sshClient = this.getSshClient(); + let response; + + // not fully implemented + return; + + switch (driverShareType) { + case "iscsi": + const isScale = await this.getIsScale(); + const majorMinor = await this.getSystemVersionMajorMinor(); + + // only works for BSD-based and 11.3+ + if (!isScale && majorMinor >= 11.3) { + const sudoEnabled = this.getSudoEnabled(); + const sudoPath = await this.getSudoPath(); + let command; + + //19 - encountered non-retryable iSCSI login failure + // ^ could be missing groups on the target + + //cat /var/run/ctld.pid + // ps -p <pid> | grep ctld + // ps -p `cat /var/run/ctld.pid` | grep ctld (if 0 exit status it's running, otherwise no) + + // random settle time + // this could be getting invoked by other instances of the same controller + // or other deployments of controllers in the same or different clusters + // altogether + let maxSettleTime = 10000; + let settleTime = Math.floor(Math.random() * maxSettleTime + 1); + await sleep(settleTime); + + // test if config is bad + // if so regen + command = sshClient.buildCommand("/usr/sbin/ctld", ["-d"]); + if (sudoEnabled) { + command = sudoPath + " " + command; + } + + this.ctx.logger.verbose("FailedAttachHelper command: %s", command); + + response = await sshClient.exec(command); + let configError = false; + let serviceRunning = false; + if (response.stderr.includes("configuration error")) { + configError = true; + } + + // NOTE: this will not be in the output if the config file has an error + if (response.stderr.includes("daemon already running")) { + serviceRunning = true; + } + + if (configError) { 
+ this.ctx.logger.warn( + "FailedAttachHelper: ctld appears to have a bad configuration file, attempting to regenerate" + ); + // regen config + // midclt call etc.generate ctld + command = sshClient.buildCommand("midclt", [ + "call", + "etc.generate", + "ctld", + ]); + if (sudoEnabled) { + command = sudoPath + " " + command; + } + + this.ctx.logger.verbose("FailedAttachHelper command: %s", command); + response = await sshClient.exec(command); + + // reload service (may not be enough) + command = sshClient.buildCommand("/etc/rc.d/ctld", ["reload"]); + if (sudoEnabled) { + command = sudoPath + " " + command; + } + + this.ctx.logger.verbose("FailedAttachHelper command: %s", command); + response = await sshClient.exec(command); + + } + + // note, when the 'bad' state is entered, the status still shows as running + // check if service is running + // /etc/rc.d/ctld status ...exits 0 if running + //command = sshClient.buildCommand("/etc/rc.d/ctld", ["status"]); + + // if service is not running attempt a restart + // /etc/rc.d/ctld restart + //command = sshClient.buildCommand("/etc/rc.d/ctld", ["restart"]); + } + break; + } + } + async getApiVersion() { const systemVersion = await this.getSystemVersion(); diff --git a/src/driver/index.js b/src/driver/index.js index a5ad8fa..27bc506 100644 --- a/src/driver/index.js +++ b/src/driver/index.js @@ -356,7 +356,15 @@ class CsiBaseDriver { nodeDB ); // login - await iscsi.iscsiadm.login(volume_context.iqn, portal); + try { + await iscsi.iscsiadm.login(volume_context.iqn, portal); + } catch (err) { + if (typeof this.failedAttachHelper === "function") { + // no need to await this + this.failedAttachHelper(call, err); + } + throw err; + } // find device name device = `/dev/disk/by-path/ip-${portal}-iscsi-${volume_context.iqn}-lun-${volume_context.lun}`; @@ -378,6 +386,16 @@ class CsiBaseDriver { let current_time = Math.round(new Date().getTime() / 1000); if (!result && current_time - timer_start > timer_max) { + if (typeof 
this.failedAttachHelper === "function") { + // no need to await this + this.failedAttachHelper( + call, + new Error( + `hit timeout waiting for device node to appear: ${device}` + ) + ); + } + driver.ctx.logger.warn( `hit timeout waiting for device node to appear: ${device}` );