diff --git a/bin/democratic-csi b/bin/democratic-csi index 3d6b39e..949613b 100755 --- a/bin/democratic-csi +++ b/bin/democratic-csi @@ -486,6 +486,21 @@ const signalMapping = { } } + if (driver && typeof driver.getCleanupHandlers === 'function') { + const cleanup = driver.getCleanupHandlers(); + console.log(`running driver ${cleanup.length} cleanup handlers`); + for (const c of cleanup) { + try { + c(); + } catch (e) { + console.log("cleanup handler failed"); + console.log(e); + } + } + } else { + console.log(`current driver does not support cleanup`); + } + console.log("server fully shutdown, exiting"); process.exit(codeNumber); }); diff --git a/docs/proxy-driver.md b/docs/proxy-driver.md new file mode 100644 index 0000000..70a173d --- /dev/null +++ b/docs/proxy-driver.md @@ -0,0 +1,197 @@ + +# Proxy driver + +Proxy driver allow you to run several drivers in one instance of democratic-csi. + +Usually democratic-csi requires you to run a separate instance for each storage class. +This is heavily implied by the CSI protocol. + +However, it is still _possible_ to have several storage classes work via a single CSI driver. +The `proxy` driver makes the best effort to allow dynamic storage classes. + +This file has user documentation for proxy driver. Technical details that are relevant only for development can be found [here](../src/driver/controller-proxy/compatibility.md). + +## Terminology + +"Proxy" is the driver created via `driver: proxy` in the main config. +Other drivers are referred to as "real driver" or "underlying driver". + +"Connection" is equivalent to a democratic-csi deployment without a proxy. +Each connection has a separate `.yaml` config file. +Each real driver is associated with a single connection name. + +## Compatibility + +Drivers that are tested and work without issues: + +- `freenas-nfs` +- `freenas-iscsi` +- `freenas-api-nfs` +- `freenas-api-iscsi` +- `zfs-generic-nfs` +- `zfs-generic-iscsi` +- `zfs-generic-nvmeof` +- `local-hostpath` + +Drivers that are not tested but should work fine: + +- `freenas-smb` +- `freenas-api-smb` +- `zfs-generic-smb` +- `synology-iscsi` +- `nfs-client` +- `smb-client` +- `lustre-client` +- `node-manual` +- `zfs-local-dataset` +- `zfs-local-zvol` + +Drivers that are known to be incompatible with proxy: + +- `objectivefs` +- `zfs-local-ephemeral-inline` + +All `local` drivers need `proxy.nodeTopology.type == node` to work properly. + +## Config layout + +Like all other drivers, proxy driver needs the main config file. See [proxy.yaml example](../examples/proxy.yaml). +Additionally, proxy needs config files for other drivers to be in the container filesystem. + +Initially proxy doesn't know what real drivers to use. +In Kubernetes you configure real drivers via `parameters.connection` field in a Storage Class. +In other Container Orchestrators look for equivalent settings. + +In the example `proxy.configFolder` value is `/mnt/connections/`. +This means that proxy will look for real driver config files in this folder. + +Config file must have the following name: `.yaml`. + +Connection names are arbitrary, you just need to make sure that name of the config file +matches connection name from the storage class. + +Connection configuration can be added and updated dynamically, +as long as you make sure that files mounted into democratic-csi driver container are updated. + +## Limitations + +Proxy driver has a few limitations. + +Since it provides a common interface for several drivers, these drivers need to be similar enough. +For example, you can't mix `local-hostpath` and `freenas-nfs` drivers, because they simply need different modes of deployment. +Generally, you can mix drivers if it's possible to switch between them by just changing config file. + +Another limitation is connection name length. +Connection name SHOULD be short. It is added as a prefix into Volume Handle value. Volume handles have limited maximum length. +If your volume handle is already very long, adding connection name to it may cause volume creation to fail. +Whether or not this is relevant at all for you depends on your real driver config. +For example, snapshot handles are usually very long. Volume handle length is usually fixed, unless you customized it. + +You will probably be fine when using connection name under 20 symbols. +You can probably use longer names, but the shorter the better. + +Another limitation is that connection name is saved in volumes. +Connection name MUST be immutable. +If you create a volume, and then change connection name, you will get errors at different operations. +You may still be able to mount this volume, but volume size expansion or volume deletion will not work anymore. +It would be analogous to deleting the whole democratic-csi deployment when not using proxy driver. + +If you want to change connection name, you need to add a new config file for new connection, +and create a new storage class that will use this new connection. +You can then delete all volumes that are using the old connection, and only then you can delete the old connection config. + +## Simple k8s example + +Imagine that currently you have several deployments of democratic-csi +with different configs that you want to merge into a single deployment. + +First set democratic-csi [config values for proxy](../examples/proxy.yaml) (here is a minimalistic example): + +```yaml +driver: proxy +proxy: + configFolder: /mnt/connections/ +``` + +Then adjust your helm values for democratic-csi deployment: + +- (optional) delete all built-in storage classes +- (required) add extra volumes to the controller deployment + +```yaml +csiDriver: + name: org.democratic-csi.proxy +controller: + extraVolumes: + - name: connections + secret: + secretName: connections + driver: + extraVolumeMounts: + - name: connections + mountPath: /mnt/connections +``` + +Then create a secret for config files that will contain real driver config later: + +```bash +# don't forget to adjust namespace +kubectl create secret generic connections +``` + +Then you can deploy democratic-csi with proxy driver. +Now you should have an empty deployment that works +but can't create any real drivers or connect to any real backend. + +Let's add connections to proxy: +you need to update the `connections` secret. +Let's say that you need 2 storage classes: `nfs` and `iscsi`. +You need to have 2 separate config files for them. + +```bash +# don't forget to adjust namespace +kubectl create secret generic connections \ + --from-file example-nfs.yaml=./path/to/nfs/config.yaml \ + --from-file example-iscsi.yaml=./path/to/iscsi/config.yaml \ + -o yaml --dry-run=client | kl apply -f - +``` + +As you can see, you don't need to restart democratic-csi to add or update connections. +If you change the `connections` secret too quickly, then you may need to wait a few seconds +for files to get remounted info filesystem, but no restart is needed. + +Then create corresponding storage classes: + +```yaml +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: nfs +provisioner: org.democratic-csi.proxy +reclaimPolicy: Delete +allowVolumeExpansion: true +parameters: + connection: example-nfs + fsType: nfs +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: iscsi +provisioner: org.democratic-csi.proxy +reclaimPolicy: Delete +allowVolumeExpansion: true +parameters: + connection: example-iscsi + fsType: ext4 +``` + +Now you should be able to create volumes using these 2 storage classes. + +Notice that storage class name does not match connection name. +Also, local file names don't match connection name. +This is done to make example easier to understand. + +In real deployment you'll probably want to keep local file names, connection names, +and mounted file names synced for easier management. diff --git a/examples/proxy.yaml b/examples/proxy.yaml new file mode 100644 index 0000000..1084b13 --- /dev/null +++ b/examples/proxy.yaml @@ -0,0 +1,20 @@ + +driver: proxy + +proxy: + # path to folder where real driver config files are located + # this value is required for the proxy to work + configFolder: /mnt/connections/ + # connections that are no longer used are eventually deleted + # when timeout is 0, caching will be completely disabled, each request will create a new driver + # when timeout is -1, cache timeout is disabled, drivers are cached forever + # default: 60 + cacheTimeoutMinutes: 60 + # Node topology defines isolated groups of nodes + # local drivers need node topology + # All other drivers do not depend on topology, and will work fine with simpler cluster topology + nodeTopology: + # 'cluster': the whole cluster has unified storage + # 'node': each node has its own storage. Required for 'local-' drivers + # default: cluster + type: cluster diff --git a/src/driver/controller-proxy/compatibility.md b/src/driver/controller-proxy/compatibility.md new file mode 100644 index 0000000..51756f2 --- /dev/null +++ b/src/driver/controller-proxy/compatibility.md @@ -0,0 +1,111 @@ + +# Proxy driver compatibility + +There are 2 challenges with the proxy driver: + +1. Proxy needs dynamic state. CSI spec implies that dynamic state must be external, +which isn't ideal for small deployments, and is incompatible with democratic-csi. + +2. Proxy must provide a common set of capabilities for all drivers it represents. + +A great discussion of difficulties per storage class state can be found here: +- https://github.com/container-storage-interface/spec/issues/370 + +This file contains implementation details relevant for development. +If you are searching for user documentation and deployment example go [here](../../../docs/proxy-driver.md). + +## Terminology and structure + +"Proxy" is the driver created via `driver: proxy` in the main config. +Other drivers are referred to as "real driver" and "underlying driver". + +"Connection" is a way to distinguish real drivers in proxy driver calls + +- Connection name is set in storage class parameters +- Connection name is stored in volume handle +- Connection name is used as part of config file path + +All config files must be mounted into democratic-csi filesystem. +They can be added, updated and removed dynamically. + +## CSI features + +Generally most features are supported. + +However, some calls will not work: + +- `ListVolumes`: storage class context is missing +- - https://github.com/container-storage-interface/spec/issues/461 +- `ListSnapshots`: TODO: can be implemented. Would require adding snapshot secret + +`NodeGetInfo` works but it brings additional challenges. +Node info is common for all storage classes. +If different drivers need different output in `NodeGetInfo`, they can't coexist. +See [node info support notes](./nodeInfo.md) + +## Driver compatibility + +Proxy driver has the following minimal requirements for real underlying drivers: + +- Node methods should not use config values +- - This can be lifted +- - This is added because drivers use only volume context for mounting, and sometimes secrets +- - - There is one exception to this rule, and I would argue that that driver is just broken +- Driver should not need any exotic capabilities, since capabilities are shared +- Driver should use `CreateVolume`, so that proxy can set proper `volume_id` +- Controller publishing is not supported, see [Controller publish support](#controller-publish-support) + +Proxy advertises that it supports most CSI methods. +If some methods are missing from underlying driver, +proxy will throw `INVALID_ARGUMENT` error. +Some methods are expected to be missing from some of the underlying drivers. In such cases proxy returns default value: + +- `GetCapacity` returns infinite capacity when underlying driver does not report capacity + +## Volume ID format + +- `volume_id` format: `v:connection-name/original-handle` +- `snapshot_id` format: `s:connection-name/original-handle` + +Where: + +- `v`, `s` - fixed prefix +- - Allows to check that volume ID was created using proxy driver +- `connection-name` - identifies connection for all CSI calls +- `original-handle` - `volume_id` handle created by the underlying driver + +## Controller publish support + +`ControllerPublishVolume` is not implemented because currently no driver need this. +Implementation would need to replace `node_id` just like other methods replace `volume_id`. + +See [node info support notes](./nodeInfo.md) + +## Incompatible drivers + +- `zfs-local-ephemeral-inline`: proxy can't set volume_id in `CreateVolume` to identify underlying driver +- - are inline-ephemeral and standard drivers even compatible? +- `objectivefs`: `NodeStageVolume` uses driver parameters +- - `NodeStageVolume` needs `this.options` in `getDefaultObjectiveFSInstance` +- - Other node methods don't need driver options +- - Possible fix: add support for config values for node methods +- - Possible fix: add public pool data into volume attributes, move private data (if any) into a secret + +## Volume cloning and snapshots + +Cloning works without any adjustments when both volumes use the same connection. +If the connection is different: +- TODO: Same driver, same server +- - It's up to driver to add support +- - Support is easy: just need to get proper source location in the CreateVolume +- TODO: Same driver, different servers +- - It's up to driver to add support +- - Example: zfs send-receive +- - Example: file copy between nfs servers +- Different drivers: block <-> file: unlikely to be practical +- - Users should probably do such things manually, by mounting both volumes into a pod +- Different drivers: same filesystem type +- - Drivers should implement generic export and import functions +- - For example: TrueNas -> generic-zfs can theoretically be possible via zfs send +- - For example: nfs -> nfs can theoretically be possible via file copy +- - How to coordinate different drivers? diff --git a/src/driver/controller-proxy/index.js b/src/driver/controller-proxy/index.js new file mode 100644 index 0000000..805212c --- /dev/null +++ b/src/driver/controller-proxy/index.js @@ -0,0 +1,630 @@ +const _ = require("lodash"); +const semver = require("semver"); +const { CsiBaseDriver } = require("../index"); +const yaml = require("js-yaml"); +const fs = require('fs'); +const { Registry } = require("../../utils/registry"); +const { GrpcError, grpc } = require("../../utils/grpc"); +const path = require('path'); +const os = require("os"); + +const volumeIdPrefix = 'v:'; +const snapshotIdPrefix = 's:'; +const TOPOLOGY_DEFAULT_PREFIX = 'org.democratic-csi.topology'; + +class CsiProxyDriver extends CsiBaseDriver { + constructor(ctx, options) { + super(...arguments); + + this.initCapabilities(); + + let configFolder = path.normalize(this.options.proxy.configFolder); + if (configFolder.slice(-1) == '/') { + configFolder = configFolder.slice(0, -1); + } + this.nodeIdSerializer = new NodeIdSerializer(ctx, options.proxy.nodeId || {}); + + const timeoutMinutes = this.options.proxy.cacheTimeoutMinutes ?? 60; + const defaultOptions = this.options; + this.driverCache = new DriverCache(ctx, configFolder, timeoutMinutes, defaultOptions); + } + + initCapabilities() { + const options = this.options; + options.service = options.service || {}; + options.service.identity = options.service.identity || {}; + options.service.controller = options.service.controller || {}; + options.service.node = options.service.node || {}; + + options.service.identity.capabilities = + options.service.identity.capabilities || {}; + + options.service.controller.capabilities = + options.service.controller.capabilities || {}; + + options.service.node.capabilities = options.service.node.capabilities || {}; + + if (!("service" in options.service.identity.capabilities)) { + this.ctx.logger.debug("setting default identity service caps"); + + options.service.identity.capabilities.service = [ + //"UNKNOWN", + "CONTROLLER_SERVICE", + "VOLUME_ACCESSIBILITY_CONSTRAINTS", + ]; + } + + if (!("volume_expansion" in options.service.identity.capabilities)) { + this.ctx.logger.debug("setting default identity volume_expansion caps"); + + options.service.identity.capabilities.volume_expansion = [ + //"UNKNOWN", + "ONLINE", + //"OFFLINE" + ]; + } + + if (!("rpc" in options.service.controller.capabilities)) { + this.ctx.logger.debug("setting default controller caps"); + + options.service.controller.capabilities.rpc = [ + //"UNKNOWN", + "CREATE_DELETE_VOLUME", + //"PUBLISH_UNPUBLISH_VOLUME", + //"LIST_VOLUMES_PUBLISHED_NODES", + // "LIST_VOLUMES", + "GET_CAPACITY", + "CREATE_DELETE_SNAPSHOT", + // "LIST_SNAPSHOTS", + "CLONE_VOLUME", + //"PUBLISH_READONLY", + "EXPAND_VOLUME", + ]; + + if (semver.satisfies(this.ctx.csiVersion, ">=1.3.0")) { + options.service.controller.capabilities.rpc.push( + //"VOLUME_CONDITION", + // "GET_VOLUME" + ); + } + + if (semver.satisfies(this.ctx.csiVersion, ">=1.5.0")) { + options.service.controller.capabilities.rpc.push( + "SINGLE_NODE_MULTI_WRITER" + ); + } + } + + if (!("rpc" in options.service.node.capabilities)) { + this.ctx.logger.debug("setting default node caps"); + options.service.node.capabilities.rpc = [ + //"UNKNOWN", + "STAGE_UNSTAGE_VOLUME", + "GET_VOLUME_STATS", + "EXPAND_VOLUME", + //"VOLUME_CONDITION", + ]; + + if (semver.satisfies(this.ctx.csiVersion, ">=1.3.0")) { + //options.service.node.capabilities.rpc.push("VOLUME_CONDITION"); + } + + if (semver.satisfies(this.ctx.csiVersion, ">=1.5.0")) { + options.service.node.capabilities.rpc.push("SINGLE_NODE_MULTI_WRITER"); + /** + * This is for volumes that support a mount time gid such as smb or fat + */ + //options.service.node.capabilities.rpc.push("VOLUME_MOUNT_GROUP"); // in k8s is sent in as the security context fsgroup + } + } + } + + getCleanupHandlers() { + const cacheCleanup = this.driverCache.getCleanupHandlers(); + // this.cleanup is not modified, concat returns a new object + return this.cleanup.concat(cacheCleanup); + } + + parseVolumeHandle(handle, prefix = volumeIdPrefix) { + if (!handle.startsWith(prefix)) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `invalid volume handle: ${handle}: expected prefix ${prefix}` + ); + } + handle = handle.substring(prefix.length); + return { + connectionName: handle.substring(0, handle.indexOf('/')), + realHandle: handle.substring(handle.indexOf('/') + 1), + }; + } + + decorateVolumeHandle(connectionName, handle, prefix = volumeIdPrefix) { + return prefix + connectionName + '/' + handle; + } + + async checkAndRun(driver, methodName, call, defaultValue) { + if (typeof driver[methodName] !== 'function') { + if (defaultValue) return defaultValue; + // UNIMPLEMENTED could possibly confuse CSI CO into thinking + // that driver does not support methodName at all. + // INVALID_ARGUMENT should allow CO to use methodName with other storage classes. + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `underlying driver does not support ` + methodName + ); + } + return await driver[methodName](call); + } + + async controllerRunWrapper(methodName, call, defaultValue) { + const volumeHandle = this.parseVolumeHandle(call.request.volume_id); + const driver = this.driverCache.lookUpConnection(volumeHandle.connectionName); + call.request.volume_id = volumeHandle.realHandle; + return await this.checkAndRun(driver, methodName, call, defaultValue); + } + + // =========================================== + // Controller methods below + // =========================================== + + async GetCapacity(call) { + const parameters = call.request.parameters; + if (!parameters.connection) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `connection missing from parameters` + ); + } + const connectionName = parameters.connection; + const driver = this.driverCache.lookUpConnection(connectionName); + return await this.checkAndRun(driver, 'GetCapacity', call, { + available_capacity: Number.MAX_SAFE_INTEGER, + }); + } + + async CreateVolume(call) { + const parameters = call.request.parameters; + if (!parameters.connection) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `connection missing from parameters` + ); + } + const connectionName = parameters.connection; + const driver = this.driverCache.lookUpConnection(connectionName); + + switch (call.request.volume_content_source?.type) { + case "snapshot": { + const snapshotHandle = this.parseVolumeHandle(call.request.volume_content_source.snapshot.snapshot_id, snapshotIdPrefix); + if (snapshotHandle.connectionName != connectionName) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `can not inflate snapshot from a different connection` + ); + } + call.request.volume_content_source.snapshot.snapshot_id = snapshotHandle.realHandle; + break; + } + case "volume": { + const volumeHandle = this.parseVolumeHandle(call.request.volume_content_source.volume.volume_id); + if (volumeHandle.connectionName != connectionName) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `can not clone volume from a different connection` + ); + } + call.request.volume_content_source.volume.volume_id = volumeHandle.realHandle; + break; + } + case undefined: + case null: + break; + default: + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `unknown volume_content_source type: ${call.request.volume_content_source.type}` + ); + } + const result = await this.checkAndRun(driver, 'CreateVolume', call); + result.volume.volume_id = this.decorateVolumeHandle(connectionName, result.volume.volume_id); + return result; + } + + async DeleteVolume(call) { + return await this.controllerRunWrapper('DeleteVolume', call); + } + + async ControllerGetVolume(call) { + return await this.controllerRunWrapper('ControllerGetVolume', call); + } + + async ControllerExpandVolume(call) { + return await this.controllerRunWrapper('ControllerExpandVolume', call); + } + + async CreateSnapshot(call) { + const volumeHandle = this.parseVolumeHandle(call.request.source_volume_id); + const driver = this.driverCache.lookUpConnection(volumeHandle.connectionName); + call.request.source_volume_id = volumeHandle.realHandle; + const result = await this.checkAndRun(driver, 'CreateSnapshot', call); + result.snapshot.source_volume_id = this.decorateVolumeHandle(volumeHandle.connectionName, result.snapshot.source_volume_id); + result.snapshot.snapshot_id = this.decorateVolumeHandle(volumeHandle.connectionName, result.snapshot.snapshot_id, snapshotIdPrefix); + return result; + } + + async DeleteSnapshot(call) { + const volumeHandle = this.parseVolumeHandle(call.request.snapshot_id, snapshotIdPrefix); + const driver = this.driverCache.lookUpConnection(volumeHandle.connectionName); + call.request.snapshot_id = volumeHandle.realHandle; + return await this.checkAndRun(driver, 'DeleteSnapshot', call); + } + + async ValidateVolumeCapabilities(call) { + return await this.controllerRunWrapper('ValidateVolumeCapabilities', call); + } + + // =========================================== + // Node methods below + // =========================================== + // + // Theoretically, controller setup with config files could be replicated in node deployment, + // and node could create proper drivers for each call. + // But it doesn't seem like node would benefit from this. + // - CsiBaseDriver.NodeStageVolume calls this.assertCapabilities which should be run in the real driver + // but no driver-specific functions or options are used. + // So we can just create an empty driver with default options + // - Other Node* methods don't use anything driver specific + + lookUpNodeDriver(call) { + const driverType = call.request.volume_context.provisioner_driver; + // there is no cache timeout for node drivers + // because drivers are not updated dynamically + return this.ctx.registry.get(`node:driver/${driverType}`, () => { + const driverOptions = structuredClone(this.options); + driverOptions.driver = driverType; + return this.driverCache.createRealDriver(driverOptions); + }); + } + + async NodeStageVolume(call) { + const driver = this.lookUpNodeDriver(call); + return await this.checkAndRun(driver, 'NodeStageVolume', call); + } + + async NodeGetInfo(call) { + const result = { + node_id: this.nodeIdSerializer.serialize(), + max_volumes_per_node: 0, + }; + const topologyType = this.options.proxy.nodeTopology?.type ?? 'cluster'; + const prefix = this.options.proxy.nodeTopology?.prefix ?? TOPOLOGY_DEFAULT_PREFIX; + switch (topologyType) { + case 'cluster': + result.accessible_topology = { + segments: { + [prefix + '/cluster']: 'local', + }, + }; + break + case 'node': + result.accessible_topology = { + segments: { + [prefix + '/node']: NodeIdSerializer.getLocalNodeName(), + }, + }; + break + default: + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `proxy: unknown node topology type: ${topologyType}` + ); + } + return result; + } +} + +class DriverCache { + constructor(ctx, configFolder, timeoutMinutes, defaultOptions) { + this.driverCache = {}; + this.ctx = ctx; + this.defaultOptions = defaultOptions; + this.configFolder = configFolder; + + // Corresponding storage class could be deleted without notice. + // We can delete drivers that weren't requested for a long time. + // User can configure cache timeout so that driver re-creation is not too frequent. + + // TODO how do we make sure that cache timeout is bigger than RPC call duration? + // If the driver has cleanup, it can break the call. + // The smallest timeout is 1 minute. + // Any PRC SHOULD finish in this time, but it's possible that something hangs, or some storage backend is just slow. + + const oneMinuteInMs = 1000 * 60; + this.cacheTimeoutMs = timeoutMinutes * oneMinuteInMs; + + let cacheType = ''; + if (timeoutMinutes === -1) { + this.ctx.logger.info("driver cache is disabled"); + cacheType = 'force-create'; + } else if (timeoutMinutes === 0) { + this.ctx.logger.info("driver cache is permanent"); + cacheType = 'permanent'; + } else { + this.ctx.logger.info(`driver cache timeout is ${timeoutMinutes} minutes`); + cacheType = 'timeout'; + } + this.cacheType = cacheType; + } + + getCleanupHandlers() { + const result = []; + for (const connectionName in this.driverCache) { + result.push(() => this.removeCacheEntry(connectionName)); + } + return result; + } + + // returns real driver object + // internally drivers are cached and deleted on timeout + lookUpConnection(connectionName) { + const configPath = this.configFolder + '/' + connectionName + '.yaml'; + + if (this.cacheType == 'force-create') { + // when timeout is 0, force creating a new driver on each request + const driver = this.createDriverFromFile(configPath); + const oneMinuteInMs = 1000 * 60; + setTimeout(() => { + this.runDriverCleanup(driver); + }, oneMinuteInMs); + return driver; + } + + let cachedDriver = this.driverCache[connectionName]; + if (!cachedDriver) { + cachedDriver = { + connectionName: connectionName, + fileTime: 0, + driver: null, + }; + this.driverCache[connectionName] = cachedDriver; + } + if (cachedDriver.timer !== null) { + clearTimeout(cachedDriver.timer); + cachedDriver.timer = null; + } + if (this.cacheType == 'timeout') { + cachedDriver.timer = setTimeout(() => { + this.ctx.logger.info("removing inactive connection: %s", connectionName); + this.removeCacheEntry(cachedDriver.driver); + }, this.cacheTimeoutMs); + } + + const fileTime = this.getFileTime(configPath); + if (cachedDriver.fileTime != fileTime) { + this.ctx.logger.debug("connection version is old: file time %d != %d", cachedDriver.fileTime, fileTime); + this.runDriverCleanup(cachedDriver.driver); + cachedDriver.fileTime = fileTime; + cachedDriver.driver = this.createDriverFromFile(configPath); + } + return cachedDriver.driver; + } + + removeCacheEntry(connectionName) { + const cacheEntry = this.driverCache[connectionName]; + if (!cacheEntry) { + return; + } + this.ctx.logger.debug("removing %s from cache", connectionName); + delete this.driverCache[connectionName]; + if (cacheEntry.timer) { + clearTimeout(cacheEntry.timer); + cacheEntry.timer = null; + } + const driver = cacheEntry.driver; + cachedDriver.fileTime = 0; + cacheEntry.driver = null; + this.runDriverCleanup(driver); + } + + runDriverCleanup(driver) { + if (!driver) { + return; + } + if (typeof driver.getCleanupHandlers !== 'function') { + this.ctx.logger.debug("old driver does not support cleanup"); + return; + } + const cleanup = driver.getCleanupHandlers(); + if (cleanup.length == 0) { + this.ctx.logger.debug("old driver does not require any cleanup"); + return; + } + this.ctx.logger.debug("running %d cleanup functions", cleanup.length); + for (const cleanupFunc of cleanup) { + cleanupFunc(); + } + } + + getFileTime(path) { + try { + const configFileStats = fs.statSync(path); + this.ctx.logger.debug("file time for '%s' is: %d", path, configFileStats.mtime); + return configFileStats.mtime.getTime(); + } catch (e) { + this.ctx.logger.error("fs.statSync failed: %s", e.toString()); + throw e; + } + } + + createDriverFromFile(configPath) { + this.ctx.logger.info("creating new driver from file: %s", configPath); + const fileOptions = this.createOptionsFromFile(configPath); + const mergedOptions = structuredClone(this.defaultOptions); + _.merge(mergedOptions, fileOptions); + return this.createRealDriver(mergedOptions); + } + + createOptionsFromFile(configPath) { + this.ctx.logger.debug("loading config: %s", configPath); + try { + return yaml.load(fs.readFileSync(configPath, "utf8")); + } catch (e) { + this.ctx.logger.error("failed parsing config file: %s", e.toString()); + throw e; + } + } + + createRealDriver(options) { + this.validateDriverType(options.driver); + const realContext = Object.assign({}, this.ctx); + realContext.registry = new Registry(); + const realDriver = this.ctx.factory(realContext, options); + if (realDriver.constructor.name == this.constructor.name) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `cyclic dependency: proxy on proxy` + ); + } + this.ctx.logger.debug("using driver %s", realDriver.constructor.name); + return realDriver; + } + + validateDriverType(driver) { + const unsupportedDrivers = [ + "zfs-local-ephemeral-inline", + "objectivefs", + "proxy", + ]; + for (const prefix in unsupportedDrivers) { + if (driver.startsWith(prefix)) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `proxy is not supported for driver: ${mergedOptions.driver}` + ); + } + } + } +} + +const nodeIdCode_NodeName = 'n'; +const nodeIdCode_Hostname = 'h'; +const nodeIdCode_ISCSI = 'i'; +const nodeIdCode_NVMEOF = 'v'; +class NodeIdSerializer { + constructor(ctx, nodeIdConfig) { + this.ctx = ctx; + this.config = nodeIdConfig; + } + + static getLocalNodeName() { + if (!process.env.CSI_NODE_ID) { + throw 'CSI_NODE_ID is required for proxy driver'; + } + return process.env.CSI_NODE_ID; + } + static getLocalIqn() { + const iqnPath = '/etc/iscsi/initiatorname.iscsi'; + const lines = fs.readFileSync(iqnPath, "utf8").split('\n'); + for (let line of lines) { + line = line.replace(/#.*/, '').replace(/\s+$/, ''); + if (line == '') { + continue; + } + const linePrefix = 'InitiatorName='; + if (line.startsWith(linePrefix)) { + const iqn = line.slice(linePrefix.length); + return iqn; + } + } + throw 'iqn is not found'; + } + static getLocalNqn() { + const nqnPath = '/etc/nvme/hostnqn'; + return fs.readFileSync(nqnPath, "utf8").replace(/\s+$/, ''); + } + + // returns { prefixName, suffix } + findPrefix(value, prefixMap) { + for (const prefixInfo of prefixMap) { + if (value.startsWith(prefixInfo.prefix)) { + return { + prefixName: prefixInfo.shortName, + suffix: value.split(prefixInfo.prefix.length), + }; + } + } + } + + serializeByPrefix(code, value, prefixMap) { + const prefixInfo = prefixMap.find(prefixInfo => value.startsWith(prefixInfo.prefix)); + if (!prefixInfo) { + throw `node id: prefix is not found for value: ${value}`; + } + if (!prefixInfo.shortName.match(/^[0-9a-z]+$/)) { + throw `prefix short name must be alphanumeric, invalid name: '${prefixInfo.shortName}'`; + } + const suffix = value.substring(prefixInfo.prefix.length); + return code + prefixInfo.shortName + '=' + suffix; + } + + deserializeFromPrefix(value, prefixMap, humanName) { + const prefixName = value.substring(0, value.indexOf('=')); + const suffix = value.substring(value.indexOf('=') + 1); + const prefixInfo = prefixMap.find(prefixInfo => prefixInfo.shortName === prefixName); + if (!prefixInfo) { + throw new GrpcError( + grpc.status.INVALID_ARGUMENT, + `unknown node prefix short name for ${humanName}: ${value}` + ); + } + return prefixInfo.prefix + suffix; + } + + // returns a single string that incorporates node id components specified in config.parts + serialize() { + let result = ''; + if (this.config.parts?.nodeName ?? true) { + result += '/' + nodeIdCode_NodeName + '=' + NodeIdSerializer.getLocalNodeName(); + } + if (this.config.parts?.hostname ?? false) { + result += '/' + nodeIdCode_Hostname + '=' + os.hostname(); + } + if (this.config.parts?.iqn ?? false) { + result += '/' + this.serializeByPrefix(nodeIdCode_ISCSI, NodeIdSerializer.getLocalIqn(), this.config.iqnPrefix); + } + if (this.config.parts?.nqn ?? false) { + result += '/' + this.serializeByPrefix(nodeIdCode_NVMEOF, NodeIdSerializer.getLocalNqn(), this.config.nqnPrefix); + } + if (result === '') { + throw 'node id can not be empty'; + } + // remove starting / + return result.slice(1); + } + + // takes a string generated by NodeIdSerializer.serialize + // returns an { nodeName, iqn, nqn } if they exist in nodeId + deserialize(nodeId) { + const result = {}; + for (const v in nodeId.split("/")) { + switch (v[0]) { + case nodeIdCode_NodeName: + result.nodeName = v.substring(v.indexOf('=') + 1); + continue; + case nodeIdCode_Hostname: + result.hostname = v.substring(v.indexOf('=') + 1); + continue; + case nodeIdCode_ISCSI: + result.iqn = this.deserializeFromPrefix(v, this.config.iqnPrefix, 'iSCSI'); + continue; + case nodeIdCode_NVMEOF: + result.nqn = this.deserializeFromPrefix(v, this.config.nqnPrefix, 'NVMEoF'); + continue; + } + } + return result; + } +} + +module.exports.CsiProxyDriver = CsiProxyDriver; diff --git a/src/driver/controller-proxy/nodeInfo.md b/src/driver/controller-proxy/nodeInfo.md new file mode 100644 index 0000000..ab65d1b --- /dev/null +++ b/src/driver/controller-proxy/nodeInfo.md @@ -0,0 +1,144 @@ + +# Node info + +Node info is common for all storage classes. +Proxy driver must report some values that are compatible with all real drivers. + +There are 2 important values: +- topology +- node ID + +# Node ID + +Node ID is a bit tricky to solve, because of limited field length. + +Currently no real driver actually needs `node_id` to work, +so all of this is mostly a proof-of-concept. +A proof that you can create a functional proxy driver even with current CSI spec. + +We can replace `node_id` with fixed value, just like we do with `volume_id` field, +before calling the actual real driver method. + +Node ID docs are not a part of user documentation because currently this is very theoretical. +Current implementation works fine but doesn't do anything useful for users. + +## Node ID: config example + +```yaml +# configured in root proxy config +proxy: + nodeId: + parts: + # when value is true, corresponding node info is included into node_id, + # and can be accessed by proxy driver in controller + # it allows you to cut info from node_id to make it shorter + nodeName: true + hostname: false + iqn: false + nqn: false + # prefix allows you to save shorter values into node_id, so it can fit more than one value + # on node prefix is replaced with short name, on controller the reverse [can] happen + nqnPrefix: + - shortName: '1' + prefix: 'nqn.2000-01.com.example.nvmeof:' + - shortName: '2' + prefix: 'nqn.2014-08.org.nvmexpress:uuid:' + iqnPrefix: + - shortName: '1' + prefix: 'iqn.2000-01.com.example:' + nodeTopology: + # 'cluster': all nodes have the same value + # 'node': each node will get its own topology group + type: cluster +``` + +```yaml +# add to each _real_ driver config +proxy: + perDriver: + # allowed values: nodeName, hostname, iqn, nqn + # proxy will use this to decide how to fill node_id for current driver + nodeIdType: nodeName +``` + +## Node ID: Reasoning why such complex node_id is required + +`node_name + iqn + nqn` can be very long. + +Each of these values can theoretically exceed 200 symbols in length. +It's unreasonable to expect users to always use short values. + +But it's reasonable to expect that IQNs and NQNs in the cluster will have only a few patterns. +Many clusters likely only use one pattern with only a short variable suffix. +Even if not all nodes follow the same pattern, the amount of patterns is limited. + +Saving short suffix allows you to fit all identifiers into node_id without dynamic state. + +Values example: + +- node name: `node-name.cluster-name.customer-name.suffix` +- iqn: `iqn.2000-01.com.example:qwerty1234` +- nqn: `nqn.2014-08.org.nvmexpress:uuid:68f1d462-633b-4085-a634-899b88e5af74` +- node_id: `n=node-name.cluster-name.customer-name.suffix/i1=qwerty1234/v2=68f1d462-633b-4085-a634-899b88e5af74` +- - Note: even with kinda long node name and default debian IQN and NQN values this still comfortably fits into `node_id` length limit of 256 chars. +- - Maybe we could add prefix and suffix mechanism for node name if very long node name is an issue in real production clusters. + I'm not too familiar with managed k8s node name practices. + +For example, if driver needs iqn, proxy will find field in node id starting with `i`, +search `proxy.nodeId.iqnPrefix` for entry with `shortName = 1`, and then set `node_id` to +`proxy.nodeId.iqnPrefix[name=1].prefix` + `qwerty` + +## Node ID: Alternatives to prefixes + +Each driver can override `node_id` based on node name. + +Each driver can use template for `node_id` based on node name and/or hostname. + +Config example: + +```yaml +# add to each _real_ driver config +proxy: + perDriver: + # local means that this driver uses node ID template instead of using values from NodeGetInfo + # Individual nodes can use nodeIdMap instead of template. + # Possibly, even all nodes could use nodeIdMap. + nodeIdType: local + nodeIdMap: + - nodeName: node1 + value: nqn.2000-01.com.example:qwerty + - nodeName: node2 + value: nqn.2000-01.com.example:node2 + nodeIdTemplate: iqn.2000-01.com.example:{{ hostname }}:{{ nodeName }}-suffix +``` + +The obvious disadvantage is that it requires a lot more configuration from the user. +Still, if this were to be useful for some reason, this is fully compatible with the current `node_id` format in proxy. + +Theoretically, more info can be extracted from node to be used in `nodeIdTemplate`, +provided the info is short enough to fit into `node_id` length limit. + +# Topology + +There are 3 cases of cluster topology: + +- Each node has unique topology domain (`local` drivers) +- All nodes are the same (usually the case for non-local drivers) +- Several availability zones that can contain several nodes + +Simple cases are currently supported by the proxy. +Custom availability zones are TBD. + +Example configuration: + +```yaml +proxy: + nodeTopology: + # allowed values: + # node - each node has its own storage + # cluster - the whole cluster has unified storage + type: node + # topology reported by CSI driver is reflected in k8s as node labels. + # you may want to set unique prefixes on different drivers to avoid collisions + prefix: org.democratic-csi.topology +``` diff --git a/src/driver/controller-synology/index.js b/src/driver/controller-synology/index.js index bbae748..9c1396b 100644 --- a/src/driver/controller-synology/index.js +++ b/src/driver/controller-synology/index.js @@ -127,7 +127,7 @@ class ControllerSynologyDriver extends CsiBaseDriver { case "synology-iscsi": return "volume"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } @@ -140,7 +140,7 @@ class ControllerSynologyDriver extends CsiBaseDriver { case "synology-iscsi": return "iscsi"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } diff --git a/src/driver/controller-zfs-generic/index.js b/src/driver/controller-zfs-generic/index.js index 3e4836f..aefe0c1 100644 --- a/src/driver/controller-zfs-generic/index.js +++ b/src/driver/controller-zfs-generic/index.js @@ -16,10 +16,12 @@ class ControllerZfsGenericDriver extends ControllerZfsBaseDriver { getExecClient() { return this.ctx.registry.get(`${__REGISTRY_NS__}:exec_client`, () => { if (this.options.sshConnection) { - return new SshClient({ + const sshClient = new SshClient({ logger: this.ctx.logger, connection: this.options.sshConnection, }); + this.cleanup.push(() => sshClient.finalize()); + return sshClient; } else { return new LocalCliExecClient({ logger: this.ctx.logger, @@ -70,7 +72,7 @@ class ControllerZfsGenericDriver extends ControllerZfsBaseDriver { case "zfs-generic-nvmeof": return "volume"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } diff --git a/src/driver/controller-zfs-local/index.js b/src/driver/controller-zfs-local/index.js index bfc3295..c117543 100644 --- a/src/driver/controller-zfs-local/index.js +++ b/src/driver/controller-zfs-local/index.js @@ -86,7 +86,7 @@ class ControllerZfsLocalDriver extends ControllerZfsBaseDriver { case "zfs-local-zvol": return "volume"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } diff --git a/src/driver/factory.js b/src/driver/factory.js index 0235758..1822af0 100644 --- a/src/driver/factory.js +++ b/src/driver/factory.js @@ -15,9 +15,13 @@ const { ControllerLustreClientDriver } = require("./controller-lustre-client"); const { ControllerObjectiveFSDriver } = require("./controller-objectivefs"); const { ControllerSynologyDriver } = require("./controller-synology"); const { NodeManualDriver } = require("./node-manual"); +const { CsiProxyDriver } = require("./controller-proxy"); function factory(ctx, options) { + ctx.factory = factory; switch (options.driver) { + case "proxy": + return new CsiProxyDriver(ctx, options); case "freenas-nfs": case "freenas-smb": case "freenas-iscsi": diff --git a/src/driver/freenas/api.js b/src/driver/freenas/api.js index c5df0a0..559783a 100644 --- a/src/driver/freenas/api.js +++ b/src/driver/freenas/api.js @@ -1974,7 +1974,7 @@ class FreeNASApiDriver extends CsiBaseDriver { case "truenas-api-iscsi": return "volume"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } @@ -1990,7 +1990,7 @@ class FreeNASApiDriver extends CsiBaseDriver { case "truenas-api-iscsi": return "iscsi"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } diff --git a/src/driver/freenas/ssh.js b/src/driver/freenas/ssh.js index 3f635ac..b622a81 100644 --- a/src/driver/freenas/ssh.js +++ b/src/driver/freenas/ssh.js @@ -57,10 +57,12 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { getExecClient() { return this.ctx.registry.get(`${__REGISTRY_NS__}:exec_client`, () => { - return new SshClient({ + const sshClient = new SshClient({ logger: this.ctx.logger, connection: this.options.sshConnection, }); + this.cleanup.push(() => sshClient.finalize()); + return sshClient; }); } @@ -104,7 +106,7 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { case "truenas-iscsi": return "volume"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } @@ -160,7 +162,7 @@ class FreeNASSshDriver extends ControllerZfsBaseDriver { case "truenas-iscsi": return "iscsi"; default: - throw new Error("unknown driver: " + this.ctx.args.driver); + throw new Error("unknown driver: " + this.options.driver); } } diff --git a/src/driver/index.js b/src/driver/index.js index f0fe630..06038f2 100644 --- a/src/driver/index.js +++ b/src/driver/index.js @@ -33,6 +33,7 @@ class CsiBaseDriver { constructor(ctx, options) { this.ctx = ctx; this.options = options || {}; + this.cleanup = []; if (!this.options.hasOwnProperty("node")) { this.options.node = {}; @@ -51,6 +52,10 @@ class CsiBaseDriver { } } + getCleanupHandlers() { + return this.cleanup; + } + /** * abstract way of retrieving values from parameters/secrets * in order of preference: diff --git a/src/driver/zfs-local-ephemeral-inline/index.js b/src/driver/zfs-local-ephemeral-inline/index.js index 276b1f4..3cec30e 100644 --- a/src/driver/zfs-local-ephemeral-inline/index.js +++ b/src/driver/zfs-local-ephemeral-inline/index.js @@ -125,10 +125,12 @@ class ZfsLocalEphemeralInlineDriver extends CsiBaseDriver { getSshClient() { return this.ctx.registry.get(`${__REGISTRY_NS__}:ssh_client`, () => { - return new SshClient({ + const sshClient = new SshClient({ logger: this.ctx.logger, connection: this.options.sshConnection, }); + this.cleanup.push(() => sshClient.finalize()); + return sshClient; }); } diff --git a/src/utils/zfs_ssh_exec_client.js b/src/utils/zfs_ssh_exec_client.js index 4a232fa..8a5ee5d 100644 --- a/src/utils/zfs_ssh_exec_client.js +++ b/src/utils/zfs_ssh_exec_client.js @@ -75,8 +75,14 @@ class SshClient { const start_error_event_count = this.error_event_count; try { await this.conn_mutex.runExclusive(async () => { + if (this.finalized) { + throw 'using finalized ssh client'; + } this.conn.connect(this.options.connection); do { + if (this.finalized) { + throw 'ssh client finalized during connection'; + } if (start_error_event_count != this.error_event_count) { throw this.conn_err; } @@ -104,6 +110,12 @@ class SshClient { return this._connect(); } + finalize() { + this.finalized = true; + const conn = this.conn; + conn.end(); + } + async exec(command, options = {}, stream_proxy = null) { // default is to reuse if (process.env.SSH_REUSE_CONNECTION == "0") {