Add max_errors and ability to exit.
This commit is contained in:
parent
a78bf5d482
commit
913565c548
|
|
@ -26,13 +26,15 @@ This daemon polls a Unifi controller at a short interval and stores the collecte
|
|||
-j, --dumpjson <filter>
|
||||
This is a debug option; use this when you are missing data in your graphs,
|
||||
and/or you want to inspect the raw data coming from the controller. The
|
||||
filter only accepts two options: devices or clients. This will print a lot
|
||||
of information. Recommend piping it into a file and/or into jq for better
|
||||
visualization. This requires a valid config file that; one that contains
|
||||
filter accepts three options: devices, clients, other. This will print a
|
||||
lot of information. Recommend piping it into a file and/or into jq for
|
||||
better visualization. This requires a valid config file that contains
|
||||
working authentication details for a Unifi Controller. This only dumps
|
||||
data for sites listed in the config file. The application exits after
|
||||
printing the JSON payload; it does not daemonize or report to InfluxDB
|
||||
with this option.
|
||||
with this option. The `other` option is special. This allows you to request
|
||||
any api path. It must be enclosed in quotes with the word other. Example:
|
||||
unifi-poller -j "other /stat/admins"
|
||||
|
||||
-h, --help
|
||||
Display usage and exit.
|
||||
|
|
@ -63,6 +65,18 @@ This daemon polls a Unifi controller at a short interval and stores the collecte
|
|||
errors will be logged. Using this with debug=true adds line numbers to
|
||||
any error logs.
|
||||
|
||||
`max_errors` default: 0
|
||||
If you restart the UniFi controller, the poller will lose access until
|
||||
it is restarted. Specifying a number greater than -1 for max_errors will
|
||||
cause the poller to exit once it exceeds the error count specified.
|
||||
This problematic condition can be triggered by InfluxDB having issues
|
||||
too. Generally only 1 error per interval is created, but if more than one
|
||||
backend is having issues, more than 1 error could be generated per interval. Once
|
||||
the poller exits, it is expected that something will restart it
|
||||
automatically so it gets back in line; something is usually systemd,
|
||||
docker or launchd. The default setting of 0 will cause an exit after
|
||||
just 1 error. Recommended values are 0-5.
|
||||
|
||||
`influx_url` default: http://127.0.0.1:8086
|
||||
This is the URL where the Influx web server is available.
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,12 @@
|
|||
# Recommend using debug with this setting for better error logging.
|
||||
#quiet = false
|
||||
|
||||
# If the poller experiences an error from the Unifi Controller or from InfluxDB
|
||||
# it will exit. If you do not want it to exit, change max_errors to -1. You can
|
||||
# adjust the config to tolerate more errors by setting this to a higher value.
|
||||
# Recommend setting this between 0 and 5. See man page for more explanation.
|
||||
#max_errors = 0
|
||||
|
||||
# InfluxDB does not require auth by default, so the user/password are probably unimportant.
|
||||
#influx_url = "http://127.0.0.1:8086"
|
||||
#influx_user = "unifi"
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ type UnifiPoller struct {
|
|||
DumpJSON string
|
||||
ShowVer bool
|
||||
Flag *pflag.FlagSet
|
||||
errorCount int
|
||||
influx.Client
|
||||
*unifi.Unifi
|
||||
*Config
|
||||
|
|
@ -50,6 +51,7 @@ type Metrics struct {
|
|||
|
||||
// Config represents the data needed to poll a controller and report to influxdb.
|
||||
type Config struct {
|
||||
MaxErrors int `json:"max_errors,_omitempty" toml:"max_errors,_omitempty" xml:"max_errors" yaml:"max_errors"`
|
||||
Interval Dur `json:"interval,_omitempty" toml:"interval,_omitempty" xml:"interval" yaml:"interval"`
|
||||
Debug bool `json:"debug" toml:"debug" xml:"debug" yaml:"debug"`
|
||||
Quiet bool `json:"quiet,_omitempty" toml:"quiet,_omitempty" xml:"quiet" yaml:"quiet"`
|
||||
|
|
|
|||
|
|
@ -15,11 +15,13 @@ func hasErr(errs []error) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// logErrors writes a slice of errors, with a prefix, to log-out.
|
||||
func logErrors(errs []error, prefix string) {
|
||||
// LogErrors writes a slice of errors, with a prefix, to log-out.
|
||||
// It also increments the error counter.
|
||||
func (u *UnifiPoller) LogErrors(errs []error, prefix string) {
|
||||
for _, err := range errs {
|
||||
if err != nil {
|
||||
log.Println("[ERROR]", prefix+":", err.Error())
|
||||
u.errorCount++
|
||||
// Arguments must follow the verb order: (count/max) prefix: error text.
log.Printf("[ERROR] (%v/%v) %v: %v", u.errorCount, u.MaxErrors, prefix, err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -35,8 +37,8 @@ func StringInSlice(str string, slc []string) bool {
|
|||
}
|
||||
|
||||
// Logf prints a log entry if quiet is false.
|
||||
func (c *Config) Logf(m string, v ...interface{}) {
|
||||
if !c.Quiet {
|
||||
func (u *UnifiPoller) Logf(m string, v ...interface{}) {
|
||||
if !u.Quiet {
|
||||
log.Printf("[INFO] "+m, v...)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ FIRST:
|
|||
}
|
||||
|
||||
// PollController polls unifi and pushes to influx every interval; it returns an error once the error count exceeds max_errors (a negative max_errors disables this).
|
||||
func (u *UnifiPoller) PollController() {
|
||||
func (u *UnifiPoller) PollController() error {
|
||||
log.Println("[INFO] Everything checks out! Poller started, interval:", u.Interval.value)
|
||||
ticker := time.NewTicker(u.Interval.value)
|
||||
var err error
|
||||
|
|
@ -45,28 +45,28 @@ func (u *UnifiPoller) PollController() {
|
|||
m := &Metrics{}
|
||||
// Get the sites we care about.
|
||||
if m.Sites, err = u.GetFilteredSites(); err != nil {
|
||||
logErrors([]error{err}, "uni.GetSites()")
|
||||
u.LogErrors([]error{err}, "unifi.GetSites()")
|
||||
}
|
||||
// Get all the points.
|
||||
if m.Clients, err = u.GetClients(m.Sites); err != nil {
|
||||
logErrors([]error{err}, "uni.GetClients()")
|
||||
u.LogErrors([]error{err}, "unifi.GetClients()")
|
||||
}
|
||||
if m.Devices, err = u.GetDevices(m.Sites); err != nil {
|
||||
logErrors([]error{err}, "uni.GetDevices()")
|
||||
u.LogErrors([]error{err}, "unifi.GetDevices()")
|
||||
}
|
||||
|
||||
// Make a new Points Batcher.
|
||||
m.BatchPoints, err = influx.NewBatchPoints(influx.BatchPointsConfig{Database: u.InfluxDB})
|
||||
if err != nil {
|
||||
logErrors([]error{err}, "influx.NewBatchPoints")
|
||||
u.LogErrors([]error{err}, "influx.NewBatchPoints")
|
||||
continue
|
||||
}
|
||||
// Batch (and send) all the points.
|
||||
if errs := m.SendPoints(); errs != nil && hasErr(errs) {
|
||||
logErrors(errs, "asset.Points()")
|
||||
u.LogErrors(errs, "asset.Points()")
|
||||
}
|
||||
if err := u.Write(m.BatchPoints); err != nil {
|
||||
logErrors([]error{err}, "infdb.Write(bp)")
|
||||
u.LogErrors([]error{err}, "infdb.Write(bp)")
|
||||
}
|
||||
|
||||
// Talk about the data.
|
||||
|
|
@ -79,7 +79,12 @@ func (u *UnifiPoller) PollController() {
|
|||
u.Logf("Unifi Measurements Recorded. Sites: %d, Clients: %d, "+
|
||||
"Wireless APs: %d, Gateways: %d, Switches: %d, Points: %d, Fields: %d",
|
||||
len(m.Sites), len(m.Clients), len(m.UAPs), len(m.USGs), len(m.USWs), pointcount, fieldcount)
|
||||
|
||||
if u.MaxErrors >= 0 && u.errorCount > u.MaxErrors {
|
||||
return errors.Errorf("reached maximum error count, stopping poller (%d > %d)", u.errorCount, u.MaxErrors)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SendPoints combines all device and client data into influxdb data points.
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ func (u *UnifiPoller) GetConfig() error {
|
|||
if u.DumpJSON != "" {
|
||||
u.Quiet = true
|
||||
}
|
||||
u.Config.Logf("Loaded Configuration: %s", u.ConfigFile)
|
||||
u.Logf("Loaded Configuration: %s", u.ConfigFile)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -71,8 +71,7 @@ func (u *UnifiPoller) Run() (err error) {
|
|||
if err = u.GetInfluxDB(); err != nil {
|
||||
return err
|
||||
}
|
||||
u.PollController()
|
||||
return nil
|
||||
return u.PollController()
|
||||
}
|
||||
|
||||
// GetInfluxDB returns an influxdb interface.
|
||||
|
|
|
|||
Loading…
Reference in New Issue