Add max_errors and ability to exit.

This commit is contained in:
David Newhall II 2019-06-18 23:32:35 -07:00
parent a78bf5d482
commit 913565c548
6 changed files with 47 additions and 19 deletions

View File

@ -26,13 +26,15 @@ This daemon polls a Unifi controller at a short interval and stores the collecte
-j, --dumpjson <filter>
This is a debug option; use this when you are missing data in your graphs,
and/or you want to inspect the raw data coming from the controller. The
filter only accepts two options: devices or clients. This will print a lot
of information. Recommend piping it into a file and/or into jq for better
visualization. This requires a valid config file that; one that contains
filter accepts three options: devices, clients, other. This will print a
lot of information. Recommend piping it into a file and/or into jq for
better visualization. This requires a valid config file that contains
working authentication details for a Unifi Controller. This only dumps
data for sites listed in the config file. The application exits after
printing the JSON payload; it does not daemonize or report to InfluxDB
with this option.
with this option. The `other` option is special. It allows you to request
any API path. It must be enclosed in quotes with the word other. Example:
unifi-poller -j "other /stat/admins"
-h, --help
Display usage and exit.
@ -63,6 +65,18 @@ This daemon polls a Unifi controller at a short interval and stores the collecte
errors will be logged. Using this with debug=true adds line numbers to
any error logs.
`max_errors` default: 0
If you restart the UniFi controller, the poller will lose access until
it is restarted. Specifying a number greater than -1 for max_errors will
cause the poller to exit once its error count exceeds the number specified.
This problematic condition can be triggered by InfluxDB having issues
too. Generally only 1 error per interval is created, but if more than one
backend is having issues > 1 error could be generated per interval. Once
the poller exits, it is expected that something will restart it
automatically so it gets back in line; something is usually systemd,
docker or launchd. The default setting of 0 will cause an exit after
just 1 error. Recommended values are 0-5.
`influx_url` default: http://127.0.0.1:8086
This is the URL where the Influx web server is available.

View File

@ -18,6 +18,12 @@
# Recommend using debug with this setting for better error logging.
#quiet = false
# If the poller experiences an error from the Unifi Controller or from InfluxDB
# it will exit. If you do not want it to exit, change max_errors to -1. You can
# adjust the config to tolerate more errors by setting this to a higher value.
# Recommend setting this between 0 and 5. See man page for more explanation.
#max_errors = 0
# InfluxDB does not require auth by default, so the user/password are probably unimportant.
#influx_url = "http://127.0.0.1:8086"
#influx_user = "unifi"

View File

@ -35,6 +35,7 @@ type UnifiPoller struct {
DumpJSON string
ShowVer bool
Flag *pflag.FlagSet
errorCount int
influx.Client
*unifi.Unifi
*Config
@ -50,6 +51,7 @@ type Metrics struct {
// Config represents the data needed to poll a controller and report to influxdb.
type Config struct {
MaxErrors int `json:"max_errors,_omitempty" toml:"max_errors,_omitempty" xml:"max_errors" yaml:"max_errors"`
Interval Dur `json:"interval,_omitempty" toml:"interval,_omitempty" xml:"interval" yaml:"interval"`
Debug bool `json:"debug" toml:"debug" xml:"debug" yaml:"debug"`
Quiet bool `json:"quiet,_omitempty" toml:"quiet,_omitempty" xml:"quiet" yaml:"quiet"`

View File

@ -15,11 +15,13 @@ func hasErr(errs []error) bool {
return false
}
// logErrors writes a slice of errors, with a prefix, to log-out.
func logErrors(errs []error, prefix string) {
// LogErrors writes a slice of errors, with a prefix, to log-out.
// It also incriments the error counter.
func (u *UnifiPoller) LogErrors(errs []error, prefix string) {
for _, err := range errs {
if err != nil {
log.Println("[ERROR]", prefix+":", err.Error())
u.errorCount++
log.Printf("[ERROR] (%v/%v) %v: %v", prefix, err.Error(), u.errorCount, u.MaxErrors)
}
}
}
@ -35,8 +37,8 @@ func StringInSlice(str string, slc []string) bool {
}
// Logf prints a log entry if quiet is false.
// The format string m is prefixed with "[INFO] " and handed, together with
// the arguments v, to log.Printf. Nothing is written when Quiet is set.
func (c *Config) Logf(m string, v ...interface{}) {
if !c.Quiet {
func (u *UnifiPoller) Logf(m string, v ...interface{}) {
if !u.Quiet {
log.Printf("[INFO] "+m, v...)
}
}

View File

@ -37,7 +37,7 @@ FIRST:
}
// PollController runs forever, polling unifi, and pushing to influx.
func (u *UnifiPoller) PollController() {
func (u *UnifiPoller) PollController() error {
log.Println("[INFO] Everything checks out! Poller started, interval:", u.Interval.value)
ticker := time.NewTicker(u.Interval.value)
var err error
@ -45,28 +45,28 @@ func (u *UnifiPoller) PollController() {
m := &Metrics{}
// Get the sites we care about.
if m.Sites, err = u.GetFilteredSites(); err != nil {
logErrors([]error{err}, "uni.GetSites()")
u.LogErrors([]error{err}, "unifi.GetSites()")
}
// Get all the points.
if m.Clients, err = u.GetClients(m.Sites); err != nil {
logErrors([]error{err}, "uni.GetClients()")
u.LogErrors([]error{err}, "unifi.GetClients()")
}
if m.Devices, err = u.GetDevices(m.Sites); err != nil {
logErrors([]error{err}, "uni.GetDevices()")
u.LogErrors([]error{err}, "unifi.GetDevices()")
}
// Make a new Points Batcher.
m.BatchPoints, err = influx.NewBatchPoints(influx.BatchPointsConfig{Database: u.InfluxDB})
if err != nil {
logErrors([]error{err}, "influx.NewBatchPoints")
u.LogErrors([]error{err}, "influx.NewBatchPoints")
continue
}
// Batch (and send) all the points.
if errs := m.SendPoints(); errs != nil && hasErr(errs) {
logErrors(errs, "asset.Points()")
u.LogErrors(errs, "asset.Points()")
}
if err := u.Write(m.BatchPoints); err != nil {
logErrors([]error{err}, "infdb.Write(bp)")
u.LogErrors([]error{err}, "infdb.Write(bp)")
}
// Talk about the data.
@ -79,7 +79,12 @@ func (u *UnifiPoller) PollController() {
u.Logf("Unifi Measurements Recorded. Sites: %d, Clients: %d, "+
"Wireless APs: %d, Gateways: %d, Switches: %d, Points: %d, Fields: %d",
len(m.Sites), len(m.Clients), len(m.UAPs), len(m.USGs), len(m.USWs), pointcount, fieldcount)
if u.MaxErrors >= 0 && u.errorCount > u.MaxErrors {
return errors.Errorf("reached maximum error count, stopping poller (%d > %d)", u.errorCount, u.MaxErrors)
}
}
return nil
}
// SendPoints combines all device and client data into influxdb data points.

View File

@ -50,7 +50,7 @@ func (u *UnifiPoller) GetConfig() error {
if u.DumpJSON != "" {
u.Quiet = true
}
u.Config.Logf("Loaded Configuration: %s", u.ConfigFile)
u.Logf("Loaded Configuration: %s", u.ConfigFile)
return nil
}
@ -71,8 +71,7 @@ func (u *UnifiPoller) Run() (err error) {
if err = u.GetInfluxDB(); err != nil {
return err
}
u.PollController()
return nil
return u.PollController()
}
// GetInfluxDB returns an influxdb interface.