# example_isi_data_insights_d.cfg

[isi_data_insights_d]
# Parameters specified on the command line will supersede the parameters
# in this section.
# pid_file: /var/run/isi_data_insights_d.pid
# log_file: /var/run/isi_data_insights_d.log
# default log_level is INFO
# log_level: DEBUG
stats_processor: influxdb_plugin
# To be prompted for the InfluxDB username and password, use "auth" as the
# 4th arg of stats_processor_args; alternatively, define the credentials
# directly in this config.
# examples:
# prompt for the credentials:
# localhost 8086 isi_data_insights auth
# or without prompting
# localhost 8086 isi_data_insights username password ssl=True/False verify_ssl=True/False
stats_processor_args: localhost 8086 isi_data_insights

# clusters in this section are queried for all stat groups
# clusters: [username1:password1@]<ip-or-host-address1>[:True|False]
#	[[username2:password2@]<ip-or-host-address2>[:True|False]]
#	[[username3:password3@]<ip-or-host-address3>[:True|False]]
#	...
# If you don't specify the username and password then you will be prompted
# for them when the daemon starts up.
# Use the optional True or False on the end to specify whether the cluster's
# SSL certificate should be verified. If it is omitted then the default is
# False (i.e. don't verify SSL cert).
clusters:

# Specifies the active list of stat groups to query. Each stat group name
# specified here should have a corresponding section in the config file.
active_stat_groups: cluster_cpu_stats
    cluster_network_traffic_stats
    cluster_client_activity_stats
    cluster_health_stats
    ifs_space_stats
    ifs_rate_stats
    node_load_stats
    node_disk_stats
    node_net_stats
    cluster_disk_rate_stats
    cluster_proto_stats
    cache_stats
    heat_total_stats

# The min_update_interval_override param provides the ability to override the
# minimum interval at which the daemon will query for a set of stats. The
# purpose of the minimum interval, which defaults to 30 seconds, is to prevent
# the daemon's queries from putting too much stress on the cluster.
# min_update_interval_override: 15

[cluster_cpu_stats]
# The clusters (optional) param defines a list of clusters specific to this
# group.
# clusters: 10.25.69.74 10.25.69.75
# The update interval is in seconds, or use *<number> to base the update
# interval off each stat's collection interval (i.e. *2 == 2 times the
# collection interval, *1 == * == 1 times the collection interval of each stat).
update_interval: *
stats: cluster.cpu.sys.avg
    cluster.cpu.user.avg
    cluster.cpu.idle.avg
    cluster.cpu.intr.avg

[cluster_network_traffic_stats]
# The cluster.net.ext.* byte, packet, and error rates, in and out.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: cluster.net.ext.bytes.in.rate
    cluster.net.ext.bytes.out.rate
    cluster.net.ext.packets.in.rate
    cluster.net.ext.packets.out.rate
    cluster.net.ext.errors.in.rate
    cluster.net.ext.errors.out.rate

[cluster_client_activity_stats]
# Per-protocol node.clientstats.active.* and node.clientstats.connected.*
# stats. Note that although the group name starts with "cluster_", every stat
# listed here is a "node." stat.
# NOTE(review): the connected list has no lsass_out, jobd, nfs4, or smb2
# entries while the active list does - confirm this is intentional.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: node.clientstats.active.ftp
    node.clientstats.active.hdfs
    node.clientstats.active.http
    node.clientstats.active.lsass_out
    node.clientstats.active.jobd
    node.clientstats.active.nfs
    node.clientstats.active.nfs4
    node.clientstats.active.nlm
    node.clientstats.active.papi
    node.clientstats.active.siq
    node.clientstats.active.cifs
    node.clientstats.active.smb2
    node.clientstats.connected.ftp
    node.clientstats.connected.hdfs
    node.clientstats.connected.http
    node.clientstats.connected.nfs
    node.clientstats.connected.nlm
    node.clientstats.connected.papi
    node.clientstats.connected.siq
    node.clientstats.connected.cifs

[cluster_health_stats]
# The cluster.health stat plus the cluster.node.count.all and
# cluster.node.count.down stats.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: cluster.health
  cluster.node.count.all
  cluster.node.count.down
 
[ifs_space_stats]
# The ifs.bytes.* (avail, free, used, total) and ifs.percent.* (free, avail,
# used) stats.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: ifs.bytes.avail
  ifs.bytes.free
  ifs.bytes.used
  ifs.bytes.total
  ifs.percent.free
  ifs.percent.avail
  ifs.percent.used

[ifs_rate_stats]
# The ifs.bytes.* and ifs.ops.* in/out rate stats.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: ifs.bytes.in.rate
  ifs.bytes.out.rate
  ifs.ops.in.rate
  ifs.ops.out.rate
 
[node_load_stats]
# The node.load.* stats (1min, 5min, 15min). Despite the group name, this
# group also carries the node.memory.used, node.memory.free, and
# node.open.files stats.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: node.load.1min
  node.load.5min
  node.load.15min
  node.memory.used
  node.memory.free
  node.open.files
 
[node_disk_stats]
# The node.disk.*.avg stats: byte and xfer rates, xfer sizes, busy, access
# latency/slow, and iosched queue/latency.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: node.disk.bytes.out.rate.avg
  node.disk.bytes.in.rate.avg
  node.disk.busy.avg
  node.disk.xfers.out.rate.avg
  node.disk.xfers.in.rate.avg
  node.disk.xfer.size.out.avg
  node.disk.xfer.size.in.avg
  node.disk.access.latency.avg
  node.disk.access.slow.avg
  node.disk.iosched.queue.avg
  node.disk.iosched.latency.avg

[node_net_stats]
# The node.net.int.* and node.net.ext.* byte and error rates, in and out.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: node.net.int.bytes.in.rate
  node.net.int.bytes.out.rate
  node.net.ext.bytes.in.rate
  node.net.ext.bytes.out.rate
  node.net.int.errors.in.rate
  node.net.int.errors.out.rate
  node.net.ext.errors.in.rate
  node.net.ext.errors.out.rate

[cluster_disk_rate_stats]
# The cluster.disk.xfers.* and cluster.disk.bytes.* rate stats.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: cluster.disk.xfers.rate
  cluster.disk.xfers.in.rate
  cluster.disk.xfers.out.rate
  cluster.disk.bytes.in.rate
  cluster.disk.bytes.out.rate

[cluster_proto_stats]
# For each of 14 protocols (nfs, nlm, cifs, ftp, http, siq, jobd, smb2, nfs4,
# irp, lsass_in, lsass_out, papi, hdfs) this group lists both the
# cluster.protostats.<protocol> stat and its cluster.protostats.<protocol>.total
# counterpart.
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: cluster.protostats.nfs
  cluster.protostats.nlm
  cluster.protostats.cifs
  cluster.protostats.ftp
  cluster.protostats.http
  cluster.protostats.siq
  cluster.protostats.jobd
  cluster.protostats.smb2
  cluster.protostats.nfs4
  cluster.protostats.irp
  cluster.protostats.lsass_in
  cluster.protostats.lsass_out
  cluster.protostats.papi
  cluster.protostats.hdfs
  cluster.protostats.nfs.total
  cluster.protostats.nlm.total
  cluster.protostats.cifs.total
  cluster.protostats.ftp.total
  cluster.protostats.http.total
  cluster.protostats.siq.total
  cluster.protostats.jobd.total
  cluster.protostats.smb2.total
  cluster.protostats.nfs4.total
  cluster.protostats.irp.total
  cluster.protostats.lsass_in.total
  cluster.protostats.lsass_out.total
  cluster.protostats.papi.total
  cluster.protostats.hdfs.total

[cache_stats]
# A single stat: node.ifs.cache.
# update_interval "*" means the stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: node.ifs.cache

[heat_total_stats]
# The node.ifs.heat.<operation>.total stats for 12 operations (lock, blocked,
# contended, deadlocked, write, read, lookup, rename, link, unlink, getattr,
# setattr).
# update_interval "*" means each stat is updated at 1 times its own
# collection interval.
update_interval: *
stats: node.ifs.heat.lock.total
  node.ifs.heat.blocked.total
  node.ifs.heat.contended.total
  node.ifs.heat.deadlocked.total
  node.ifs.heat.write.total
  node.ifs.heat.read.total
  node.ifs.heat.lookup.total
  node.ifs.heat.rename.total
  node.ifs.heat.link.total
  node.ifs.heat.unlink.total
  node.ifs.heat.getattr.total
  node.ifs.heat.setattr.total

# These stats are not currently active by default (concurrency_stats is not listed in
# active_stat_groups). They are here to serve as an example of how to use the
# derived stats functionality. See the comments below for more details.
[concurrency_stats]
update_interval: *
stats: node.ifs.ops.in node.ifs.ops.out node.disk.iosched.latency.avg
  cluster.protostats.nfs.total
  cluster.protostats.smb2.total
  cluster.protostats.nlm.total
  cluster.protostats.cifs.total
  cluster.protostats.http.total
  cluster.protostats.siq.total
  cluster.protostats.nfs4.total
  cluster.protostats.hdfs.total
  cluster.protostats.ftp.total
# The composite_stats, equation_stats, percent_change_stats, final_equation_stats parameters allow you to
# specify new stats that are derived from the values of other stats. You can derive stats from base stats
# or even specific fields or indices within a base stat's value, which is actually required if the
# base stat's value is not a float or integer (i.e. it is a dict or list). See below for more
# info on each type of derived stat.

#### Composite Stats Description #####
# The composite_stats parameter specifies a list of node specific stats (i.e. stats whose name
# starts with "node.") where each stat is composited across the entire cluster using the specified
# operation. Supported operations at this time are avg, max, min, and sum.
# The output name of a composite_stat is: cluster.<name of original stat>.[<field1>[...<fieldN>]].<name of operation>,
# so for the three composite stats below it would be cluster.node.ifs.ops.in.sum,
# cluster.node.ifs.ops.out.sum, and cluster.node.disk.iosched.latency.avg.avg. If the base stat
# contains one or more fields then those are appended to the name with '.' as delimiter, e.g.:
# sum(node.protostats.nfs.total:op_count) -> cluster.node.protostats.nfs.total.op_count.sum
composite_stats: sum(node.ifs.ops.in) sum(node.ifs.ops.out) avg(node.disk.iosched.latency.avg)


#### Equation Stats Description #####
# The equation_stats parameter specifies a list of output stat names for stats that will be
# derived from an equation that takes as input either base stat values or composite_stats values.
# The equation for each equation stat is specified in a parameter named the same as the equation
# stat.
equation_stats: cluster.ifs.concurrency cluster.protostats.all.total.op_count cluster.protostats.all.total.time_avg
# This is the definition of the equation used to compute the cluster.ifs.concurrency stat.
# Any of the base stats or any composite stat can be used in the equation expression. Any
# expression supported by the Equation package of Python can be used:
# https://pypi.python.org/pypi/Equation
cluster.ifs.concurrency: (cluster.node.ifs.ops.in.sum + cluster.node.ifs.ops.out.sum) * cluster.node.disk.iosched.latency.avg.avg
# The cluster.protostats.all.total.op_count is a sum of all 9 of the different protocols' op_count
# (each protocol listed in the stats parameter above appears exactly once).
# This equation shows an example of how to select a specific field within a stat that returns a dict, in this case the op_count
# field. Note that some stats are returned as a list with always only a single dict item - in those cases the value is treated
# as if it was just a dict. Otherwise, to index into a list you would use numeric field names after the colon. Multiple field
# names or list indices are allowed (i.e. node.example.stat:field1:field2:field3...).
cluster.protostats.all.total.op_count: cluster.protostats.nfs.total:op_count + cluster.protostats.smb2.total:op_count + cluster.protostats.nlm.total:op_count + cluster.protostats.cifs.total:op_count + cluster.protostats.http.total:op_count + cluster.protostats.siq.total:op_count + cluster.protostats.nfs4.total:op_count + cluster.protostats.hdfs.total:op_count + cluster.protostats.ftp.total:op_count
# This stat computes the sum of the time_avg field across the same 9 protocols and then takes an
# average, so the divisor must match the number of terms in the sum (9).
cluster.protostats.all.total.time_avg: (cluster.protostats.nfs.total:time_avg + cluster.protostats.smb2.total:time_avg + cluster.protostats.nlm.total:time_avg + cluster.protostats.cifs.total:time_avg + cluster.protostats.http.total:time_avg + cluster.protostats.siq.total:time_avg + cluster.protostats.nfs4.total:time_avg + cluster.protostats.hdfs.total:time_avg + cluster.protostats.ftp.total:time_avg) / 9.0

#### Percent Change Stats Description #####
# The percent_change_stats parameter specifies a list of base stats, composite stats, and/or equation
# stats whose percent change from one measurement to the next will be stored in a new stat whose
# name will be <name of original stat>.percentchange
percent_change_stats: cluster.node.disk.iosched.latency.avg.avg cluster.protostats.all.total.time_avg

#### Final Equation Stats Description #####
# The final_equation_stats parameter is the same as the equation_stats parameter except these equations have access to base stats and
# all of the previously defined derived stats as input. Again list the names of the output stats and then list the equation for each
# output stat in a parameter of that same name.
final_equation_stats: cluster.ifs.concurrency.importance
# Definition of the cluster.ifs.concurrency.importance final equation stat
cluster.ifs.concurrency.importance: (cluster.protostats.all.total.op_count * cluster.protostats.all.total.time_avg) * cluster.node.disk.iosched.latency.avg.avg.percentchange