diff --git a/telegraf/metaslab-alloc-stats.sh b/telegraf/metaslab-alloc-stats.sh
new file mode 100755
index 0000000..aaee3fc
--- /dev/null
+++ b/telegraf/metaslab-alloc-stats.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# Wrapper around "estat metaslab-alloc -jm 10" that filters out metrics whose
+# "name" tag contains garbage characters (DLPX-88427). A kernel bug causes
+# estat to occasionally emit stat names containing raw memory bytes or C macro
+# strings. Only names consisting of printable ASCII letters, digits, spaces,
+# and common punctuation are passed through.
+#
+# The filter relies on GNU grep behavior:
+#   LC_ALL=C         - byte-wise matching, so the ranges below are ASCII-only
+#                      and raw bytes are never treated as encoding errors.
+#   -a               - always treat input as text; without it grep can switch
+#                      to "Binary file ... matches" output after garbage bytes.
+#   --line-buffered  - flush after every line; stdout is a pipe, so output
+#                      would otherwise sit in grep's buffer before telegraf
+#                      sees it.
+#
+estat metaslab-alloc -jm 10 |
+    LC_ALL=C grep -a --line-buffered -E '"name":"[A-Za-z0-9 ,_()/.-]+"'
diff --git a/telegraf/telegraf.inputs.playbook b/telegraf/telegraf.inputs.playbook
index 5ed7e21..cd1f2cc 100644
--- a/telegraf/telegraf.inputs.playbook
+++ b/telegraf/telegraf.inputs.playbook
@@ -91,9 +91,10 @@
 ]
 json_string_fields = ["iops(/s)", "avg latency(us)", "stddev(us)", "throughput(k/s)", "microseconds"]
 
-# Collect output from "estat metaslab-alloc -jm 10"
+# Collect output from "estat metaslab-alloc -jm 10" via wrapper script.
+# The wrapper filters out metrics with garbage "name" tags (DLPX-88427).
 [[inputs.execd]]
- command = ["estat", "metaslab-alloc", "-jm", "10"]
+ command = ["/etc/telegraf/metaslab-alloc-stats.sh"]
 name_override = "estat_metaslab-alloc"
 signal = "none"
 restart_delay = "30s"