Re: [chrony-users] Monitoring chrony, Prometheus-friendly metrics |
[ Thread Index |
Date Index
| More chrony.tuxfamily.org/chrony-users Archives
]
- To: chrony-users@xxxxxxxxxxxxxxxxxxxx
- Subject: Re: [chrony-users] Monitoring chrony, Prometheus-friendly metrics
- From: Watson Ladd <watson@xxxxxxxxxxxxxx>
- Date: Thu, 9 Apr 2020 15:08:15 -0700
- Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cloudflare.com; s=google; h=mime-version:references:in-reply-to:from:date:message-id:subject:to; bh=ZvE5C8imSw597NNUvvFKcoaT1iK3y6b3hB1zdYfky7E=; b=sc3HRH9Fx48fKTV1kpfrsuxd8UPQUJk6D/r/3UCAlwKaMfszsY1iyGUQgrH6Iu8AK2 uhtra2ZNxAA/jMLYXrsHeSvKKic/oS3uglfn8AWR/HbcL/aJOsqNg4dsUksY/l++Gifv pnlKjn3DGhjokB3weNRUeu9C2NaTcJx86lUQo=
On Wed, Apr 8, 2020 at 3:23 PM Watson Ladd <watson@xxxxxxxxxxxxxx> wrote:
>
> On Wed, Apr 8, 2020 at 5:58 AM Luca BRUNO <lucab@xxxxxxxxxxxxx> wrote:
> >
> > Hi all,
> > I'm following up from this old thread from 2016 regarding monitoring
> > chrony [0], and from this more recent discussion in Prometheus land [1].
> >
> > [0] https://listengine.tuxfamily.org/chrony.tuxfamily.org/chrony-users/2016/02/msg00003.html
> > [1] https://github.com/prometheus/node_exporter/issues/1666
>
> I've got a python script lying around for exactly this. Let me get the
> approvals sorted out to submit it/send it to you.
Here is the script. Same licensing terms as Chrony itself. I'll submit
a patch to put it in the contrib section shortly.
Note that we have a framework to turn a tool like this into part of
the scrape, so maybe a standalone monitor suits you a bit better.
#!/usr/bin/env python
#
# Description: Extract chronyd metrics from chronyc -c.
# Author: Aanchal Malhotra <aanchal4@xxxxxx>
#
# Works with chrony version 2.4 and higher
import subprocess
import sys
# Argument vectors for the three chronyc reports read by main(); main()
# splits the output into lines and then on commas.
chrony_sourcestats_cmd = "chronyc -c sourcestats".split()
chrony_source_cmd = "chronyc -c sources".split()
chrony_tracking_cmd = "chronyc -c tracking".split()

# Column headings of the sourcestats report. Not referenced by the code
# visible in this file; the indices main() uses for frequency (4), skew (5)
# and standard deviation (7) line up with this order.
metrics_fields = [
    "Name/IP Address", "NP", "NR", "Span",
    "Frequency", "Freq Skew", "Offset", "Std Dev",
]

# Gauge value published as peer_status for each source state character.
# NOTE(review): chronyc also documents a '~' (too variable) state that has
# no entry here - confirm whether it needs a value.
status_types = {
    'x': 0,
    '?': 1,
    '-': 2,
    '+': 3,
    '*': 4,
}

# Human-readable text per state character. Not referenced by the code
# visible in this file.
# NOTE(review): '-' is described as "reference clock" here, while chronyc
# documents '-' as a source that is not combined - confirm before relying
# on these descriptions.
metrics_source = {
    '*': "synchronized (system peer)",
    '+': "synchronized",
    '?': "unreachable",
    'x': "Falseticker",
    '-': "reference clock",
}

# Value of the "mode" label for each source mode character.
metrics_mode = {
    '^': "server",
    '=': "peer",
    '#': "reference clock",
}
def get_cmdoutput(command):
    """Run *command* and return everything it wrote to stdout as text.

    Args:
        command: Argument list passed to ``subprocess.Popen`` (no shell).

    Returns:
        The captured standard output as a text string.

    Raises:
        RuntimeError: If the process finishes with a non-zero return code.
    """
    # universal_newlines=True opens the pipe in text mode, so the result can
    # be split with str separators on Python 3 as well as on Python 2.
    proc = subprocess.Popen(
        command, stdout=subprocess.PIPE, universal_newlines=True)
    out, _ = proc.communicate()
    # communicate() waits for the process to end, so returncode is set.
    return_code = proc.returncode
    if return_code:
        # Single-line literal: a backslash continuation inside the string
        # would pull the next line's leading whitespace into the message.
        raise RuntimeError(
            'Call to "{}" returned error: {}'.format(command, return_code))
    return out
def printPrometheusformat(metric, values):
    """Print one labelled gauge metric family in Prometheus text format.

    Writes the ``# HELP`` and ``# TYPE`` lines followed by one sample line
    per entry of *values*.

    Args:
        metric: Metric name without the ``chronyd_`` prefix.
        values: Mapping from an already formatted label string (for example
            ``'remote="a"'``) to a numeric sample value. A ``None`` key
            produces a sample without a label set.
    """
    print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric))
    print("# TYPE chronyd_%s gauge" % (metric))
    for labels, value in values.items():
        # repr() of a float is the shortest string that round-trips, whereas
        # "%f" rounds to six decimals and would print small offsets as 0.
        sample = repr(float(value))
        if labels is None:
            print("chronyd_%s %s" % (metric, sample))
        else:
            print("chronyd_%s{%s} %s" % (metric, labels, sample))
def printPrometheusscalar(metric, value):
    """Print a single unlabelled gauge in Prometheus text format.

    Args:
        metric: Metric name without the ``chronyd_`` prefix.
        value: Numeric sample value.
    """
    print("# HELP chronyd_%s chronyd metric for %s" % (metric, metric))
    print("# TYPE chronyd_%s gauge" % (metric))
    # repr() of a float is the shortest string that round-trips, whereas
    # "%f" rounds to six decimals and would print small offsets as 0.
    print("chronyd_%s %s" % (metric, repr(float(value))))
def printPrometheusEnum(metric, name):
    """Print a gauge fixed at 1 whose ``value`` label carries *name*.

    Args:
        metric: Metric name without the ``chronyd_`` prefix.
        name: Text placed in the ``value`` label.
    """
    # Label values must have backslash, double quote and line feed escaped.
    # Backslashes go first so the escapes added afterwards are not doubled.
    escaped = (str(name)
               .replace("\\", "\\\\")
               .replace('"', '\\"')
               .replace("\n", "\\n"))
    print("# HELP chronyd_%s enum for %s" % (metric, metric))
    print("# TYPE chronyd_%s gauge" % (metric))
    print("chronyd_%s{value=\"%s\"} 1" % (metric, escaped))
def weight(value):
    """Return the number of set bits in an octal string, divided by 8.0.

    Args:
        value: String parsed as a base-8 integer.

    Returns:
        The count of 1 bits in the parsed integer divided by 8.0.
    """
    parsed = int(value, 8)
    set_bits = sum(1 for digit in format(parsed, 'b') if digit == '1')
    return set_bits / 8.0
def main(argv):
    """Read three chronyc reports and print them as Prometheus gauges.

    The sourcestats, sources and tracking commands are run in that order,
    and the metrics of each report are printed before the next command runs.

    Args:
        argv: Command-line arguments; not used.
    """
    # --- sourcestats: per-source frequency, skew and standard deviation ---
    freq_metrics = {}
    freq_skew_metrics = {}
    std_dev_metrics = {}
    sourcestats_output = get_cmdoutput(chrony_sourcestats_cmd)
    for row in sourcestats_output.split('\n'):
        if not row:
            continue
        cols = row.split(',')
        remote_label = 'remote="%s"' % (cols[0])
        freq_metrics[remote_label] = float(cols[4])
        freq_skew_metrics[remote_label] = float(cols[5])
        std_dev_metrics[remote_label] = float(cols[7])
    printPrometheusformat('freq_skew_ppm', freq_skew_metrics)
    printPrometheusformat('freq_ppm', freq_metrics)
    printPrometheusformat('std_dev_seconds', std_dev_metrics)

    # --- sources: per-source state, reach weight and offset ---
    peer_status_metrics = {}
    peer_reach_metrics = {}
    offset_metrics = {}
    sources_output = get_cmdoutput(chrony_source_cmd)
    for row in sources_output.split('\n'):
        if not row:
            continue
        cols = row.split(',')
        stratum, reach = cols[3], cols[5]
        mode = metrics_mode[cols[0]]
        remote_label = 'remote="%s"' % (cols[2])
        peer_label = '%s,stratum="%s",mode="%s"' % (
            remote_label, stratum, mode)
        # NOTE(review): a state character that is not a key of status_types
        # raises KeyError here and aborts the run.
        peer_status_metrics[peer_label] = float(status_types[cols[1]])
        peer_reach_metrics[peer_label] = weight(reach)
        offset_metrics[remote_label] = float(cols[8])
    printPrometheusformat('peer_status', peer_status_metrics)
    printPrometheusformat('offset_seconds', offset_metrics)
    printPrometheusformat('peer_reachable', peer_reach_metrics)

    # --- tracking: a single comma-separated record ---
    tracking = get_cmdoutput(chrony_tracking_cmd).rstrip().split(",")
    printPrometheusEnum("tracking_source", tracking[1])
    # Numeric columns 2..12 of the record, in column order.
    scalar_names = (
        "tracking_stratum",
        "tracking_ref_time",
        "tracking_system_time",
        "tracking_last_offset",
        "tracking_rms_offset",
        "tracking_frequency_error",
        "tracking_frequency_residual",
        "tracking_frequency_skew",
        "tracking_root_delay",
        "tracking_root_dispersion",
        "tracking_update_interval",
    )
    for column, metric_name in enumerate(scalar_names, start=2):
        printPrometheusscalar(metric_name, float(tracking[column]))
    printPrometheusEnum("tracking_leap_status", tracking[13])
# Run the exporter only when this file is executed as a script, passing the
# arguments that follow the program name.
if __name__ == "__main__":
    main(sys.argv[1:])