Skip to content

Commit

Permalink
Add unbound plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
grobie committed Sep 11, 2014
1 parent 4221f6b commit 5b47ee4
Show file tree
Hide file tree
Showing 3 changed files with 311 additions and 0 deletions.
88 changes: 88 additions & 0 deletions unbound/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# unbound

Python module for ganglia. Reads stats from `unbound-control stats`.

http://unbound.net/documentation/unbound-control.html

## privileges

The ganglia user needs to execute the unbound-control stats command, so it's
probably necessary to add this to your sudoers file:

ganglia ALL=(root) NOPASSWD: /usr/sbin/unbound-control stats

## stats

* unbound_num_queries

number of queries received

* unbound_num_cachehits

number of queries that were successfully answered using a cache lookup

* unbound_num_cachemiss

number of queries that needed recursive processing

* unbound_num_prefetch

number of cache prefetches performed. This number is included
in cachehits, as the original query had the unprefetched answer
from cache, and resulted in recursive processing, taking a slot
in the requestlist. Not part of the recursivereplies (or the
histogram thereof) or cachemiss, as a cache response was sent.

* unbound_num_recursivereplies

The number of replies sent to queries that needed recursive processing.
Could be smaller than threadX.num.cachemiss if due to timeouts no replies
were sent for some queries.

* unbound_requestlist_avg

The average number of requests in the internal recursive processing
request list on insert of a new incoming recursive processing query.

* unbound_requestlist_max

Maximum size attained by the internal recursive processing
request list.

* unbound_requestlist_overwritten

Number of requests in the request list that were overwritten by
newer entries. This happens if there is a flood of queries that need
recursive processing and the server has a hard time.

* unbound_requestlist_exceeded

Queries that were dropped because the request list was full.
This happens if a flood of queries need recursive processing,
and the server can not keep up.

* unbound_requestlist_current_all

Current size of the request list, includes internally generated
queries (such as priming queries and glue lookups).

* unbound_requestlist_current_user

Current size of the request list, only the requests from client
queries.

* unbound_recursion_time_avg

Average time it took to answer queries that needed recursive
processing. Note that queries that were answered from the cache
are not in this average.

* unbound_recursion_time_median

The median of the time it took to answer queries that needed
recursive processing. The median means that 50% of the user
queries were answered in less than this time. Because of big
outliers (usually queries to non responsive servers), the average
can be bigger than the median. This median has been calculated
by interpolation from a histogram.
18 changes: 18 additions & 0 deletions unbound/conf.d/unbound.pyconf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# gmond configuration for the unbound Python metric module.
modules {
module {
# Must match the Python module file name (unbound.py).
name = "unbound"
language = "python"
# Command whose "key=value" output is parsed by the module; it is handed
# to metric_init() and overrides the module's built-in default.
param stats_command {
value = "sudo /usr/sbin/unbound-control stats"
}
}
}

# Collection schedule for every metric the module registers.
# NOTE(review): collect_every / time_threshold are presumably seconds, per
# the gmond.conf documentation -- confirm against your gmond version.
collection_group {
collect_every = 30
time_threshold = 60

metric {
# All descriptors registered by the module are prefixed with "unbound_".
name_match = "unbound_(.+)"
}
}
205 changes: 205 additions & 0 deletions unbound/python_modules/unbound.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# unbound gmond module for Ganglia
#
# Copyright (C) 2014 by Tobias Schmidt <ts@soundcloud.com>, SoundCloud Inc.
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

import os
import time

# Prefix prepended to every metric name registered with gmond.
NAME_PREFIX = 'unbound_'

# Module parameters; metric_init() overrides these with the values passed
# in by gmond (see the "param" entries in unbound.pyconf).
PARAMS = {'stats_command': 'sudo /usr/sbin/unbound-control stats'}

# Cache of the last parsed statistics: 'time' is the time.time() stamp of
# the last refresh, 'data' maps unbound stat keys to float values.
METRICS = {'time': 0, 'data': {}}

# Maximum age of the cache, in seconds, before the stats command is re-run.
METRICS_CACHE_MAX = 5


def create_desc(skel, prop):
    """Build a metric descriptor from a skeleton and a set of overrides.

    Args:
        skel: dict holding the default descriptor fields. It is not
            modified; a shallow copy is made.
        prop: dict of fields that replace (or extend) those of ``skel``.

    Returns:
        A new dict containing the fields of ``skel`` updated with ``prop``.
    """
    desc = skel.copy()
    # dict.update works on both Python 2 and 3, unlike dict.iteritems().
    desc.update(prop)
    return desc


def get_metrics():
    """Return the cached unbound statistics, refreshing them when stale.

    When the cache is older than METRICS_CACHE_MAX seconds, the command in
    PARAMS['stats_command'] is executed and its ``key=value`` output lines
    are parsed into a dict of floats, which replaces the cache.

    Returns:
        The METRICS dict: ``{'time': <refresh timestamp>, 'data': {key: float}}``.
    """

    global METRICS

    if (time.time() - METRICS['time']) > METRICS_CACHE_MAX:
        stats = {}

        # get raw metric data; always close the pipe so the child process
        # is reaped and the file descriptor is released
        pipe = os.popen(PARAMS['stats_command'])
        try:
            for line in pipe:
                fields = line.split('=')
                if len(fields) < 2:
                    # blank line or diagnostic text, not a statistic
                    continue
                key, value = fields[:2]
                try:
                    stats[key] = float(value)
                except ValueError:
                    # non-numeric value: skip it rather than abort the
                    # whole refresh
                    continue
        finally:
            pipe.close()

        # update cache
        METRICS = {
            'time': time.time(),
            'data': stats,
        }

    return METRICS


def get_value(name):
    """Return the current value for the requested metric.

    The gmond metric name is translated into the matching aggregate key of
    the stats output: the NAME_PREFIX is removed, underscores become dots
    and ``total.`` is prepended (``unbound_num_queries`` ->
    ``total.num.queries``).

    Args:
        name: metric name as registered by metric_init().

    Returns:
        The metric value as a float, or 0.0 when the stats output does not
        contain the corresponding key.
    """

    metrics = get_metrics()
    key = 'total.' + name[len(NAME_PREFIX):].replace('_', '.')

    # A missing key is the only expected failure here; report it as 0.0
    # without relying on the Python-2-only StandardError.
    return metrics['data'].get(key, 0.0)


def metric_init(lparams):
    """Initialize the module and return its metric descriptors.

    Args:
        lparams: mapping of module parameters; every entry is copied into
            the module-level PARAMS, overriding the defaults.

    Returns:
        A list of descriptor dicts, one per exported metric, each built
        from the shared skeleton (Desc_Skel) with get_value as call-back.
    """

    global PARAMS, Desc_Skel

    # set parameters
    PARAMS.update((option, lparams[option]) for option in lparams)

    # fields shared by all descriptors; 'XXX' entries are placeholders
    # that are overridden for every metric below
    Desc_Skel = {
        'name': 'XXX',
        'call_back': get_value,
        'time_max': 60,
        'value_type': 'float',
        'format': '%f',
        'units': 'XXX',
        'slope': 'both',
        'description': 'XXX',
        'groups': 'unbound',
    }

    # (metric name suffix, units, description), in registration order
    metric_table = (
        ('num_queries', 'Queries', 'Unbound queries'),
        ('num_cachehits', 'Queries', 'Unbound cachehits'),
        ('num_cachemiss', 'Queries', 'Unbound cachemiss'),
        ('num_prefetch', 'Prefetches', 'Unbound cache prefetches'),
        ('num_recursivereplies', 'Replies', 'Replies to recursive queries'),
        ('requestlist_avg', 'Requests', 'Number of requests (avg.)'),
        ('requestlist_max', 'Requests', 'Number of requests (max.)'),
        ('requestlist_overwritten', 'Requests',
         'Overwritten number of requests'),
        ('requestlist_exceeded', 'Requests', 'Dropped number of requests'),
        ('requestlist_current_all', 'Requests',
         'Unbound requestlist size (all)'),
        ('requestlist_current_user', 'Requests',
         'Unbound requestlist size (user)'),
        ('recursion_time_avg', 'Seconds', 'Unbound recursion latency (avg.)'),
        ('recursion_time_median', 'Seconds',
         'Unbound recursion latency (50th)'),
    )

    return [
        create_desc(Desc_Skel, {
            'name': NAME_PREFIX + suffix,
            'units': units,
            'description': description,
        })
        for suffix, units, description in metric_table
    ]


def metric_cleanup():
    """Clean up on module shutdown; this module holds nothing to release."""
    return None


# The following code is for debugging and testing: when the module is run
# as a script it prints every metric in an endless loop.
if __name__ == '__main__':
    descriptors = metric_init(PARAMS)
    while True:
        for d in descriptors:
            # Build "name = <value format>" first, then fill in the value.
            fmt = '%s = %s' % (d['name'], d['format'])
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(fmt % d['call_back'](d['name']))
        print('Sleeping 15 seconds')
        time.sleep(15)

0 comments on commit 5b47ee4

Please sign in to comment.