From 5b47ee4aee3d5871ebf49d716f4f6dae5c997ca1 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Wed, 10 Sep 2014 21:19:41 -0400 Subject: [PATCH] Add unbound plugin --- unbound/README.md | 88 +++++++++++++ unbound/conf.d/unbound.pyconf | 18 +++ unbound/python_modules/unbound.py | 205 ++++++++++++++++++++++++++++++ 3 files changed, 311 insertions(+) create mode 100644 unbound/README.md create mode 100644 unbound/conf.d/unbound.pyconf create mode 100755 unbound/python_modules/unbound.py diff --git a/unbound/README.md b/unbound/README.md new file mode 100644 index 00000000..0727c529 --- /dev/null +++ b/unbound/README.md @@ -0,0 +1,88 @@ +# unbound + +Python module for ganglia. Reads stats from `unbound-control stats`. + +http://unbound.net/documentation/unbound-control.html + +## privileges + +The ganglia user needs to execute the unbound-control stats command, so it's +probably necessary to add this to your sudoers file: + + ganglia ALL=(root) NOPASSWD: /usr/sbin/unbound-control stats + +## stats + +* unbound_num_queries + + number of queries received + +* unbound_num_cachehits + + number of queries that were successfully answered using a cache lookup + +* unbound_num_cachemiss + + number of queries that needed recursive processing + +* unbound_num_prefetch + + number of cache prefetches performed. This number is included + in cachehits, as the original query had the unprefetched answer + from cache, and resulted in recursive processing, taking a slot + in the requestlist. Not part of the recursivereplies (or the + histogram thereof) or cachemiss, as a cache response was sent. + +* unbound_num_recursivereplies + + The number of replies sent to queries that needed recursive + processing. Could be smaller than threadX.num.cachemiss if due to + timeouts no replies were sent for some queries. + +* unbound_requestlist_avg + + The average number of requests in the internal recursive + processing request list on insert of a new incoming recursive + processing query. 
+ +* unbound_requestlist_max + + Maximum size attained by the internal recursive processing + request list. + +* unbound_requestlist_overwritten + + Number of requests in the request list that were overwritten by + newer entries. This happens if there is a flood of queries that + need recursive processing and the server has a hard time. + +* unbound_requestlist_exceeded + + Queries that were dropped because the request list was full. + This happens if a flood of queries need recursive processing, + and the server can not keep up. + +* unbound_requestlist_current_all + + Current size of the request list, includes internally generated + queries (such as priming queries and glue lookups). + +* unbound_requestlist_current_user + + Current size of the request list, only the requests from client + queries. + +* unbound_recursion_time_avg + + Average time it took to answer queries that needed recursive + processing. Note that queries that were answered from the cache + are not in this average. + +* unbound_recursion_time_median + + The median of the time it took to answer queries that needed + recursive processing. The median means that 50% of the user + queries were answered in less than this time. Because of big + outliers (usually queries to non-responsive servers), the + average can be bigger than the median. This median has been + calculated by interpolation from a histogram. 
diff --git a/unbound/conf.d/unbound.pyconf b/unbound/conf.d/unbound.pyconf new file mode 100644 index 00000000..978ff3d1 --- /dev/null +++ b/unbound/conf.d/unbound.pyconf @@ -0,0 +1,18 @@ +modules { + module { + name = "unbound" + language = "python" + param stats_command { + value = "sudo /usr/sbin/unbound-control stats" + } + } +} + +collection_group { + collect_every = 30 + time_threshold = 60 + + metric { + name_match = "unbound_(.+)" + } +} diff --git a/unbound/python_modules/unbound.py b/unbound/python_modules/unbound.py new file mode 100755 index 00000000..4e607205 --- /dev/null +++ b/unbound/python_modules/unbound.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# unbound gmond module for Ganglia +# +# Copyright (C) 2014 by Tobias Schmidt , SoundCloud Inc. +# All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
#

import os
import time

# Every metric registered with gmond is named NAME_PREFIX + <suffix>.
NAME_PREFIX = 'unbound_'

# Module parameters; metric_init() overrides these with whatever it is passed.
PARAMS = {
    'stats_command': 'sudo /usr/sbin/unbound-control stats'
}

# Cache of the last stats run: 'time' is the time.time() of the refresh and
# 'data' maps the stat names printed by the command to float values.
METRICS = {
    'time': 0,
    'data': {}
}

# Maximum cache age in seconds before the stats command is run again.
METRICS_CACHE_MAX = 5

# (name suffix, units, description) of every metric this module exports.
# A suffix maps to the stat 'total.<suffix with "_" replaced by ".">'.
_METRIC_DEFINITIONS = (
    ('num_queries', 'Queries', 'Unbound queries'),
    ('num_cachehits', 'Queries', 'Unbound cachehits'),
    ('num_cachemiss', 'Queries', 'Unbound cachemiss'),
    ('num_prefetch', 'Prefetches', 'Unbound cache prefetches'),
    ('num_recursivereplies', 'Replies', 'Replies to recursive queries'),
    ('requestlist_avg', 'Requests', 'Number of requests (avg.)'),
    ('requestlist_max', 'Requests', 'Number of requests (max.)'),
    ('requestlist_overwritten', 'Requests',
     'Overwritten number of requests'),
    ('requestlist_exceeded', 'Requests', 'Dropped number of requests'),
    ('requestlist_current_all', 'Requests',
     'Unbound requestlist size (all)'),
    ('requestlist_current_user', 'Requests',
     'Unbound requestlist size (user)'),
    ('recursion_time_avg', 'Seconds', 'Unbound recursion latency (avg.)'),
    ('recursion_time_median', 'Seconds',
     'Unbound recursion latency (50th)'),
)


def create_desc(skel, prop):
    """Return a copy of ``skel`` with the entries of ``prop`` applied.

    Neither argument is modified.
    """
    d = skel.copy()
    d.update(prop)
    return d


def _parse_stats(lines):
    """Convert an iterable of ``key=value`` lines into a dict of floats.

    Lines without an '=' and lines whose value is not a number are skipped,
    so blank lines or error text in the command output cannot raise.
    """
    metrics = {}
    for line in lines:
        parts = line.split('=')
        if len(parts) < 2:
            continue
        try:
            metrics[parts[0].strip()] = float(parts[1])
        except ValueError:
            continue
    return metrics


def get_metrics():
    """Return the metrics cache, refreshing it first if it is stale.

    The cache is refreshed by running PARAMS['stats_command'] when the last
    refresh is more than METRICS_CACHE_MAX seconds old.

    Returns:
        dict with 'time' (refresh timestamp) and 'data' (stat name -> float).
    """
    global METRICS

    if (time.time() - METRICS['time']) > METRICS_CACHE_MAX:
        pipe = os.popen(PARAMS['stats_command'])
        try:
            metrics = _parse_stats(pipe.readlines())
        finally:
            # always release the pipe so repeated polling does not leak
            # file descriptors or leave the child process unreaped
            pipe.close()

        METRICS = {
            'time': time.time(),
            'data': metrics
        }

    return METRICS


def get_value(name):
    """Return the value for the requested metric.

    Args:
        name: descriptor name, e.g. 'unbound_num_queries'; it is looked up
            as 'total.num.queries' in the parsed stats.

    Returns:
        The float value, or 0.0 when the stat is not present.
    """
    stat = 'total.' + name[len(NAME_PREFIX):].replace('_', '.')
    return get_metrics()['data'].get(stat, 0.0)


def metric_init(lparams):
    """Store the module parameters and return the metric descriptors.

    Args:
        lparams: dict of parameters; its entries override those in PARAMS.

    Returns:
        list of descriptor dicts, one per entry in _METRIC_DEFINITIONS, each
        using get_value as its 'call_back'.
    """
    # Desc_Skel stays a module-level global, as it was before
    global Desc_Skel

    PARAMS.update(lparams)

    Desc_Skel = {
        'name': 'XXX',
        'call_back': get_value,
        'time_max': 60,
        'value_type': 'float',
        'format': '%f',
        'units': 'XXX',
        'slope': 'both',
        'description': 'XXX',
        'groups': 'unbound',
    }

    return [
        create_desc(Desc_Skel, {
            'name': NAME_PREFIX + suffix,
            'units': units,
            'description': description,
        })
        for suffix, units, description in _METRIC_DEFINITIONS
    ]


def metric_cleanup():
    """Cleanup hook; this module has nothing to release."""

    pass


# the following code is for debugging and testing
if __name__ == '__main__':
    descriptors = metric_init(PARAMS)
    while True:
        for d in descriptors:
            fmt = '%s = %s' % (d['name'], d['format'])
            # single-argument print() behaves the same on Python 2 and 3
            print(fmt % d['call_back'](d['name']))
        print('Sleeping 15 seconds')
        time.sleep(15)