From 5b47ee4aee3d5871ebf49d716f4f6dae5c997ca1 Mon Sep 17 00:00:00 2001
From: Tobias Schmidt <ts@soundcloud.com>
Date: Wed, 10 Sep 2014 21:19:41 -0400
Subject: [PATCH] Add unbound plugin

---
 unbound/README.md                 |  88 +++++++++++++
 unbound/conf.d/unbound.pyconf     |  18 +++
 unbound/python_modules/unbound.py | 205 ++++++++++++++++++++++++++++++
 3 files changed, 311 insertions(+)
 create mode 100644 unbound/README.md
 create mode 100644 unbound/conf.d/unbound.pyconf
 create mode 100755 unbound/python_modules/unbound.py

diff --git a/unbound/README.md b/unbound/README.md
new file mode 100644
index 00000000..0727c529
--- /dev/null
+++ b/unbound/README.md
@@ -0,0 +1,88 @@
+# unbound
+
+Python module for ganglia. Reads stats from `unbound-control stats`.
+
+http://unbound.net/documentation/unbound-control.html
+
+## privileges
+
+The ganglia user needs to execute the unbound-control stats command, so it's
+probably necessary to add this to your sudoers file:
+
+	ganglia ALL=(root) NOPASSWD: /usr/sbin/unbound-control stats
+
+## stats
+
+* unbound_num_queries
+
+  number of queries received
+
+* unbound_num_cachehits
+
+  number of queries that were successfully answered using a cache lookup
+
+* unbound_num_cachemiss
+
+  number of queries that needed recursive processing
+
+* unbound_num_prefetch
+
+  number  of  cache prefetches performed.  This number is included
+  in cachehits, as the original query had the unprefetched  answer
+  from  cache, and resulted in recursive processing, taking a slot
+  in the requestlist.  Not part of the  recursivereplies  (or  the
+  histogram thereof) or cachemiss, as a cache response was sent.
+
+* unbound_num_recursivereplies
+
+  The number of replies sent to queries that needed recursive pro-
+  cessing. Could be smaller than threadX.num.cachemiss if  due  to
+  timeouts no replies were sent for some queries.
+
+* unbound_requestlist_avg
+
+  The  average  number  of requests in the internal recursive pro-
+  cessing request list on insert of a new incoming recursive  pro-
+  cessing query.
+
+* unbound_requestlist_max
+
+  Maximum  size  attained  by  the  internal  recursive processing
+  request list.
+
+* unbound_requestlist_overwritten
+
+  Number of requests in the request list that were overwritten  by
+  newer  entries. This happens if there is a flood of queries that
+  need recursive processing and the server has a hard time.
+
+* unbound_requestlist_exceeded
+
+  Queries that were dropped because the  request  list  was  full.
+  This  happens  if  a flood of queries need recursive processing,
+  and the server can not keep up.
+
+* unbound_requestlist_current_all
+
+  Current size of the request list, includes internally  generated
+  queries (such as priming queries and glue lookups).
+
+* unbound_requestlist_current_user
+
+  Current  size of the request list, only the requests from client
+  queries.
+
+* unbound_recursion_time_avg
+
+  Average time it took to answer  queries  that  needed  recursive
+  processing.  Note that queries that were answered from the cache
+  are not in this average.
+
+* unbound_recursion_time_median
+
+  The median of the time it took to  answer  queries  that  needed
+  recursive  processing.   The  median  means that 50% of the user
+  queries were answered in less than this time.   Because  of  big
+  outliers  (usually queries to non responsive servers), the aver-
+  age can be bigger than the median.  This median has been  calcu-
+  lated by interpolation from a histogram.
diff --git a/unbound/conf.d/unbound.pyconf b/unbound/conf.d/unbound.pyconf
new file mode 100644
index 00000000..978ff3d1
--- /dev/null
+++ b/unbound/conf.d/unbound.pyconf
@@ -0,0 +1,18 @@
+modules {
+    module {
+        name = "unbound"
+        language = "python"
+        param stats_command {
+            value = "sudo /usr/sbin/unbound-control stats"
+        }
+    }
+}
+
+collection_group {
+    collect_every = 30
+    time_threshold = 60
+
+    metric {
+        name_match = "unbound_(.+)"
+    }
+}
diff --git a/unbound/python_modules/unbound.py b/unbound/python_modules/unbound.py
new file mode 100755
index 00000000..4e607205
--- /dev/null
+++ b/unbound/python_modules/unbound.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# unbound gmond module for Ganglia
+#
+# Copyright (C) 2014 by Tobias Schmidt <ts@soundcloud.com>, SoundCloud Inc.
+# All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+
+import os
+import time
+
+NAME_PREFIX = 'unbound_'  # prefix of every metric name this module exports
+PARAMS = {  # defaults; metric_init() overlays the parameters it is given
+    'stats_command': 'sudo /usr/sbin/unbound-control stats'
+}
+METRICS = {  # cache of the last parsed stats output
+    'time': 0,  # time.time() of the last refresh (0 = never refreshed)
+    'data': {}  # stat key -> float value
+}
+METRICS_CACHE_MAX = 5  # seconds before the cached stats are re-read
+
+
+def create_desc(skel, prop):
+    d = skel.copy()
+    for k, v in prop.iteritems():
+        d[k] = v
+    return d
+
+
+def get_metrics():
+    """Return all metrics"""
+
+    global METRICS
+
+    if (time.time() - METRICS['time']) > METRICS_CACHE_MAX:
+        # get raw metric data
+        io = os.popen(PARAMS['stats_command'])
+
+        # convert to dict
+        metrics = {}
+        for line in io.readlines():
+            key, value = line.split('=')[:2]
+            metrics[key] = float(value)
+
+        # update cache
+        METRICS = {
+            'time': time.time(),
+            'data': metrics
+        }
+
+    return METRICS
+
+
+def get_value(name):
+    """Return a value for the requested metric"""
+
+    metrics = get_metrics()
+    name = 'total.' + name[len(NAME_PREFIX):].replace('_', '.')
+
+    try:
+        result = metrics['data'][name]
+    except StandardError:
+        result = 0.0
+
+    return result
+
+
+def metric_init(lparams):
+    """Merge lparams into PARAMS and return the list of metric descriptors"""
+
+    global PARAMS, Desc_Skel
+
+    # overlay the given parameters on the module-level defaults
+    for key in lparams:
+        PARAMS[key] = lparams[key]
+
+    Desc_Skel = {
+        'name':        'XXX',  # placeholder; set per metric below
+        'call_back':   get_value,  # every metric is read through get_value
+        'time_max':    60,
+        'value_type':  'float',
+        'format':      '%f',
+        'units':       'XXX',  # placeholder; set per metric below
+        'slope':       'both',
+        'description': 'XXX',  # placeholder; set per metric below
+        'groups':      'unbound',
+    }
+
+    descriptors = []
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'num_queries',
+        'units':       'Queries',
+        'description': 'Unbound queries',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'num_cachehits',
+        'units':       'Queries',
+        'description': 'Unbound cachehits',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'num_cachemiss',
+        'units':       'Queries',
+        'description': 'Unbound cachemiss',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'num_prefetch',
+        'units':       'Prefetches',
+        'description': 'Unbound cache prefetches',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'num_recursivereplies',
+        'units':       'Replies',
+        'description': 'Replies to recursive queries',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'requestlist_avg',
+        'units':       'Requests',
+        'description': 'Number of requests (avg.)',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'requestlist_max',
+        'units':       'Requests',
+        'description': 'Number of requests (max.)',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'requestlist_overwritten',
+        'units':       'Requests',
+        'description': 'Overwritten number of requests',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'requestlist_exceeded',
+        'units':       'Requests',
+        'description': 'Dropped number of requests',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'requestlist_current_all',
+        'units':       'Requests',
+        'description': 'Unbound requestlist size (all)',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'requestlist_current_user',
+        'units':       'Requests',
+        'description': 'Unbound requestlist size (user)',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'recursion_time_avg',
+        'units':       'Seconds',
+        'description': 'Unbound recursion latency (avg.)',
+    }))
+
+    descriptors.append(create_desc(Desc_Skel, {
+        'name':        NAME_PREFIX + 'recursion_time_median',
+        'units':       'Seconds',
+        'description': 'Unbound recursion latency (50th)',
+    }))
+
+    return descriptors
+
+
+def metric_cleanup():
+    """Cleanup hook; currently a no-op."""
+
+    pass
+
+
+# the following code is for debugging and testing
+if __name__ == '__main__':
+    descriptors = metric_init(PARAMS)
+    while True:
+        for d in descriptors:
+            fmt = (('%s = %s') % (d['name'], d['format']))
+            print fmt % (d['call_back'](d['name']))
+        print 'Sleeping 15 seconds'
+        time.sleep(15)