From 33ea22ecd632e9dba208d930c72558c310d477ee Mon Sep 17 00:00:00 2001 From: Evan Fraser Date: Fri, 21 Mar 2014 15:35:55 +1300 Subject: [PATCH 1/4] Updated Recoverpoint module to perform SSH queries as separate thread --- recoverpoint/README.mkdn | 3 +++ recoverpoint/recoverpoint.py | 31 +++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/recoverpoint/README.mkdn b/recoverpoint/README.mkdn index 5202cd0f..043b664e 100644 --- a/recoverpoint/README.mkdn +++ b/recoverpoint/README.mkdn @@ -18,6 +18,9 @@ Currently gathers: * Save the recoverpoint.py into your ganglia python module dir eg: /usr/lib/ganglia/python_modules. Update the username/passwords if necessary. * Restart gmond and a "recoverpoint" host should appear in ganglia. +## UPDATE + * 21/03/2014 - Now performs SSH queries asynchronously as a separate thread. (This should stop it slowing and breaking gmond) + ## AUTHOR Author: Evan Fraser <evan.fraser@trademe.co.nz> diff --git a/recoverpoint/recoverpoint.py b/recoverpoint/recoverpoint.py index ab93f140..d58658d1 100755 --- a/recoverpoint/recoverpoint.py +++ b/recoverpoint/recoverpoint.py @@ -9,6 +9,7 @@ import warnings import pprint import time +import threading import re with warnings.catch_warnings(): @@ -23,6 +24,7 @@ #This is the minimum interval between querying the RPA for metrics. #Each ssh query takes 1.6s so we limit the interval between getting metrics to this interval. 
NIMETRICS_CACHE_MAX = 10 +RAWDATA = "" ipaddr = '' @@ -116,20 +118,27 @@ def create_desc(skel, prop): for k,v in prop.iteritems(): d[k] = v return d + + +def run_ssh_thread(foo,bar): + global RAWDATA + sshcon = paramiko.SSHClient() + sshcon.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + sshcon.connect(ipaddr, username='monitor',password='monitor',look_for_keys='False') + stdin, stdout, sterr = sshcon.exec_command("get_system_statistics;get_group_statistics") + RAWDATA = stdout.read() + + def get_metrics(name): global NIMETRICS,ipaddr # if interval since last check > NIMETRICS_CACHE_MAX get metrics again metrics = {} if (time.time() - NIMETRICS['time']) > NIMETRICS_CACHE_MAX: - - sshcon = paramiko.SSHClient() - sshcon.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - sshcon.connect(ipaddr, username='monitor',password='monitor',look_for_keys='False') - stdin, stdout, sterr = sshcon.exec_command("get_system_statistics;get_group_statistics") - rawdata = stdout.read() + threading.Thread(run_ssh_thread(1,1)) + rawdata = RAWDATA #Group stats don't leave a space after the colon in some places - rawmetrics = yaml.safe_load(rawdata.replace(':N',': N')) + rawmetrics = yaml.safe_load(rawdata.replace(':N',': N').replace("Compression","\n Compression")) #Get RPA metrics for rpa in rawmetrics['RPA statistics']: for metric in rawmetrics['RPA statistics'][rpa]: @@ -210,7 +219,7 @@ def get_metrics(name): def metric_init(params): - global descriptors, Desc_Skel, ipaddr + global descriptors, Desc_Skel, ipaddr, RAWDATA print '[recoverpoint] Recieved the following parameters' print params ipaddr = params['mgmtip'] @@ -234,8 +243,10 @@ def metric_init(params): sshcon.connect(ipaddr, username='monitor',password='monitor',look_for_keys='False') stdin, stdout, sterr = sshcon.exec_command("get_system_statistics;get_group_statistics") rawdata = stdout.read() + RAWDATA = rawdata +# f = #Group stats don't leave a space after the colon in some places - statsDict = 
yaml.safe_load(rawdata.replace(':N',': N')) + statsDict = yaml.safe_load(rawdata.replace(':N',': N').replace("Compression","\n Compression")) sshcon.close() descriptors = define_metrics(Desc_Skel, statsDict) @@ -244,7 +255,7 @@ def metric_init(params): # For CLI Debuging: if __name__ == '__main__': params = { - 'mgmtip' : '192.168.1.100', + 'mgmtip' : '10.10.9.170', } descriptors = metric_init(params) From 16819fe91715e40263a449b8923efb53a651a663 Mon Sep 17 00:00:00 2001 From: Evan Fraser Date: Fri, 21 Mar 2014 15:38:24 +1300 Subject: [PATCH 2/4] changed default ip --- recoverpoint/recoverpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recoverpoint/recoverpoint.py b/recoverpoint/recoverpoint.py index d58658d1..9f273af0 100755 --- a/recoverpoint/recoverpoint.py +++ b/recoverpoint/recoverpoint.py @@ -255,7 +255,7 @@ def metric_init(params): # For CLI Debuging: if __name__ == '__main__': params = { - 'mgmtip' : '10.10.9.170', + 'mgmtip' : '192.168.1.100', } descriptors = metric_init(params) From 30917fdf6c30a1d67c71bbcd267973dc84c14011 Mon Sep 17 00:00:00 2001 From: Evan Fraser Date: Mon, 24 Mar 2014 09:37:48 +1300 Subject: [PATCH 3/4] Fixed CG SAN unit bug, added compatibility note into readme --- recoverpoint/README.mkdn | 4 ++++ recoverpoint/recoverpoint.py | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/recoverpoint/README.mkdn b/recoverpoint/README.mkdn index 043b664e..88d84954 100644 --- a/recoverpoint/README.mkdn +++ b/recoverpoint/README.mkdn @@ -8,6 +8,9 @@ Currently gathers: * Per Consistency Group Write, Data, Time and Journal Lags, as well as WAN and SAN traffic. * Per Consistency Group Protection Window metrics. +## Compatibility + * Compatible with Recoverpoint 3.5 + ## DEPENDS * python YAML * paramiko modules @@ -20,6 +23,7 @@ Currently gathers: ## UPDATE * 21/03/2014 - Now performs SSH queries asynchronously as a separate thread. 
(This should stop it slowing and breaking gmond) + * 24/03/2014 - Fixed SAN traffic reporting for cases where Recoverpoint reports the throughput in units smaller than Mbit/s ## AUTHOR diff --git a/recoverpoint/recoverpoint.py b/recoverpoint/recoverpoint.py index 9f273af0..7f36ef65 100755 --- a/recoverpoint/recoverpoint.py +++ b/recoverpoint/recoverpoint.py @@ -3,6 +3,7 @@ # Desc: Ganglia Python module for gathering EMC recoverpoint statistics via SSH # Author: Evan Fraser (evan.fraser@trademe.co.nz) # Date: 01/08/2012 +# Compatibility note: Compatible with Recoverpoint version 3.5 import yaml @@ -153,9 +154,18 @@ def get_metrics(name): for group in rawmetrics['Group']: #CG SAN and Journal lag are under the policies for policyname in rawmetrics['Group'][group]['Copy stats']: - #Get CG SAN metrics (remove 'Mbps' from end + convert to float and then bits) + #Get CG SAN metrics (Work out the unit from end + convert to float and then bits) if 'SAN traffic' in rawmetrics['Group'][group]['Copy stats'][policyname]: - metrics[group + '_SAN_Traffic'] = float(rawmetrics['Group'][group]['Copy stats'][policyname]['SAN traffic']['Current throughput'][:-4]) * 1024 * 1024 + cg_san_str = rawmetrics['Group'][group]['Copy stats'][policyname]['SAN traffic']['Current throughput'] + cg_san_bw = float(cg_san_str[:-4]) + cg_san_unit = cg_san_str[-4:] + if 'Mbps' in cg_san_unit: + cg_san_bw = cg_san_bw * 1024 * 1024 + else: + cg_san_bw = cg_san_bw * 1024 + metrics[group + '_SAN_Traffic'] = cg_san_bw + + elif 'Journal' in rawmetrics['Group'][group]['Copy stats'][policyname]: datastr = rawmetrics['Group'][group]['Copy stats'][policyname]['Journal']['Journal lag'] amount = float(datastr[:-2]) From fca0fb5a6ae15b13d55b224de192fb8ae942f3cc Mon Sep 17 00:00:00 2001 From: Evan Fraser Date: Wed, 26 Mar 2014 09:38:01 +1300 Subject: [PATCH 4/4] Fixed the recoverpoint consistency group WAN units when traffic is low --- recoverpoint/recoverpoint.py | 15 ++++++++++++--- 1 file changed, 12 
insertions(+), 3 deletions(-) diff --git a/recoverpoint/recoverpoint.py b/recoverpoint/recoverpoint.py index 7f36ef65..24f99f12 100755 --- a/recoverpoint/recoverpoint.py +++ b/recoverpoint/recoverpoint.py @@ -191,9 +191,18 @@ def get_metrics(name): #CG Lag and WAN stats are in the Link stats section for repname in rawmetrics['Group'][group]['Link stats']: - #Get CG WAN metrics (remove 'Mbps' from end + convert to float and then bits) - metrics[group + '_WAN_Traffic'] = float(rawmetrics['Group'][group]['Link stats'][repname]['Replication']['WAN traffic'][:-4]) * 1024 * 1024 - + #Get CG WAN metrics (Work out the unit from end + convert to float and then bits) + ##(remove 'Mbps' from end + convert to float and then bits) + #metrics[group + '_WAN_Traffic'] = float(rawmetrics['Group'][group]['Link stats'][repname]['Replication']['WAN traffic'][:-4]) * 1024 * 1024 + cg_wan_str = rawmetrics['Group'][group]['Link stats'][repname]['Replication']['WAN traffic'] + cg_wan_bw = float(cg_wan_str[:-4]) + cg_wan_unit = cg_wan_str[-4:] + if 'Mbps' in cg_wan_unit: + cg_wan_bw = cg_wan_bw * 1024 * 1024 + else: + cg_wan_bw = cg_wan_bw * 1024 + metrics[group + '_WAN_Traffic'] = cg_wan_bw + #Get CG Lag metrics for lagfields in rawmetrics['Group'][group]['Link stats'][repname]['Replication']['Lag']: if 'Data' in lagfields: