[warm-reboot] Preboot sad path automation for n lag members (sonic-net#1036)

* Preboot sad path automation for n lag members

Signed-off-by: Neetha John <nejo@microsoft.com>
neethajohn authored Aug 28, 2019
1 parent 095b466 commit 3f8edd3
Showing 5 changed files with 142 additions and 45 deletions.
38 changes: 28 additions & 10 deletions ansible/roles/test/files/ptftests/advanced-reboot.py
@@ -319,16 +319,16 @@ def get_portchannel_info(self):
for member in content[key]['members']:
for vm_key in self.vm_dut_map.keys():
if member in self.vm_dut_map[vm_key]['dut_ports']:
self.vm_dut_map[vm_key]['dut_portchannel'] = key
self.vm_dut_map[vm_key]['dut_portchannel'] = str(key)
self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel1'
break

def get_neigh_port_info(self):
content = self.read_json('neigh_port_info')
for key in content.keys():
if content[key]['name'] in self.vm_dut_map.keys():
self.vm_dut_map[content[key]['name']]['dut_ports'].append(key)
self.vm_dut_map[content[key]['name']]['neigh_ports'].append(content[key]['port'])
self.vm_dut_map[content[key]['name']]['dut_ports'].append(str(key))
self.vm_dut_map[content[key]['name']]['neigh_ports'].append(str(content[key]['port']))
self.vm_dut_map[content[key]['name']]['ptf_ports'].append(self.port_indices[key])

def build_peer_mapping(self):
@@ -355,6 +355,30 @@ def populate_fail_info(self, fails):
self.fails[key] = set()
self.fails[key] |= fails[key]

def get_preboot_info(self):
'''
Prepares the msg string to log when a preboot_oper is defined.
preboot_oper can be represented in the following ways:
e.g. 'neigh_bgp_down' - a single VM will be selected and preboot_oper will be applied to it
'neigh_bgp_down:2' - 2 VMs will be selected and preboot_oper will be applied to the selected 2 VMs
'neigh_lag_member_down:3:1' - used for the lag member down operation only. This indicates that
3 VMs will be selected and 1 of the lag members in the portchannel will be brought down
'''
msg = ''
if self.preboot_oper:
msg = 'Preboot oper: %s ' % self.preboot_oper
if ':' in self.preboot_oper:
oper_list = self.preboot_oper.split(':')
msg = 'Preboot oper: %s ' % oper_list[0] # extract the preboot oper_type
if len(oper_list) > 2:
# extract the number of VMs and the number of LAG members. preboot_oper will be of the form oper:number of VMs:number of LAG members
msg += 'Number of sad path VMs: %s Lag member down in a portchannel: %s' % (oper_list[-2], oper_list[-1])
else:
# extract the number of VMs. preboot_oper will be of the form oper:number of VMs
msg += 'Number of sad path VMs: %s' % oper_list[-1]

return msg
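
A minimal standalone sketch of the message construction above (illustrative only, not part of the commit; the helper name is hypothetical):

```python
# Hypothetical standalone version of get_preboot_info's message building.
def preboot_msg(preboot_oper):
    if not preboot_oper:
        return ''
    if ':' not in preboot_oper:
        return 'Preboot oper: %s ' % preboot_oper
    oper_list = preboot_oper.split(':')
    msg = 'Preboot oper: %s ' % oper_list[0]
    if len(oper_list) > 2:
        # oper:<number of VMs>:<number of LAG members>
        msg += 'Number of sad path VMs: %s Lag member down in a portchannel: %s' % (oper_list[-2], oper_list[-1])
    else:
        # oper:<number of VMs>
        msg += 'Number of sad path VMs: %s' % oper_list[-1]
    return msg

print(preboot_msg('neigh_bgp_down:2'))
# Preboot oper: neigh_bgp_down Number of sad path VMs: 2
print(preboot_msg('neigh_lag_member_down:3:1'))
# Preboot oper: neigh_lag_member_down Number of sad path VMs: 3 Lag member down in a portchannel: 1
```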

def setUp(self):
self.fails['dut'] = set()
self.port_indices = self.read_port_indices()
@@ -427,13 +451,7 @@ def setUp(self):
self.generate_arp_ping_packet()

if self.reboot_type == 'warm-reboot':
# get the number of members down for sad path
if self.preboot_oper:
if ':' in self.preboot_oper:
oper_type, cnt = self.preboot_oper.split(':')
else:
oper_type, cnt = self.preboot_oper, 1
self.log("Preboot Oper: %s Number down: %s" % (oper_type, cnt))
self.log(self.get_preboot_info())

# Pre-generate list of packets to be sent in send_in_background method.
generate_start = datetime.datetime.now()
15 changes: 10 additions & 5 deletions ansible/roles/test/files/ptftests/arista.py
@@ -396,18 +396,23 @@ def verify_bgp_neigh_state(self, dut=None, state="Active"):
self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver)
return self.fails, bgp_state

def change_neigh_lag_state(self, lag, is_up=True):
def change_neigh_lag_state(self, intf, is_up=True):
state = ['shut', 'no shut']
self.do_cmd('configure')
is_match = re.match('(Port-Channel|Ethernet)\d+', lag)
is_match = re.match('(Port-Channel|Ethernet)\d+', intf)
if is_match:
output = self.do_cmd('interface %s' % lag)
output = self.do_cmd('interface %s' % intf)
if 'Invalid' not in output:
self.do_cmd(state[is_up])
self.do_cmd('exit')
self.do_cmd('exit')
self.do_cmd('exit')

def change_neigh_intfs_state(self, intfs, is_up=True):
for intf in intfs:
self.change_neigh_lag_state(intf, is_up=is_up)

def verify_neigh_lag_state(self, lag, state="connected", pre_check=True):
states = state.split(',')
lag_state = False
msg_prefix = ['Postboot', 'Preboot']
is_match = re.match('(Port-Channel|Ethernet)\d+', lag)
@@ -418,7 +423,7 @@ def verify_neigh_lag_state(self, lag, state="connected", pre_check=True):
obj = json.loads(data)

if 'interfaces' in obj and lag in obj['interfaces']:
lag_state = (obj['interfaces'][lag]['interfaceStatus'] == state)
lag_state = (obj['interfaces'][lag]['interfaceStatus'] in states)
else:
self.fails.add('%s: Verify LAG %s: Object missing in output' % (msg_prefix[pre_check], lag))
return self.fails, lag_state
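
Since the DUT-facing LAG state can legitimately differ between pre- and post-boot checks, the expected state is now a comma-separated list and the interface status only needs to match one entry. A small illustrative check (not arista.py code):

```python
# Illustrative sketch of the relaxed state comparison introduced above.
def lag_state_ok(interface_status, expected='connected'):
    return interface_status in expected.split(',')

assert lag_state_ok('notconnect', 'disabled,notconnect')
assert not lag_state_ok('connected', 'disabled,notconnect')
```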
119 changes: 91 additions & 28 deletions ansible/roles/test/files/ptftests/sad_path.py
@@ -36,8 +36,9 @@ def revert(self):

class SadPath(object):
def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args):
(self.oper_type, self.cnt) = oper_type.split(':') if ':' in oper_type else (oper_type, 1)
self.cnt = int(self.cnt)
self.oper_type = ''
self.cnt = 1
self.memb_cnt = 0
self.vm_list = vm_list
self.portchannel_ports = portchannel_ports
self.vm_dut_map = vm_dut_map
@@ -50,6 +51,21 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args)
self.log = []
self.fails = dict()
self.fails['dut'] = set()
self.tot_memb_cnt = 0
self.memb_index = 0
self.extract_oper_info(oper_type)

def extract_oper_info(self, oper_type):
if oper_type and ':' in oper_type:
temp = oper_type.split(':')
self.oper_type = temp[0]
# get the number of VMs where the sad path oper needs to be done
self.cnt = int(temp[1])
if len(temp) > 2:
# get the number of lag members in a portchannel that should be brought down
self.memb_cnt = int(temp[-1])
else:
self.oper_type = oper_type
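
A hypothetical standalone version of the same parsing, shown only to illustrate how the three `preboot_oper` forms decompose into oper type, VM count, and LAG member count:

```python
# Hypothetical standalone version of extract_oper_info, for illustration only.
def extract_oper_info(oper_type):
    cnt, memb_cnt = 1, 0                 # defaults set in SadPath.__init__
    if oper_type and ':' in oper_type:
        parts = oper_type.split(':')
        oper_type, cnt = parts[0], int(parts[1])
        if len(parts) > 2:
            memb_cnt = int(parts[-1])    # lag members per portchannel to bring down
    return oper_type, cnt, memb_cnt

assert extract_oper_info('neigh_bgp_down:2') == ('neigh_bgp_down', 2, 0)
assert extract_oper_info('neigh_lag_member_down:3:1') == ('neigh_lag_member_down', 3, 1)
assert extract_oper_info('dut_lag_down') == ('dut_lag_down', 1, 0)
```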

def cmd(self, cmds):
process = subprocess.Popen(cmds,
@@ -74,7 +90,7 @@ def select_vm(self):
else:
self.neigh_vms.extend(self.vm_list[vm_index:])
self.neigh_vms.extend(self.vm_list[0:exceed_len])
self.vm_list = self.vm_list[exceed_len:vm_len - self.cnt]
self.vm_list = self.vm_list[exceed_len:exceed_len + vm_len - self.cnt]
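
An illustration of the wrap-around selection the corrected slice supports; the computation of exceed_len here is an assumption consistent with the visible slice arithmetic, and the VM names are invented:

```python
# cnt VMs are taken starting at vm_index, wrapping to the front of the list,
# and the remainder is kept as the new vm_list.
vm_list = ['VM0', 'VM1', 'VM2', 'VM3', 'VM4']
cnt, vm_index = 3, 4
vm_len = len(vm_list)
exceed_len = vm_index + cnt - vm_len                 # 2 selections wrap around
neigh_vms = vm_list[vm_index:] + vm_list[0:exceed_len]
vm_list = vm_list[exceed_len:exceed_len + vm_len - cnt]
assert neigh_vms == ['VM4', 'VM0', 'VM1']
assert vm_list == ['VM2', 'VM3']                     # the old slice [2:2] returned []
```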

def get_neigh_name(self):
for key in self.vm_dut_map:
@@ -101,11 +117,25 @@ def vm_disconnect(self):
for vm in self.vm_handles:
self.vm_handles[vm].disconnect()

def select_member(self):
# select the index of the lag member to bring down
if self.tot_memb_cnt != 0:
self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt
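
In the partial-member case the member index is derived from the day of the month, so runs on the same day hit the same member; setup() below only invokes this when fewer than all members are requested. A hedged sketch of that planning (helper name hypothetical):

```python
import datetime

# Hypothetical sketch of the member-down planning done in select_member()/setup():
# memb_cnt == 0 means "bring all members down"; otherwise one index is chosen.
def plan_member_down(tot_memb_cnt, memb_cnt):
    if memb_cnt == 0:
        memb_cnt = tot_memb_cnt
    memb_index = 0
    if tot_memb_cnt != 0 and memb_cnt != tot_memb_cnt:
        memb_index = datetime.datetime.now().day % tot_memb_cnt
    return memb_cnt, memb_index

# e.g. a 2-member portchannel with 'neigh_lag_member_down:3:1' -> (1, 0) or (1, 1)
print(plan_member_down(2, 1))
```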

def setup(self):
self.select_vm()
self.get_neigh_name()
self.down_neigh_port()
self.vm_connect()

# decide whether it is an all-members-down or a partial-members-down case for the lag member oper type
if 'member' in self.oper_type:
self.tot_memb_cnt = len(self.vm_dut_map[self.neigh_names.values()[0]]['dut_ports'])
if self.memb_cnt == 0:
self.memb_cnt = self.tot_memb_cnt
if self.tot_memb_cnt != self.memb_cnt:
self.select_member()

for vm in self.vm_handles:
self.neigh_bgps[vm], self.dut_bgps[vm] = self.vm_handles[vm].get_bgp_info()
self.fails[vm] = set()
@@ -128,9 +158,11 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args,
self.dut_ssh = dut_ssh
self.dut_needed = dict()
self.lag_members_down = dict()
self.neigh_lag_members_down = dict()
self.neigh_lag_state = None
self.po_neigh_map = dict()
self.msg_prefix = ['Postboot', 'Preboot']
self.memb_str = 'member' if 'member' in self.oper_type else ''

def populate_bgp_state(self):
[self.dut_needed.setdefault(vm, self.dut_bgps[vm]) for vm in self.neigh_vms]
@@ -141,11 +173,11 @@ def populate_bgp_state(self):
elif self.oper_type == 'dut_bgp_down':
self.neigh_bgps['changed_state'] = 'Active'
self.dut_bgps['changed_state'] = 'Idle'
elif self.oper_type == 'neigh_lag_down':
elif 'neigh_lag' in self.oper_type:
# on the DUT side, BGP states differ pre- and post-boot, hence multiple expected values are passed
self.neigh_bgps['changed_state'] = 'Idle'
self.dut_bgps['changed_state'] = 'Connect,Active,Idle'
elif self.oper_type == 'dut_lag_down':
elif 'dut_lag' in self.oper_type:
self.neigh_bgps['changed_state'] = 'Idle'
self.dut_bgps['changed_state'] = 'Active,Connect,Idle'

@@ -169,13 +201,22 @@ def sad_setup(self, is_up=True):
time.sleep(30)

elif 'lag' in self.oper_type:
self.log.append('LAG state change will be for %s' % ", ".join(self.neigh_vms))
if self.oper_type == 'neigh_lag_down':
self.log.append('LAG %s state change will be for %s' % (self.memb_str, ", ".join(self.neigh_vms)))
if 'neigh_lag' in self.oper_type:
for vm in self.neigh_vms:
self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'])
self.vm_handles[vm].change_neigh_lag_state(self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'], is_up=is_up)
elif self.oper_type == 'dut_lag_down':

# populate the entities to be brought down on the neighbor end (portchannel or portchannel members)
if 'member' in self.oper_type:
down_intfs = self.neigh_lag_members_down[self.neigh_names[vm]]
else:
down_intfs = [self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']]

self.log.append('Changing state of LAG %s %s to shut' % (self.memb_str, ", ".join(down_intfs)))
self.vm_handles[vm].change_neigh_intfs_state(down_intfs, is_up=is_up)

elif 'dut_lag' in self.oper_type:
self.change_dut_lag_state(is_up=is_up)

# wait for some time for the lag member state to sync
time.sleep(120)

@@ -234,41 +275,63 @@ def sad_bgp_verify(self):
else:
self.fails['dut'].add('BGP state not down on DUT')

def populate_lag_member_down(self, neigh_name):
po_name = self.vm_dut_map[neigh_name]['dut_portchannel']
# build DUT portchannel to down members mapping and neigh name to down members mapping
# if only a single member is down, extract that member and wrap it in a list; otherwise assign the full list directly
if self.tot_memb_cnt != self.memb_cnt:
self.lag_members_down[po_name] = [self.vm_dut_map[neigh_name]['dut_ports'][self.memb_index]]
self.neigh_lag_members_down[neigh_name] = [self.vm_dut_map[neigh_name]['neigh_ports'][self.memb_index]]
else:
self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports']
self.neigh_lag_members_down[neigh_name] = self.vm_dut_map[neigh_name]['neigh_ports']
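
For illustration, assuming a neighbor VM with a two-member portchannel (all VM and port names below are invented), the two mappings built here would look like this:

```python
# Hypothetical vm_dut_map entry; VM and port names are invented.
vm_dut_map = {
    'ARISTA01T1': {
        'dut_portchannel': 'PortChannel0001',
        'dut_ports': ['Ethernet0', 'Ethernet4'],
        'neigh_ports': ['Ethernet1', 'Ethernet2'],
    }
}
# all-member case (tot_memb_cnt == memb_cnt):
#   lag_members_down       == {'PortChannel0001': ['Ethernet0', 'Ethernet4']}
#   neigh_lag_members_down == {'ARISTA01T1': ['Ethernet1', 'Ethernet2']}
# single-member case (memb_index == 1):
#   lag_members_down       == {'PortChannel0001': ['Ethernet4']}
#   neigh_lag_members_down == {'ARISTA01T1': ['Ethernet2']}
```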

def populate_lag_state(self):
if self.oper_type == 'neigh_lag_down':
self.neigh_lag_state = 'disabled'
elif self.oper_type == 'dut_lag_down':
if 'neigh_lag' in self.oper_type:
self.neigh_lag_state = 'disabled,notconnect'
elif 'dut_lag' in self.oper_type:
self.neigh_lag_state = 'notconnect'

for neigh_name in self.neigh_names.values():
# build portchannel to down members mapping
po_name = self.vm_dut_map[neigh_name]['dut_portchannel']
self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports']
self.populate_lag_member_down(neigh_name)

def change_dut_lag_state(self, is_up=True):
state = ['shutdown', 'startup']
for neigh_name in self.neigh_names.values():
dut_portchannel = self.vm_dut_map[neigh_name]['dut_portchannel']
if not re.match('(PortChannel|Ethernet)\d+', dut_portchannel): continue
self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up]))
stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)])
if return_code != 0:
self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel))
self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code))
self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr))

# populate the entity that needs to be brought down (portchannel or portchannel member)
if 'member' in self.oper_type:
down_intfs = self.lag_members_down[dut_portchannel]
else:
self.log.append('%s: State change successful on DUT for %s' % (self.msg_prefix[1 - is_up], dut_portchannel))
down_intfs = [dut_portchannel]

for intf in down_intfs:
if not re.match('(PortChannel|Ethernet)\d+', intf): continue
self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up]))
stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)])
if return_code != 0:
self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf))
self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code))
self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr))
else:
self.log.append('State change successful on DUT for %s' % intf)
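
The per-interface change is pushed over SSH with the `config interface` command exactly as composed above; a hedged sketch of the resulting command list (host and interface names invented, helper name hypothetical):

```python
# Hypothetical sketch of the command built per interface in change_dut_lag_state().
def dut_intf_state_cmd(dut_ssh, intf, is_up):
    state = ['shutdown', 'startup'][is_up]
    return ['ssh', '-oStrictHostKeyChecking=no', dut_ssh,
            'sudo config interface %s %s' % (state, intf)]

print(dut_intf_state_cmd('admin@10.250.0.101', 'PortChannel0001', False))
# ['ssh', '-oStrictHostKeyChecking=no', 'admin@10.250.0.101',
#  'sudo config interface shutdown PortChannel0001']
```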

def verify_dut_lag_member_state(self, match, pre_check=True):
success = True
po_name = match.group(1)
lag_memb_output = match.group(2)
neigh_name = self.po_neigh_map[po_name]
for member in self.vm_dut_map[neigh_name]['dut_ports']:
if po_name in self.lag_members_down and member in self.lag_members_down[po_name]:
search_str = '%s(D)' % member
else:
search_str = '%s(S)' % member
# default state for the lag member
search_str = '%s(S)' % member

if po_name in self.lag_members_down:
if member in self.lag_members_down[po_name]:
search_str = '%s(D)' % member
# single-member-down case: expected state of the members left up in the affected portchannel
elif self.tot_memb_cnt != self.memb_cnt:
search_str = '%s(S*)' % member

if lag_memb_output.find(search_str) != -1:
self.log.append('Lag member %s state as expected' % member)
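
The member-state check searches for per-member markers in the portchannel output: (S) for a member expected up, (D) for a member brought down, and (S*) for the members left up when only one member of the portchannel was shut. An illustrative check against an invented output line:

```python
# The sample output line is hypothetical; only the marker matching mirrors the test.
lag_memb_output = 'LACP(A)(Up)  Ethernet0(S*) Ethernet4(D)'
down_members = ['Ethernet4']

for member in ['Ethernet0', 'Ethernet4']:
    search_str = '%s(D)' % member if member in down_members else '%s(S*)' % member
    assert lag_memb_output.find(search_str) != -1
```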
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- set_fact:
item_cnt: "{{ item.split(':')[1]|int }}"
item_cnt: "{{ item.split(':')[-1]|int }}"
host_max_len: "{{ vm_hosts|length - 1 }}"
member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"

13 changes: 12 additions & 1 deletion ansible/roles/test/tasks/warm-reboot-multi-sad.yml
@@ -3,9 +3,20 @@
reboot_limit: 1
when: reboot_limit is not defined

# preboot_list format is 'preboot oper type:number of VMs down:number of lag members down'. For cases other than lag member down, the last field is omitted
- name: Set vars
set_fact:
pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1']
lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"

- name: Add all lag member down case
set_fact:
pre_list: "{{ pre_list + ['dut_lag_member_down:2:{{ lag_memb_cnt }}', 'neigh_lag_member_down:3:{{ lag_memb_cnt }}']}}"
when: testbed_type in ['t0-64', 't0-116', 't0-64-32']
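
For illustration, on one of the larger testbeds with 2-member portchannels (lag_memb_cnt == 2 is assumed), the final preboot_list would expand roughly to:

```python
# Hypothetical expansion of pre_list after the conditional append above.
pre_list = [
    'neigh_bgp_down:2', 'dut_bgp_down:3',
    'dut_lag_down:2', 'neigh_lag_down:3',
    'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1',
    # added only for t0-64 / t0-116 / t0-64-32: bring all lag members down
    'dut_lag_member_down:2:2', 'neigh_lag_member_down:3:2',
]
```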

- name: Warm-reboot test
include: advanced-reboot.yml
vars:
reboot_type: warm-reboot
preboot_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3']
preboot_list: "{{ pre_list }}"
preboot_files: "peer_dev_info,neigh_port_info"
