diff --git a/ansible/roles/test/files/ptftests/advanced-reboot.py b/ansible/roles/test/files/ptftests/advanced-reboot.py
index 7a94bc4d4b6..9a015aa09ce 100644
--- a/ansible/roles/test/files/ptftests/advanced-reboot.py
+++ b/ansible/roles/test/files/ptftests/advanced-reboot.py
@@ -319,7 +319,7 @@ def get_portchannel_info(self):
             for member in content[key]['members']:
                 for vm_key in self.vm_dut_map.keys():
                     if member in self.vm_dut_map[vm_key]['dut_ports']:
-                        self.vm_dut_map[vm_key]['dut_portchannel'] = key
+                        self.vm_dut_map[vm_key]['dut_portchannel'] = str(key)
                         self.vm_dut_map[vm_key]['neigh_portchannel'] = 'Port-Channel1'
                         break
@@ -327,8 +327,8 @@ def get_neigh_port_info(self):
         content = self.read_json('neigh_port_info')
         for key in content.keys():
             if content[key]['name'] in self.vm_dut_map.keys():
-                self.vm_dut_map[content[key]['name']]['dut_ports'].append(key)
-                self.vm_dut_map[content[key]['name']]['neigh_ports'].append(content[key]['port'])
+                self.vm_dut_map[content[key]['name']]['dut_ports'].append(str(key))
+                self.vm_dut_map[content[key]['name']]['neigh_ports'].append(str(content[key]['port']))
                 self.vm_dut_map[content[key]['name']]['ptf_ports'].append(self.port_indices[key])
 
     def build_peer_mapping(self):
@@ -355,6 +355,30 @@ def populate_fail_info(self, fails):
                 self.fails[key] = set()
             self.fails[key] |= fails[key]
 
+    def get_preboot_info(self):
+        '''
+        Prepares the msg string to log when a preboot_oper is defined.
+        preboot_oper can be represented in the following ways:
+            e.g. 'preboot_oper' - a single VM will be selected and preboot_oper will be applied to it
+                 'neigh_bgp_down:2' - 2 VMs will be selected and preboot_oper will be applied to the selected 2 VMs
+                 'neigh_lag_member_down:3:1' - this case is used for the lag member down operation only. It indicates that
+                     3 VMs will be selected and 1 of the lag members in the portchannel will be brought down
+        '''
+        msg = ''
+        if self.preboot_oper:
+            msg = 'Preboot oper: %s ' % self.preboot_oper
+            if ':' in self.preboot_oper:
+                oper_list = self.preboot_oper.split(':')
+                msg = 'Preboot oper: %s ' % oper_list[0]  # extract the preboot oper type
+                if len(oper_list) > 2:
+                    # extract the number of VMs and the number of LAG members. preboot_oper will be of the form oper:number of VMs:number of lag members
+                    msg += 'Number of sad path VMs: %s Lag member down in a portchannel: %s' % (oper_list[-2], oper_list[-1])
+                else:
+                    # extract the number of VMs. preboot_oper will be of the form oper:number of VMs
+                    msg += 'Number of sad path VMs: %s' % oper_list[-1]
+
+        return msg
+
     def setUp(self):
         self.fails['dut'] = set()
         self.port_indices = self.read_port_indices()
@@ -427,13 +451,7 @@ def setUp(self):
             self.generate_arp_ping_packet()
 
         if self.reboot_type == 'warm-reboot':
-            # get the number of members down for sad path
-            if self.preboot_oper:
-                if ':' in self.preboot_oper:
-                    oper_type, cnt = self.preboot_oper.split(':')
-                else:
-                    oper_type, cnt = self.preboot_oper, 1
-                self.log("Preboot Oper: %s Number down: %s" % (oper_type, cnt))
+            self.log(self.get_preboot_info())
 
             # Pre-generate list of packets to be sent in send_in_background method.
             generate_start = datetime.datetime.now()
diff --git a/ansible/roles/test/files/ptftests/arista.py b/ansible/roles/test/files/ptftests/arista.py
index 04459417849..db967eb0bda 100644
--- a/ansible/roles/test/files/ptftests/arista.py
+++ b/ansible/roles/test/files/ptftests/arista.py
@@ -396,18 +396,23 @@ def verify_bgp_neigh_state(self, dut=None, state="Active"):
                 self.fails.add('Verify BGP %s neighbor: Object missing in output' % ver)
         return self.fails, bgp_state
 
-    def change_neigh_lag_state(self, lag, is_up=True):
+    def change_neigh_lag_state(self, intf, is_up=True):
         state = ['shut', 'no shut']
         self.do_cmd('configure')
-        is_match = re.match('(Port-Channel|Ethernet)\d+', lag)
+        is_match = re.match('(Port-Channel|Ethernet)\d+', intf)
         if is_match:
-            output = self.do_cmd('interface %s' % lag)
+            output = self.do_cmd('interface %s' % intf)
             if 'Invalid' not in output:
                 self.do_cmd(state[is_up])
                 self.do_cmd('exit')
-            self.do_cmd('exit')
+        self.do_cmd('exit')
+
+    def change_neigh_intfs_state(self, intfs, is_up=True):
+        for intf in intfs:
+            self.change_neigh_lag_state(intf, is_up=is_up)
 
     def verify_neigh_lag_state(self, lag, state="connected", pre_check=True):
+        states = state.split(',')
         lag_state = False
         msg_prefix = ['Postboot', 'Preboot']
         is_match = re.match('(Port-Channel|Ethernet)\d+', lag)
@@ -418,7 +423,7 @@ def verify_neigh_lag_state(self, lag, state="connected", pre_check=True):
                 obj = json.loads(data)
 
                 if 'interfaces' in obj and lag in obj['interfaces']:
-                    lag_state = (obj['interfaces'][lag]['interfaceStatus'] == state)
+                    lag_state = (obj['interfaces'][lag]['interfaceStatus'] in states)
                 else:
                     self.fails.add('%s: Verify LAG %s: Object missing in output' % (msg_prefix[pre_check], lag))
         return self.fails, lag_state
diff --git a/ansible/roles/test/files/ptftests/sad_path.py b/ansible/roles/test/files/ptftests/sad_path.py
index 958e4be2e58..bf722d917f5 100644
--- a/ansible/roles/test/files/ptftests/sad_path.py
+++ b/ansible/roles/test/files/ptftests/sad_path.py
@@ -36,8 +36,9 @@ def revert(self):
 
 class SadPath(object):
     def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args):
-        (self.oper_type, self.cnt) = oper_type.split(':') if ':' in oper_type else (oper_type, 1)
-        self.cnt = int(self.cnt)
+        self.oper_type = ''
+        self.cnt = 1
+        self.memb_cnt = 0
         self.vm_list = vm_list
         self.portchannel_ports = portchannel_ports
         self.vm_dut_map = vm_dut_map
@@ -50,6 +51,21 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args)
         self.log = []
         self.fails = dict()
         self.fails['dut'] = set()
+        self.tot_memb_cnt = 0
+        self.memb_index = 0
+        self.extract_oper_info(oper_type)
+
+    def extract_oper_info(self, oper_type):
+        if oper_type and ':' in oper_type:
+            temp = oper_type.split(':')
+            self.oper_type = temp[0]
+            # get the number of VMs on which the sad path oper needs to be performed
+            self.cnt = int(temp[1])
+            if len(temp) > 2:
+                # get the number of lag members in a portchannel that should be brought down
+                self.memb_cnt = int(temp[-1])
+        else:
+            self.oper_type = oper_type
 
     def cmd(self, cmds):
         process = subprocess.Popen(cmds,
@@ -74,7 +90,7 @@ def select_vm(self):
         else:
             self.neigh_vms.extend(self.vm_list[vm_index:])
             self.neigh_vms.extend(self.vm_list[0:exceed_len])
-            self.vm_list = self.vm_list[exceed_len:vm_len - self.cnt]
+            self.vm_list = self.vm_list[exceed_len:exceed_len + vm_len - self.cnt]
 
     def get_neigh_name(self):
         for key in self.vm_dut_map:
@@ -101,11 +117,25 @@ def vm_disconnect(self):
         for vm in self.vm_handles:
             self.vm_handles[vm].disconnect()
 
+    def select_member(self):
+        # select the index of the lag member to bring down
+        if self.tot_memb_cnt != 0:
+            self.memb_index = datetime.datetime.now().day % self.tot_memb_cnt
+
     def setup(self):
         self.select_vm()
         self.get_neigh_name()
         self.down_neigh_port()
         self.vm_connect()
+
+        # decide whether all members or only a few members go down for the lag member oper type
+        if 'member' in self.oper_type:
+            self.tot_memb_cnt = len(self.vm_dut_map[self.neigh_names.values()[0]]['dut_ports'])
+            if self.memb_cnt == 0:
+                self.memb_cnt = self.tot_memb_cnt
+            if self.tot_memb_cnt != self.memb_cnt:
+                self.select_member()
+
         for vm in self.vm_handles:
             self.neigh_bgps[vm], self.dut_bgps[vm] = self.vm_handles[vm].get_bgp_info()
             self.fails[vm] = set()
@@ -128,9 +158,11 @@ def __init__(self, oper_type, vm_list, portchannel_ports, vm_dut_map, test_args,
         self.dut_ssh = dut_ssh
         self.dut_needed = dict()
         self.lag_members_down = dict()
+        self.neigh_lag_members_down = dict()
         self.neigh_lag_state = None
         self.po_neigh_map = dict()
         self.msg_prefix = ['Postboot', 'Preboot']
+        self.memb_str = 'member' if 'member' in self.oper_type else ''
 
     def populate_bgp_state(self):
         [self.dut_needed.setdefault(vm, self.dut_bgps[vm]) for vm in self.neigh_vms]
@@ -141,11 +173,11 @@
         elif self.oper_type == 'dut_bgp_down':
             self.neigh_bgps['changed_state'] = 'Active'
             self.dut_bgps['changed_state'] = 'Idle'
-        elif self.oper_type == 'neigh_lag_down':
+        elif 'neigh_lag' in self.oper_type:
             # on the DUT side, bgp states are different pre and post boot. hence passing multiple values
             self.neigh_bgps['changed_state'] = 'Idle'
             self.dut_bgps['changed_state'] = 'Connect,Active,Idle'
-        elif self.oper_type == 'dut_lag_down':
+        elif 'dut_lag' in self.oper_type:
             self.neigh_bgps['changed_state'] = 'Idle'
             self.dut_bgps['changed_state'] = 'Active,Connect,Idle'
 
@@ -169,13 +201,22 @@ def sad_setup(self, is_up=True):
                 time.sleep(30)
 
         elif 'lag' in self.oper_type:
-            self.log.append('LAG state change will be for %s' % ", ".join(self.neigh_vms))
-            if self.oper_type == 'neigh_lag_down':
+            self.log.append('LAG %s state change will be for %s' % (self.memb_str, ", ".join(self.neigh_vms)))
+            if 'neigh_lag' in self.oper_type:
                 for vm in self.neigh_vms:
-                    self.log.append('Changing state of LAG %s to shut' % self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'])
-                    self.vm_handles[vm].change_neigh_lag_state(self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel'], is_up=is_up)
-            elif self.oper_type == 'dut_lag_down':
+
+                    # populate the entity to be brought down on the neigh end (portchannel/portchannel members)
+                    if 'member' in self.oper_type:
+                        down_intfs = self.neigh_lag_members_down[self.neigh_names[vm]]
+                    else:
+                        down_intfs = [self.vm_dut_map[self.neigh_names[vm]]['neigh_portchannel']]
+
+                    self.log.append('Changing state of LAG %s %s to shut' % (self.memb_str, ", ".join(down_intfs)))
+                    self.vm_handles[vm].change_neigh_intfs_state(down_intfs, is_up=is_up)
+
+            elif 'dut_lag' in self.oper_type:
                 self.change_dut_lag_state(is_up=is_up)
+
             # wait for sometime for lag members state to sync
             time.sleep(120)
@@ -234,30 +275,47 @@ def sad_bgp_verify(self):
         else:
             self.fails['dut'].add('BGP state not down on DUT')
 
+    def populate_lag_member_down(self, neigh_name):
+        po_name = self.vm_dut_map[neigh_name]['dut_portchannel']
+        # build the DUT portchannel to down-members mapping and the neigh name to down-members mapping
+        # if only a single member goes down, extract that member and wrap it in a list; otherwise assign the full list directly
+        if self.tot_memb_cnt != self.memb_cnt:
+            self.lag_members_down[po_name] = [self.vm_dut_map[neigh_name]['dut_ports'][self.memb_index]]
+            self.neigh_lag_members_down[neigh_name] = [self.vm_dut_map[neigh_name]['neigh_ports'][self.memb_index]]
+        else:
+            self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports']
+            self.neigh_lag_members_down[neigh_name] = self.vm_dut_map[neigh_name]['neigh_ports']
+
     def populate_lag_state(self):
-        if self.oper_type == 'neigh_lag_down':
-            self.neigh_lag_state = 'disabled'
-        elif self.oper_type == 'dut_lag_down':
+        if 'neigh_lag' in self.oper_type:
+            self.neigh_lag_state = 'disabled,notconnect'
+        elif 'dut_lag' in self.oper_type:
             self.neigh_lag_state = 'notconnect'
 
         for neigh_name in self.neigh_names.values():
-            # build portchannel to down members mapping
-            po_name = self.vm_dut_map[neigh_name]['dut_portchannel']
-            self.lag_members_down[po_name] = self.vm_dut_map[neigh_name]['dut_ports']
+            self.populate_lag_member_down(neigh_name)
 
     def change_dut_lag_state(self, is_up=True):
         state = ['shutdown', 'startup']
         for neigh_name in self.neigh_names.values():
             dut_portchannel = self.vm_dut_map[neigh_name]['dut_portchannel']
-            if not re.match('(PortChannel|Ethernet)\d+', dut_portchannel): continue
-            self.log.append('Changing state of %s from DUT side to %s' % (dut_portchannel, state[is_up]))
-            stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], dut_portchannel)])
-            if return_code != 0:
-                self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], dut_portchannel))
-                self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code))
-                self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr))
+
+            # populate the entity that needs to be brought down (portchannel or portchannel members)
+            if 'member' in self.oper_type:
+                down_intfs = self.lag_members_down[dut_portchannel]
             else:
-                self.log.append('%s: State change successful on DUT for %s' % (self.msg_prefix[1 - is_up], dut_portchannel))
+                down_intfs = [dut_portchannel]
+
+            for intf in down_intfs:
+                if not re.match('(PortChannel|Ethernet)\d+', intf): continue
+                self.log.append('Changing state of %s from DUT side to %s' % (intf, state[is_up]))
+                stdout, stderr, return_code = self.cmd(['ssh', '-oStrictHostKeyChecking=no', self.dut_ssh, 'sudo config interface %s %s' % (state[is_up], intf)])
+                if return_code != 0:
+                    self.fails['dut'].add('%s: State change not successful from DUT side for %s' % (self.msg_prefix[1 - is_up], intf))
+                    self.fails['dut'].add('%s: Return code: %d' % (self.msg_prefix[1 - is_up], return_code))
+                    self.fails['dut'].add('%s: Stderr: %s' % (self.msg_prefix[1 - is_up], stderr))
+                else:
+                    self.log.append('State change successful on DUT for %s' % intf)
@@ -265,10 +323,15 @@ def verify_dut_lag_member_state(self, match, pre_check=True):
         success = True
         lag_memb_output = match.group(2)
         neigh_name = self.po_neigh_map[po_name]
         for member in self.vm_dut_map[neigh_name]['dut_ports']:
-            if po_name in self.lag_members_down and member in self.lag_members_down[po_name]:
-                search_str = '%s(D)' % member
-            else:
-                search_str = '%s(S)' % member
+            # default state for the lag member
+            search_str = '%s(S)' % member
+
+            if po_name in self.lag_members_down:
+                if member in self.lag_members_down[po_name]:
+                    search_str = '%s(D)' % member
+                # single member down case: expected state of the members of the down portchannel that were not brought down
+                elif self.tot_memb_cnt != self.memb_cnt:
+                    search_str = '%s(S*)' % member
 
             if lag_memb_output.find(search_str) != -1:
                 self.log.append('Lag member %s state as expected' % member)
diff --git a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml
index bf6f88f113d..5262b0b3172 100644
--- a/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml
+++ b/ansible/roles/test/tasks/advanced_reboot/validate_preboot_list.yml
@@ -1,5 +1,5 @@
 - set_fact:
-    item_cnt: "{{ item.split(':')[1]|int }}"
+    item_cnt: "{{ item.split(':')[-1]|int }}"
     host_max_len: "{{ vm_hosts|length - 1 }}"
    member_max_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"
 
diff --git a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml
index 9555da8ca35..292a5684a94 100644
--- a/ansible/roles/test/tasks/warm-reboot-multi-sad.yml
+++ b/ansible/roles/test/tasks/warm-reboot-multi-sad.yml
@@ -3,9 +3,20 @@
     reboot_limit: 1
   when: reboot_limit is not defined
 
+# preboot_list entries are of the form 'preboot oper type:number of VMs down:number of lag members down'. For non lag member cases, the last field is skipped
+- name: Set vars
+  set_fact:
+    pre_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3', 'dut_lag_member_down:3:1', 'neigh_lag_member_down:2:1']
+    lag_memb_cnt: "{{ minigraph_portchannels.values()[0]['members']|length }}"
+
+- name: Add all lag member down case
+  set_fact:
+    pre_list: "{{ pre_list + ['dut_lag_member_down:2:{{ lag_memb_cnt }}', 'neigh_lag_member_down:3:{{ lag_memb_cnt }}']}}"
+  when: testbed_type in ['t0-64', 't0-116', 't0-64-32']
+
 - name: Warm-reboot test
   include: advanced-reboot.yml
   vars:
     reboot_type: warm-reboot
-    preboot_list: ['neigh_bgp_down:2', 'dut_bgp_down:3', 'dut_lag_down:2', 'neigh_lag_down:3']
+    preboot_list: "{{ pre_list }}"
     preboot_files: "peer_dev_info,neigh_port_info"