Skip to content

Commit

Permalink
Fix Mizar single yaml deployment SSH issue and add feature-gate for e…
Browse files Browse the repository at this point in the history
…BPF EDT QoS feature (#502)

* Fix Mizar single yaml deployment SSH issue and add feature-gate for eBPF EDT QoS feature

* Fix issue with routing traffic to the EDT eBPF program when a low-priority pod is on a node that is also a bouncer.
  • Loading branch information
vinaykul authored Jun 7, 2021
1 parent 7f57fba commit 0a5f14d
Show file tree
Hide file tree
Showing 11 changed files with 122 additions and 84 deletions.
3 changes: 0 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,6 @@ clean::
rm -rf build/tests/*
rm -f *.gcov

.PHONY: test
test:: lcov functest

.PHONY: unittest
unittest::

Expand Down
47 changes: 26 additions & 21 deletions etc/deploy/deploy.mizar.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
---
# mizar CRD bouncers.mizar.com
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
Expand Down Expand Up @@ -444,24 +443,27 @@ spec:
hostNetwork: true
hostPID: true
initContainers:
- image: mizarnet/mizar:0.8
name: node-init
command: [./node-init.sh]
securityContext:
privileged: true
volumeMounts:
- name: mizar
mountPath: /home
- name: node-init
image: mizarnet/mizar:0.8
command: [./node-init.sh]
securityContext:
privileged: true
volumeMounts:
- name: mizar
mountPath: /home
containers:
- image: mizarnet/dropletd:0.8
name: mizar-daemon
securityContext:
privileged: true
- name: mizar-daemon
image: mizarnet/dropletd:0.8
env:
- name: FEATUREGATE_BWQOS
value: 'false'
securityContext:
privileged: true
volumes:
- name: mizar
hostPath:
path: /var
type: Directory
- name: mizar
hostPath:
path: /var
type: Directory
---
# mizar deployment of operator
apiVersion: apps/v1
Expand All @@ -486,7 +488,10 @@ spec:
terminationGracePeriodSeconds: 0
hostNetwork: true
containers:
- image: mizarnet/endpointopr:0.8
name: mizar-operator
securityContext:
privileged: true
- name: mizar-operator
image: mizarnet/endpointopr:0.8
env:
- name: FEATUREGATE_BWQOS
value: 'false'
securityContext:
privileged: true
12 changes: 7 additions & 5 deletions etc/deploy/dev.daemon.deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.

---
apiVersion: apps/v1
kind: DaemonSet
metadata:
Expand All @@ -43,7 +42,10 @@ spec:
hostNetwork: true
hostPID: true
containers:
- image: localhost:5000/dropletd:latest
name: mizar-daemon
securityContext:
privileged: true
- name: mizar-daemon
image: localhost:5000/dropletd:latest
env:
- name: FEATUREGATE_BWQOS
value: 'true'
securityContext:
privileged: true
11 changes: 7 additions & 4 deletions etc/deploy/dev.operator.deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ spec:
terminationGracePeriodSeconds: 0
hostNetwork: true
containers:
- image: localhost:5000/endpointopr:latest
name: mizar-operator
securityContext:
privileged: true
- name: mizar-operator
image: localhost:5000/endpointopr:latest
env:
- name: FEATUREGATE_BWQOS
value: 'true'
securityContext:
privileged: true
1 change: 1 addition & 0 deletions etc/docker/node-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ nsenter -t 1 -m -u -n -i apt-get update -y && nsenter -t 1 -m -u -n -i apt-get i
iproute2 \
net-tools \
iputils-ping \
bridge-utils \
ethtool \
curl \
python3.7 \
Expand Down
76 changes: 47 additions & 29 deletions mizar/daemon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def init(benchmark=False):
output = r.stdout.read().decode().strip()
logging.info("Setup done")

cmd = 'nsenter -t 1 -m -u -n -i ip addr show eth0 | grep "inet\\b" | awk \'{print $2}\' | cut -d/ -f1'
cmd = 'nsenter -t 1 -m -u -n -i ip addr show eth0 | grep "inet\\b" | awk \'{print $2}\''
r = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
ip = r.stdout.read().decode().strip()
nodeipmask = r.stdout.read().decode().strip()
nodeip = nodeipmask.split("/")[0]

cmd = "nsenter -t 1 -m -u -n -i ip link set dev eth0 xdpgeneric off"

Expand All @@ -66,43 +67,60 @@ def init(benchmark=False):
}
config = json.dumps(config)
cmd = (
f'''nsenter -t 1 -m -u -n -i /trn_bin/transit -s {ip} load-transit-xdp -i eth0 -j '{config}' ''')
f'''nsenter -t 1 -m -u -n -i /trn_bin/transit -s {nodeip} load-transit-xdp -i eth0 -j '{config}' ''')

r = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
output = r.stdout.read().decode().strip()
logging.info("Running load-transit-xdp: {}".format(output))

if os.getenv('FEATUREGATE_BWQOS', 'false').lower() in ('false', '0'):
logging.info("Bandwidth QoS feature is disabled.")
return

# Setup mizar bridge, update routes, and load EDT TC eBPF program
brscript = (f''' bash -c '\
nsenter -t 1 -m -u -n -i ip link add {CONSTANTS.MIZAR_BRIDGE} type bridge && \
nsenter -t 1 -m -u -n -i sysctl -w net.bridge.bridge-nf-call-iptables=0 && \
nsenter -t 1 -m -u -n -i ip link set dev {CONSTANTS.MIZAR_BRIDGE} up && \
nsenter -t 1 -m -u -n -i ip link set eth0 master {CONSTANTS.MIZAR_BRIDGE} && \
nsenter -t 1 -m -u -n -i brctl show' ''')
logging.info("Node IP: {}".format(nodeipmask))

brcmd = f'''nsenter -t 1 -m -u -n -i sysctl -w net.bridge.bridge-nf-call-iptables=0 && \
nsenter -t 1 -m -u -n -i ip link add {CONSTANTS.MIZAR_BRIDGE} type bridge && \
nsenter -t 1 -m -u -n -i ip link set dev {CONSTANTS.MIZAR_BRIDGE} up && \
nsenter -t 1 -m -u -n -i ip link set eth0 master {CONSTANTS.MIZAR_BRIDGE} && \
nsenter -t 1 -m -u -n -i ip addr add {nodeip} dev {CONSTANTS.MIZAR_BRIDGE} && \
nsenter -t 1 -m -u -n -i brctl show'''

rtlistcmd = 'nsenter -t 1 -m -u -n -i ip route list | grep "dev eth0"'
r = subprocess.Popen(rtlistcmd, shell=True, stdout=subprocess.PIPE)
rtchanges = []
while True:
line = r.stdout.readline()
if not line:
break
rt = line.decode().strip()
rtkey = rt.partition("dev eth0")[0]
rtdesc = rt.partition("dev eth0")[2]
rnew = 'nsenter -t 1 -m -u -n -i ip route change ' + rtkey + f'''dev {CONSTANTS.MIZAR_BRIDGE}''' + rtdesc
if 'default' in rt:
rtchanges.append(rnew)
else:
rtchanges.insert(0, rnew)

rtchangecmd = ""
if len(rtchanges) > 0:
for rtc in rtchanges:
if not rtchangecmd:
rtchangecmd = rtc
else:
rtchangecmd = rtchangecmd + " && " + rtc
rtchangecmd = rtchangecmd + " || true"
rtchangecmd = rtchangecmd + " && "
rtchangecmd = rtchangecmd + f'''nsenter -t 1 -m -u -n -i ip route list'''

brscript = (f''' bash -c '{brcmd} && {rtchangecmd}' ''')
logging.info("Mizar bridge setup script:\n{}\n".format(brscript))
r = subprocess.Popen(brscript, shell=True, stdout=subprocess.PIPE)
output = r.stdout.read().decode().strip()
#TODO: Restore original network config upon error / cleanup
logging.info("Mizar bridge setup complete.\n{}\n".format(output))

logging.info("Node IP: {}".format(ip))

gwcmd = 'nsenter -t 1 -m -u -n -i ip route | grep default | awk \'{print $3}\''
r = subprocess.Popen(gwcmd, shell=True, stdout=subprocess.PIPE)
defaultgw = r.stdout.read().decode().strip()
logging.info("Default gateway: {}".format(defaultgw))

cidrcmd = 'nsenter -t 1 -m -u -n -i ip route | grep "proto kernel" | awk \'{print $1}\''
r = subprocess.Popen(cidrcmd, shell=True, stdout=subprocess.PIPE)
nodecidr = r.stdout.read().decode().strip()
logging.info("CIDR: {}".format(nodecidr))

rtscript = (f''' bash -c '\
nsenter -t 1 -m -u -n -i ip route change {nodecidr} dev {CONSTANTS.MIZAR_BRIDGE} proto kernel scope link src {ip} && \
nsenter -t 1 -m -u -n -i ip route change default via {defaultgw} dev {CONSTANTS.MIZAR_BRIDGE} && \
nsenter -t 1 -m -u -n -i ip route show' ''')
r = subprocess.Popen(rtscript, shell=True, stdout=subprocess.PIPE)
output = r.stdout.read().decode().strip()
logging.info("Route update complete.\n{}\n".format(output))

tcscript = (f''' bash -c '\
nsenter -t 1 -m -u -n -i tc qdisc add dev eth0 clsact && \
nsenter -t 1 -m -u -n -i tc filter del dev eth0 egress && \
Expand Down
7 changes: 5 additions & 2 deletions mizar/daemon/interface_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,11 @@ def _ProvisionVethInterface(self, interface, cni_params):
# Network namespace operations (Move these to the CNI)

veth_peer_index = get_iface_index(interface.veth.peer, self.iproute)
mzbr_index = get_iface_index(CONSTANTS.MIZAR_BRIDGE, self.iproute)
self.iproute.link('set', index=veth_peer_index, master=mzbr_index, state='up', mtu=9000)
if os.getenv('FEATUREGATE_BWQOS', 'false').lower() in ('false', '0'):
self.iproute.link('set', index=veth_peer_index, state='up', mtu=9000)
else:
mzbr_index = get_iface_index(CONSTANTS.MIZAR_BRIDGE, self.iproute)
self.iproute.link('set', index=veth_peer_index, master=mzbr_index, state='up', mtu=9000)

# Configure the Transit Agent
self._ConfigureTransitAgent(interface)
Expand Down
28 changes: 15 additions & 13 deletions mizar/dp/mizar/workflows/builtins/pods/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import logging
import json
import os
from mizar.common.workflow import *
from mizar.dp.mizar.operators.droplets.droplets_operator import *
from mizar.dp.mizar.operators.endpoints.endpoints_operator import *
Expand Down Expand Up @@ -97,19 +98,20 @@ def run(self):

# Get 'mizar.com/egress-bandwidth' from pod annotations
egress_bw = int(0)
annotations = self.param.body['metadata'].get('annotations', {})
if len(annotations) > 0:
k8s_egress_bw = annotations.get(CONSTANTS.MIZAR_EGRESS_BW_TAG)
# Convert [KB|MB|GB]/s to bytes per second.
if k8s_egress_bw is not None:
if k8s_egress_bw.endswith('K'):
egress_bw = int(float(k8s_egress_bw.replace('K', '')) * 1e3)
elif k8s_egress_bw.endswith('M'):
egress_bw = int(float(k8s_egress_bw.replace('M', '')) * 1e6)
elif k8s_egress_bw.endswith('G'):
egress_bw = int(float(k8s_egress_bw.replace('G', '')) * 1e9)
else:
egress_bw = int(k8s_egress_bw)
if os.getenv('FEATUREGATE_BWQOS', 'false').lower() in ('true', '1'):
annotations = self.param.body['metadata'].get('annotations', {})
if len(annotations) > 0:
k8s_egress_bw = annotations.get(CONSTANTS.MIZAR_EGRESS_BW_TAG)
# Convert [KB|MB|GB]/s to bytes per second.
if k8s_egress_bw is not None:
if k8s_egress_bw.endswith('K'):
egress_bw = int(float(k8s_egress_bw.replace('K', '')) * 1e3)
elif k8s_egress_bw.endswith('M'):
egress_bw = int(float(k8s_egress_bw.replace('M', '')) * 1e6)
elif k8s_egress_bw.endswith('G'):
egress_bw = int(float(k8s_egress_bw.replace('G', '')) * 1e9)
else:
egress_bw = int(k8s_egress_bw)
spec['egress_bandwidth_bytes_per_sec'] = egress_bw

logger.info("Pod spec {}".format(spec))
Expand Down
12 changes: 6 additions & 6 deletions src/xdp/trn_agent_xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,6 @@ static __inline int trn_encapsulate(struct transit_packet *pkt,
pkt->namespace_label_value_opt->length = sizeof(pkt->namespace_label_value_opt->label_value_data) / 4;;
pkt->namespace_label_value_opt->label_value_data.value = namespace_label_value;

if (pkt->ip->tos & IPTOS_MINCOST) {
bpf_debug("[Agent:%ld.0x%x] IP ToS: %u (low priority) -> XDP_PASS!!!!!\n",
pkt->agent_ep_tunid, bpf_ntohl(pkt->agent_ep_ipv4), pkt->ip->tos);
return XDP_PASS;
}

/* If the source and dest address of the tunneled packet is the
* same, then this host is also a transit switch. Just invoke the
* transit XDP program by a tail call;
Expand All @@ -306,6 +300,12 @@ static __inline int trn_encapsulate(struct transit_packet *pkt,
bpf_tail_call(pkt->xdp, &jmp_table, key);
}

if (pkt->ip->tos & IPTOS_MINCOST) {
bpf_debug("[Agent:%ld.0x%x] Low priority pkt to daddr=%x - XDP_PASS\n",
pkt->agent_ep_tunid, bpf_ntohl(pkt->agent_ep_ipv4), pkt->ip->daddr);
return XDP_PASS;
}

/* Send the packet on the egress of the tunneling interface */
bpf_debug("[Agent:%ld.0x%x] REDIRECT: Tunnel to dst=[x%0x].\n",
pkt->agent_ep_tunid, bpf_ntohl(pkt->agent_ep_ipv4),
Expand Down
2 changes: 1 addition & 1 deletion src/xdp/trn_edt_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
#endif


static inline int edt_schedule_departure(struct __sk_buff *skb, __u32 saddr)
static __ALWAYS_INLINE__ int edt_schedule_departure(struct __sk_buff *skb, __u32 saddr)
{
unsigned int key = saddr;
struct edt_config_t *ec;
Expand Down
7 changes: 7 additions & 0 deletions src/xdp/trn_transit_xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ static __inline int trn_rewrite_remote_mac(struct transit_packet *pkt)

trn_set_src_mac(pkt->data, pkt->eth->h_dest);
trn_set_dst_mac(pkt->data, remote_ep->mac);

if (pkt->ip->tos & IPTOS_MINCOST) {
bpf_debug("[Transit:0x%x] Low priority pkt saddr:%x -> daddr:%x - XDP_PASS\n",
(pkt->itf_ipv4), pkt->ip->saddr, pkt->ip->daddr);
return XDP_PASS;
}

return XDP_TX;
}

Expand Down

0 comments on commit 0a5f14d

Please sign in to comment.