Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create custom systemd-networkd-wait-online.service override to wait on individual interfaces. (LP: #2060311) #456

Merged
merged 11 commits into from
Apr 24, 2024
Merged
9 changes: 8 additions & 1 deletion netplan_cli/cli/commands/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,12 @@ def command_generate(self):
if self.mapping:
argv += ['--mapping', self.mapping]
logging.debug('command generate: running %s', argv)
res = subprocess.call(argv)
# reload systemd, as we might have changed service units, such as
# /run/systemd/system/systemd-networkd-wait-online.service.d/10-netplan.conf
try:
utils.systemctl_daemon_reload()
except subprocess.CalledProcessError as e:
logging.warning(e)
# FIXME: os.execv(argv[0], argv) would be better but fails coverage
sys.exit(subprocess.call(argv))
sys.exit(res)
2 changes: 1 addition & 1 deletion netplan_cli/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def systemctl_is_installed(unit_pattern):

def systemctl_daemon_reload():
'''Reload systemd unit files from disk and re-calculate its dependencies'''
subprocess.check_call(['systemctl', 'daemon-reload'])
subprocess.check_call(['systemctl', 'daemon-reload', '--no-ask-password'])


def ip_addr_flush(iface):
Expand Down
27 changes: 17 additions & 10 deletions src/generate.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ reload_udevd(void)
* Create enablement symlink for systemd-networkd.service.
*/
static void
enable_networkd(const char* generator_dir)
enable_networkd(const char* generator_dir, gboolean enable_wait_online)
{
g_autofree char* link = g_build_path(G_DIR_SEPARATOR_S, generator_dir, "multi-user.target.wants", "systemd-networkd.service", NULL);
g_debug("We created networkd configuration, adding %s enablement symlink", link);
Expand All @@ -75,13 +75,15 @@ enable_networkd(const char* generator_dir)
// LCOV_EXCL_STOP
}

g_autofree char* link2 = g_build_path(G_DIR_SEPARATOR_S, generator_dir, "network-online.target.wants", "systemd-networkd-wait-online.service", NULL);
_netplan_safe_mkdir_p_dir(link2);
if (symlink("/lib/systemd/system/systemd-networkd-wait-online.service", link2) < 0 && errno != EEXIST) {
// LCOV_EXCL_START
g_fprintf(stderr, "failed to create enablement symlink: %m\n");
exit(1);
// LCOV_EXCL_STOP
if (enable_wait_online) {
g_autofree char* link2 = g_build_path(G_DIR_SEPARATOR_S, generator_dir, "network-online.target.wants", "systemd-networkd-wait-online.service", NULL);
_netplan_safe_mkdir_p_dir(link2);
if (symlink("/lib/systemd/system/systemd-networkd-wait-online.service", link2) < 0 && errno != EEXIST) {
// LCOV_EXCL_START
g_fprintf(stderr, "failed to create enablement symlink: %m\n");
exit(1);
// LCOV_EXCL_STOP
}
}
}

Expand Down Expand Up @@ -300,10 +302,14 @@ int main(int argc, char** argv)
if (netplan_state_get_backend(np_state) == NETPLAN_BACKEND_NM || any_nm)
_netplan_g_string_free_to_file(g_string_new(NULL), rootdir, "/run/NetworkManager/conf.d/10-globally-managed-devices.conf", NULL);

gboolean enable_wait_online = FALSE;
if (any_networkd)
enable_wait_online = _netplan_networkd_write_wait_online(np_state, rootdir);

if (called_as_generator) {
/* Ensure networkd starts if we have any configuration for it */
if (any_networkd)
enable_networkd(files[0]);
enable_networkd(files[0], enable_wait_online);

/* Leave a stamp file so that we don't regenerate the configuration
* multiple times and userspace can wait for it to finish */
Expand All @@ -324,7 +330,8 @@ int main(int argc, char** argv)
/* covered via 'cloud-init' integration test */
if (any_networkd) {
start_unit_jit("systemd-networkd.socket");
start_unit_jit("systemd-networkd-wait-online.service");
if (enable_wait_online)
start_unit_jit("systemd-networkd-wait-online.service");
start_unit_jit("systemd-networkd.service");
}
g_autofree char* glob_run = g_build_path(G_DIR_SEPARATOR_S,
Expand Down
178 changes: 168 additions & 10 deletions src/networkd.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <net/if.h>
#include <sys/stat.h>

#include <glib.h>
Expand All @@ -32,6 +33,84 @@
#include "util-internal.h"
#include "validation.h"

/**
* Query sysfs for the MAC address (up to 20 bytes for infiniband) of @ifname
* The caller owns the returned string and needs to free it.
*/
STATIC char*
_netplan_sysfs_get_mac_by_ifname(const char* ifname, const char* rootdir)
{
g_autofree gchar* content = NULL;
g_autofree gchar* sysfs_path = NULL;
sysfs_path = g_build_path(G_DIR_SEPARATOR_S, rootdir ?: G_DIR_SEPARATOR_S,
"sys", "class", "net", ifname, "address", NULL);

if (!g_file_get_contents (sysfs_path, &content, NULL, NULL)) {
g_debug("%s: Cannot read file contents.", __FUNCTION__);
return NULL;
}

// Trim whitespace & clone value
return g_strdup(g_strstrip(content));
}

/**
* Query sysfs for the driver used by @ifname
* The caller owns the returned string and needs to free it.
*/
STATIC char*
_netplan_sysfs_get_driver_by_ifname(const char* ifname, const char* rootdir)
{
g_autofree gchar* link = NULL;
g_autofree gchar* sysfs_path = NULL;
sysfs_path = g_build_path(G_DIR_SEPARATOR_S, rootdir ?: G_DIR_SEPARATOR_S,
"sys", "class", "net", ifname, "device", "driver", NULL);

link = g_file_read_link(sysfs_path, NULL);
if (!link) {
g_debug("%s: Cannot read symlink of %s.", __FUNCTION__, sysfs_path);
return NULL;
}

return g_path_get_basename(link);
}

STATIC void
_netplan_query_system_interfaces(GHashTable* tbl)
{
g_assert(tbl);
struct if_nameindex *if_nidxs, *intf;
if_nidxs = if_nameindex();
if (if_nidxs != NULL) {
for (intf = if_nidxs; intf->if_index != 0 || intf->if_name != NULL; intf++)
g_hash_table_add(tbl, g_strdup(intf->if_name));
if_freenameindex(if_nidxs);
}
}

/**
* Enumerate all network interfaces (/sys/clas/net/...) and check
* netplan_netdef_match_interface() to see if they match the current NetDef
*/
STATIC void
_netplan_enumerate_interfaces(const NetplanNetDefinition* def, GHashTable* ifaces, GHashTable* tbl, const char* carrier, const char* set_name, const char* rootdir)
{
g_assert(ifaces);
g_assert(tbl);

GHashTableIter iter;
gpointer key;
g_hash_table_iter_init (&iter, ifaces);
while (g_hash_table_iter_next (&iter, &key, NULL)) {
const char* ifname = key;
if (g_hash_table_contains(tbl, ifname)|| (set_name && g_hash_table_contains(tbl, set_name))) continue;
g_autofree gchar* mac = _netplan_sysfs_get_mac_by_ifname(ifname, rootdir);
g_autofree gchar* driver = _netplan_sysfs_get_driver_by_ifname(ifname, rootdir);
if (netplan_netdef_match_interface(def, ifname, mac, driver))
g_hash_table_replace(tbl, set_name ? g_strdup(set_name) : g_strdup(ifname), g_strdup(carrier));
}
}

/**
* Append WiFi frequencies to wpa_supplicant's freq_list=
*/
Expand Down Expand Up @@ -738,7 +817,6 @@ _netplan_netdef_write_network_file(
g_autoptr(GString) link = NULL;
GString* s = NULL;
mode_t orig_umask;
gboolean is_optional = def->optional;

SET_OPT_OUT_PTR(has_been_written, FALSE);

Expand All @@ -761,17 +839,9 @@ _netplan_netdef_write_network_file(
else /* "off" */
mode = "always-down";
g_string_append_printf(link, "ActivationPolicy=%s\n", mode);
/* When activation-mode is used we default to being optional.
* Otherwise systemd might wait indefinitely for the interface to
* become online.
*/
is_optional = TRUE;
}

if (is_optional || def->optional_addresses) {
if (is_optional) {
g_string_append(link, "RequiredForOnline=no\n");
}
if (def->optional_addresses) {
for (unsigned i = 0; NETPLAN_OPTIONAL_ADDRESS_TYPES[i].name != NULL; ++i) {
if (def->optional_addresses & NETPLAN_OPTIONAL_ADDRESS_TYPES[i].flag) {
g_string_append_printf(link, "OptionalAddresses=%s\n", NETPLAN_OPTIONAL_ADDRESS_TYPES[i].name);
Expand Down Expand Up @@ -1401,6 +1471,93 @@ _netplan_netdef_write_networkd(
return TRUE;
}

gboolean
_netplan_networkd_write_wait_online(const NetplanState* np_state, const char* rootdir)
{
// Set of all current network interfaces, potentially non yet renamed
GHashTable* system_interfaces = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
_netplan_query_system_interfaces(system_interfaces);
Copy link

@blackboxsw blackboxsw Apr 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not certain here about the impact of ignoring devices not currently present in /sys/class/net during systemd generator timeframe. By only considering interfaces already present in /sys/class/net, are we exposed to scenarios where the device drivers for the specified device haven't loaded yet (in initramfs-less environments without the proper drivers compiled into the kernel)? This reminds me of this cloud-init issue canonical/cloud-init#4451 for minimal images. In the event that we don't have device drivers loaded (from initramfs or kernel static compiled), is it possible netplan's generatornetplan generate may not see the device in /sys/class/net via if_nameindex and systemd-networkd-wait-online will be disabled right? Maybe in this case, we prescribe that initramfs/kernel must have the desired device drivers in order to expect systemd-networkd-wait-online to block properly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A similar (the same?) issue will happen with virtual devices. By the time the generator runs they will not be created yet so they will not be in the list.

Copy link

@blackboxsw blackboxsw Apr 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

James corrected me, this isn't systemd generator timeframe, but the point in time when netplan generate is called. For cloud-init init-local boot stage, I think we may still be exposed to this race where cloud-init local gets in and invokes netplan generate possibly before supplemental device drivers have loaded. So this may still be a concern.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! That's a very interesting case.. I just pushed more commits to not require virtual devices (bridges, bonds, dummys, ...) to be available in /sys/class/net just yet. Those will be created later.

For physical devices that are not yet loaded, I don't know. I don't think there's anything that can be done as part of this PR and such interfaces might be ignored by systemd-networkd-wait-online.

A longer-term solution might be splitting up Netplan into a sytemd-generator and a systemd-service (running Before=network-pre.target, similar to systemd-network-generator.service), which we are tracking as spec "FO165 - Netplan generator architecture".

Do you think this should be a blocker?

Copy link

@blackboxsw blackboxsw Apr 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this retains the same gap/issue cloud-init and systemd-networkd-wait-online are already exposed to, so I don't think this needs to be fixed in this PR. The stance we've taken already Ubuntu minimal case was to recompile the kernel to include the module built-ins that are required to support virtio-net to ensure they are loaded in time for network generation and network-online for "initrdless" boots. It feels "ok" to state that if you have a critical device driver needed in early boot for your cloud image and you are booting without initrd, you would either need to statically compile the necessary device drivers into that kernel, or use initrd and provide any supplemental drivers needed earlier boot. This may just be something we take into consideration for future behavior though, and thinking of how to approach this type of concern later in F0165 probably makes the most sense at the moment.


// Hash set of non-optional interfaces to wait for
GHashTable* non_optional_interfaces = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
NetplanStateIterator iter;
netplan_state_iterator_init(np_state, &iter);
while (netplan_state_iterator_has_next(&iter)) {
NetplanNetDefinition* def = netplan_state_iterator_next(&iter);
if (def->backend != NETPLAN_BACKEND_NETWORKD)
continue;

/* When activation-mode is used we default to being optional.
* Otherwise, systemd might wait indefinitely for the interface to
* come online.
*/
if (!(def->optional || def->activation_mode)) {
// Check if we have any IP configuration
// bond and bridge members will never ask for link-local addresses (see above)
struct address_iter* addr_iter = _netplan_netdef_new_address_iter(def);
gboolean routable = _netplan_address_iter_next(addr_iter) != NULL
|| netplan_netdef_get_dhcp4(def)
|| netplan_netdef_get_dhcp6(def);
gboolean degraded = ( netplan_netdef_get_link_local_ipv4(def)
&& !(netplan_netdef_get_bond_link(def) || netplan_netdef_get_bridge_link(def)))
|| ( netplan_netdef_get_link_local_ipv6(def)
&& !(netplan_netdef_get_bond_link(def) || netplan_netdef_get_bridge_link(def)));
gboolean any_ips = routable || degraded;
_netplan_address_iter_free(addr_iter);

// no matching => single physical interface, ignoring non-existing interfaces
// OR: virtual interfaces, those will be created later on and cannot have a matching condition
gboolean physical_no_match_or_virtual = FALSE
|| (!netplan_netdef_has_match(def) && g_hash_table_contains(system_interfaces, def->id))
|| (netplan_netdef_get_type(def) >= NETPLAN_DEF_TYPE_VIRTUAL);
if (physical_no_match_or_virtual) {
g_hash_table_replace(non_optional_interfaces, g_strdup(def->id), any_ips ? g_strdup("degraded") : g_strdup("carrier"));
} else if (def->set_name) { // matching on a single interface, to be renamed
_netplan_enumerate_interfaces(def, system_interfaces, non_optional_interfaces, any_ips ? "degraded" : "carrier", def->set_name, rootdir);
} else { // matching on potentially multiple interfaces
// XXX: we shouldn't run this enumeration for every NetDef...
_netplan_enumerate_interfaces(def, system_interfaces, non_optional_interfaces, any_ips ? "degraded" : "carrier", NULL, rootdir);
}
}
}
g_hash_table_destroy(system_interfaces);

// create run/systemd/system/systemd-networkd-wait-online.service.d/
const char* override = "/run/systemd/system/systemd-networkd-wait-online.service.d/10-netplan.conf";
// The "ConditionPathIsSymbolicLink" is Netplan's s-n-wait-online enablement symlink,
// as we want to run -wait-online only if enabled by Netplan.
GString* content = g_string_new("[Unit]\n"
"ConditionPathIsSymbolicLink=/run/systemd/generator/network-online.target.wants/systemd-networkd-wait-online.service\n");
if (g_hash_table_size(non_optional_interfaces) == 0) {
_netplan_g_string_free_to_file(content, rootdir, override, NULL);
g_hash_table_destroy(non_optional_interfaces);
return FALSE;
}

// We have non-optional interface, so let's wait for those explicitly
GHashTableIter idx;
gpointer key, value;
g_string_append(content, "\n[Service]\nExecStart=\n"
"ExecStart=/lib/systemd/systemd-networkd-wait-online");
g_hash_table_iter_init (&idx, non_optional_interfaces);
while (g_hash_table_iter_next (&idx, &key, &value)) {
const char* ifname = key;
const char* min_oper_state = value;
g_string_append_printf(content, " -i %s", ifname);
// XXX: We should be checking IFF_LOOPBACK instead of interface name.
// But don't have access to the flags here.
if (!g_strcmp0(ifname, "lo"))
g_string_append(content, ":carrier"); // "carrier" as min-oper state for loopback
else if (min_oper_state)
g_string_append_printf(content, ":%s", min_oper_state);
}
g_string_append(content, "\n");

_netplan_g_string_free_to_file(content, rootdir, override, NULL);
g_hash_table_destroy(non_optional_interfaces);
return TRUE;
}

/**
* Clean up all generated configurations in @rootdir from previous runs.
*/
Expand All @@ -1414,6 +1571,7 @@ _netplan_networkd_cleanup(const char* rootdir)
_netplan_unlink_glob(rootdir, "/run/udev/rules.d/99-netplan-*");
_netplan_unlink_glob(rootdir, "/run/systemd/system/network.target.wants/netplan-regdom.service");
_netplan_unlink_glob(rootdir, "/run/systemd/system/netplan-regdom.service");
_netplan_unlink_glob(rootdir, "/run/systemd/system/systemd-networkd-wait-online.service.d/10-netplan*.conf");
/* Historically (up to v0.98) we had netplan-wpa@*.service files, in case of an
* upgraded system, we need to make sure to clean those up. */
_netplan_unlink_glob(rootdir, "/run/systemd/system/systemd-networkd.service.wants/netplan-wpa@*.service");
Expand Down
3 changes: 3 additions & 0 deletions src/networkd.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,8 @@ _netplan_netdef_write_network_file(
gboolean* has_been_written,
GError** error);

NETPLAN_INTERNAL gboolean
_netplan_networkd_write_wait_online(const NetplanState* np_state, const char* rootdir);

NETPLAN_INTERNAL void
_netplan_networkd_cleanup(const char* rootdir);
2 changes: 1 addition & 1 deletion src/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,7 @@ netplan_netdef_match_interface(const NetplanNetDefinition* netdef, const char* n
return !g_strcmp0(name, netdef->id);

if (netdef->match.mac) {
if (g_ascii_strcasecmp(netdef->match.mac, mac))
if (g_ascii_strcasecmp(netdef->match.mac ?: "", mac ?: ""))
return FALSE;
}

Expand Down
3 changes: 1 addition & 2 deletions tests/cli_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,7 @@ def test_with_empty_config(self):
enlol: {dhcp4: yes}''')
os.chmod(path_a, mode=0o600)
os.chmod(path_b, mode=0o600)
out = subprocess.check_output(exe_cli + ['generate', '--root-dir', self.workdir.name], stderr=subprocess.STDOUT)
self.assertEqual(out, b'')
subprocess.check_call(exe_cli + ['generate', '--root-dir', self.workdir.name])
self.assertEqual(os.listdir(os.path.join(self.workdir.name, 'run', 'systemd', 'network')),
['10-netplan-enlol.network'])

Expand Down
1 change: 1 addition & 0 deletions tests/ctests/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ tests = {
'test_netplan_misc': false,
'test_netplan_validation': false,
'test_netplan_keyfile': false,
'test_netplan_networkd': false,
'test_netplan_nm': false,
'test_netplan_openvswitch': false,
}
Expand Down
Loading
Loading