diff --git a/test/e2e/idle_connection_test.go b/test/e2e/idle_connection_test.go index 623253de8..ed4a2dc07 100644 --- a/test/e2e/idle_connection_test.go +++ b/test/e2e/idle_connection_test.go @@ -415,7 +415,7 @@ func idleConnectionTerminationPolicyRunTest(t *testing.T, policy operatorv1.Ingr elbAddress := elbHostname if net.ParseIP(elbHostname) == nil { - if err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 5*time.Minute, false, func(ctx context.Context) (bool, error) { + if err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, dnsResolutionTimeout, false, func(ctx context.Context) (bool, error) { addrs, err := net.LookupHost(elbHostname) if err != nil { t.Logf("%v error resolving %s: %v, retrying...", time.Now(), elbHostname, err) diff --git a/test/e2e/operator_test.go b/test/e2e/operator_test.go index 3aa8745da..9ff927c45 100644 --- a/test/e2e/operator_test.go +++ b/test/e2e/operator_test.go @@ -125,6 +125,23 @@ var ( operandNamespace = operatorcontroller.DefaultOperandNamespace defaultName = types.NamespacedName{Namespace: operatorNamespace, Name: manifests.DefaultIngressControllerName} clusterConfigName = types.NamespacedName{Namespace: operatorNamespace, Name: manifests.ClusterIngressConfigName} + + // Platforms that need a DNS "warmup" period for internal (inside the test cluster) DNS resolution. + // The warmup period is a period of delay before the first query is executed to avoid negative caching. + // This is not intended for external (i.e. the test runner cluster) DNS resolution. + platformsNeedInternalDNSWarmup = map[configv1.PlatformType]time.Duration{ + // 7 minutes of warmup was required past testing for internal IBMCloud DNS queries. + configv1.IBMCloudPlatformType: 7 * time.Minute, + } +) + +const ( + // dnsResolutionTimeout is the maximum time allowed for the test runner cluster to resolve a newly created + // DNS record. This record may be a wildcard DNS record or a load balancer hostname (for AWS & IBMCloud). This + // timeout accounts for both DNS propagation time from the test cluster's hosted zone to the test runner + // cluster (which may be on different platforms) and any negative caching along the way. As of writing this, AWS + // typically resolves within ~1 minute (see OCPBUGS-14966), while IBMCloud takes ~7 minutes (see OCPBUGS-48780). + dnsResolutionTimeout = 10 * time.Minute ) func init() { @@ -3056,7 +3073,7 @@ func TestAWSELBConnectionIdleTimeout(t *testing.T) { elbHostname := wildcardRecord.Spec.Targets[0] // Wait until we can resolve the ELB's hostname - if err := wait.PollImmediate(5*time.Second, 5*time.Minute, func() (bool, error) { + if err := wait.PollImmediate(5*time.Second, dnsResolutionTimeout, func() (bool, error) { _, err := net.LookupIP(elbHostname) if err != nil { t.Log(err) @@ -3270,7 +3287,7 @@ func TestConnectTimeout(t *testing.T) { lbHostname := wildcardRecord.Spec.Targets[0] // Wait until we can resolve the LB's hostname - if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + if err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, dnsResolutionTimeout, true, func(ctx context.Context) (bool, error) { _, err := net.LookupIP(lbHostname) if err != nil { t.Log(err) diff --git a/test/e2e/util_gatewayapi_test.go b/test/e2e/util_gatewayapi_test.go index 8d505772c..4e350d494 100644 --- a/test/e2e/util_gatewayapi_test.go +++ b/test/e2e/util_gatewayapi_test.go @@ -527,7 +527,7 @@ func assertHttpRouteConnection(t *testing.T, hostname string, gateway *gatewayap // Wait and check that the dns name resolves first. Takes a long time, so // if the hostname is actually an IP address, skip this. if net.ParseIP(hostname) == nil { - if err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 5*time.Minute, false, func(context context.Context) (bool, error) { + if err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, dnsResolutionTimeout, false, func(context context.Context) (bool, error) { _, err := net.LookupHost(hostname) if err != nil { t.Logf("%v waiting for HTTP route name %s to resolve (%v)", time.Now(), hostname, err) diff --git a/test/e2e/util_test.go b/test/e2e/util_test.go index b174e755c..a3f693b96 100644 --- a/test/e2e/util_test.go +++ b/test/e2e/util_test.go @@ -688,7 +688,7 @@ func verifyExternalIngressController(t *testing.T, name types.NamespacedName, ho // If we have a DNS as an external IP address, make sure we can resolve it before moving on. // This just limits the number of "could not resolve host" errors which can be confusing. if net.ParseIP(address) == nil { - if err := wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) { + if err := wait.PollImmediate(10*time.Second, dnsResolutionTimeout, func() (bool, error) { _, err := net.LookupIP(address) if err != nil { t.Logf("waiting for loadbalancer domain %s to resolve...", address) @@ -766,6 +766,10 @@ func verifyInternalIngressController(t *testing.T, name types.NamespacedName, ho } }() + // Since we've created a new IngressController, and we are about to use a curl pod to query the + // wildcard DNS record, we need to wait for platforms that require a warmup period for internal queries. + waitForInternalDNSWarmup(t) + extraArgs := []string{ "--header", "HOST:" + echoRoute.Spec.Host, "-v", @@ -849,6 +853,16 @@ func verifyInternalIngressController(t *testing.T, name types.NamespacedName, ho } } +// waitForInternalDNSWarmup waits for a designated "warmup" period +// to prevent negative caching on platforms that require it. +func waitForInternalDNSWarmup(t *testing.T) { + warmup, _ := platformsNeedInternalDNSWarmup[infraConfig.Status.PlatformStatus.Type] + if warmup > 0 { + t.Logf("this platform requires DNS warmup time to avoid negative caching...waiting for %s", warmup) + time.Sleep(warmup) + } +} + // assertDeleted tries to delete a cluster resource, and causes test failure if the delete fails. func assertDeleted(t *testing.T, cl client.Client, thing client.Object) { t.Helper()