diff --git a/e2e/stress/IoTClientPerf/Configuration.Stress.cs b/e2e/stress/IoTClientPerf/Configuration.Stress.cs index 419be86c4a..a9b7dad909 100644 --- a/e2e/stress/IoTClientPerf/Configuration.Stress.cs +++ b/e2e/stress/IoTClientPerf/Configuration.Stress.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Globalization; using System.Security.Cryptography.X509Certificates; namespace Microsoft.Azure.Devices.E2ETests @@ -25,6 +26,41 @@ public static partial class Stress private static Lazy s_cert = new Lazy(() => { return Configuration.IoTHub.GetCertificateWithPrivateKey(); }); + /// + /// Gets the import export BLOB URI. + /// + public static string ImportExportBlobUri => GetValue("IOTHUB_IMPORTEXPORT_BLOB_URI"); + + /// + /// Gets the connected devices percentage expected by the runner after the test ended. + /// + public static long? ConnectedDevicesPercentage => ParseNullable(GetValue("IOTHUB_PERF_CONNECTED_PERCENTAGE", "")); + + /// + /// Gets the TCP connections percentage expected by the runner after the test ended. + /// + public static long? TcpConnectionsPercentage => ParseNullable(GetValue("IOTHUB_PERF_TCP_PERCENTAGE", "")); + + /// + /// Gets the requests per second minimum average after the test ended. + /// + public static long? RequestsPerSecondMinAvg => ParseNullable(GetValue("IOTHUB_PERF_RPS_MIN_AVG", "")); + + /// + /// Gets the requests per second maximum standard deviation after the test ended. + /// + public static long? RequestsPerSecondMaxStd => ParseNullable(GetValue("IOTHUB_PERF_RPS_MAX_STD", "")); + + /// + /// Gets the maximum GC memory, in bytes, allowed after the test ended. + /// + public static long? GCMemoryBytes => ParseNullable(GetValue("IOTHUB_PERF_GC_MEM_BYTES_MAX", "")); + + /// + /// Success rate defined as operations completed / (completed + failed + cancelled). + /// + public static long? 
SuccessRate => ParseNullable(GetValue("IOTHUB_PERF_SUCCESS_RATE_PERCENTAGE", "")); + public static string GetDeviceNameById(int id, string authType) { return $"{NamePrefix}_{authType}_{id}"; @@ -43,6 +79,12 @@ public static string GetConnectionStringById(int id, string authType) public static string Key2 => s_key2.Value; public static X509Certificate2 Certificate => s_cert.Value; + + private static long? ParseNullable(string s) // Returns null when s is not an integer; parsed with the invariant culture because the value comes from an environment variable. + { + if (long.TryParse(s, NumberStyles.Integer, CultureInfo.InvariantCulture, out long l)) return l; + return null; + } } } } diff --git a/e2e/stress/IoTClientPerf/IoTClientPerf.csproj b/e2e/stress/IoTClientPerf/IoTClientPerf.csproj index 42a9626332..af3386a26e 100644 --- a/e2e/stress/IoTClientPerf/IoTClientPerf.csproj +++ b/e2e/stress/IoTClientPerf/IoTClientPerf.csproj @@ -2,19 +2,27 @@ Exe - netcoreapp2.1 + netcoreapp2.2 Microsoft.Azure.Devices.E2ETests + $(MSBuildProjectDirectory)\..\..\.. + $(RootDir)\common\test - - + + - - - - + + + + + + + + + + diff --git a/e2e/stress/IoTClientPerf/ParallelRun.cs b/e2e/stress/IoTClientPerf/ParallelRun.cs index 39b7b83715..1cd989be3e 100644 --- a/e2e/stress/IoTClientPerf/ParallelRun.cs +++ b/e2e/stress/IoTClientPerf/ParallelRun.cs @@ -4,6 +4,8 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.IO; +using System.Runtime.ExceptionServices; using System.Threading; using System.Threading.Tasks; @@ -43,10 +45,15 @@ public ParallelRun( public async Task RunAsync(bool runOnce, CancellationToken ct) { - int cursor_left, cursor_top; - cursor_left = Console.CursorLeft; - cursor_top = Console.CursorTop; + int cursor_left = 0, cursor_top = 0; + try + { + cursor_left = Console.CursorLeft; + cursor_top = Console.CursorTop; + } + catch (IOException) { /* Avoid "The handle is invalid" exception in DevOps */ } + int actualParallel = Math.Min(_parallelOperations, _tests.Length); int currentInstance = 0; @@ -82,6 +89,14 @@ public async Task RunAsync(bool runOnce, CancellationToken ct) break; case TaskStatus.Faulted: 
statInterimFaulted++; + foreach (Exception ex in finished.Exception.InnerExceptions) + { + if (ex is ParallelRunFatalException fatal) + { + // Crash the process to simplify analysis. Recover original stack. + fatal.ThrowInner(); + } + } break; case TaskStatus.RanToCompletion: statInterimCompleted++; @@ -103,9 +118,13 @@ public async Task RunAsync(bool runOnce, CancellationToken ct) double statInterimSeconds = statInterimSw.Elapsed.TotalSeconds; statTotalCompleted += statInterimCompleted; - Console.SetCursorPosition(cursor_left, cursor_top); - cursor_left = Console.CursorLeft; - cursor_top = Console.CursorTop; + try + { + Console.SetCursorPosition(cursor_left, cursor_top); + cursor_left = Console.CursorLeft; + cursor_top = Console.CursorTop; + } + catch (IOException) { /* Avoid "The handle is invalid" exception in DevOps */ } _updateStatistics(statInterimCompleted, statInterimFaulted, statInterimCancelled, statInterimSeconds); if (drain) Console.Write("Waiting for tasks to finish...\r"); @@ -142,4 +161,19 @@ public async Task RunAsync(bool runOnce, CancellationToken ct) } } } + + public class ParallelRunFatalException : Exception // Carries a captured exception so ParallelRun can rethrow it with its original stack trace. + { + private readonly ExceptionDispatchInfo _exceptionDispatchInfo; + + public ParallelRunFatalException(ExceptionDispatchInfo innerExceptionDispatchInfo) : base(innerExceptionDispatchInfo?.SourceException?.Message, innerExceptionDispatchInfo?.SourceException) + { + _exceptionDispatchInfo = innerExceptionDispatchInfo ?? throw new ArgumentNullException(nameof(innerExceptionDispatchInfo)); + } + + public void ThrowInner() // Rethrows the captured exception; never returns normally. + { + _exceptionDispatchInfo.Throw(); + } + } } diff --git a/e2e/stress/IoTClientPerf/PerfTestRunner.cs b/e2e/stress/IoTClientPerf/PerfTestRunner.cs index eba9b2e218..3e3252dfdf 100644 --- a/e2e/stress/IoTClientPerf/PerfTestRunner.cs +++ b/e2e/stress/IoTClientPerf/PerfTestRunner.cs @@ -25,8 +25,8 @@ public class PerfTestRunner private readonly int _timeSeconds; private readonly Func _scenarioFactory; - private PerfScenario[] _tests; - private Stopwatch _sw = new Stopwatch(); + private readonly PerfScenario[] _tests; + private readonly Stopwatch _sw = new Stopwatch(); public 
PerfTestRunner( ResultWriter writer, @@ -80,8 +80,9 @@ private void FilterTcpStatistics() } } - public async Task RunTestAsync() + public async Task RunTestAsync() { + int ret = 0; _sw.Restart(); try @@ -92,14 +93,16 @@ public async Task RunTestAsync() catch (OperationCanceledException) { Console.WriteLine($"Setup FAILED (timeout:{_sw.Elapsed})"); + ret = 1; + return ret; } _sw.Restart(); Console.WriteLine(); - + try { - await LoopAsync().ConfigureAwait(false); + ret = await LoopAsync().ConfigureAwait(false); } catch (OperationCanceledException) { @@ -111,9 +114,11 @@ public async Task RunTestAsync() await TeardownAllAsync().ConfigureAwait(false); Console.WriteLine("Done. "); + + return ret; } - private async Task LoopAsync() + private async Task LoopAsync() { using (var cts = new CancellationTokenSource(TimeSpan.FromSeconds(_timeSeconds))) { @@ -121,6 +126,10 @@ private async Task LoopAsync() ulong statTotalFaulted = 0; ulong statTotalCancelled = 0; double statTotalSeconds = 0.0; + int cpuLoad = 0; + long memoryBytes = 0, gcBytes = 0, tcpConn = 0, devConn = 0; + double avgRps = 0.0, stdDevRps = 0.0; + List statRps = new List(); var runner = new ParallelRun( @@ -145,16 +154,16 @@ private async Task LoopAsync() double totalRequestsPerSec = statTotalCompleted / statTotalSeconds; double totalTransferPerSec = totalRequestsPerSec * _messageSizeBytes; - (double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps); + (avgRps, stdDevRps) = CalculateAvgAndStDev(statRps); double avgBps = avgRps * _messageSizeBytes; double stdDevBps = stdDevRps * _messageSizeBytes; - SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); + SystemMetrics.GetMetrics(out cpuLoad, out memoryBytes, out gcBytes, out tcpConn, out devConn); Console.WriteLine($"[{_sw.Elapsed}] Loop Statistics:"); Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s"); - 
Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(avgRps)}/s "); + Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(stdDevBps)}/s "); Console.WriteLine($"Connected : {devConn,10:N0} "); - Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}"); + Console.WriteLine($"CPU Load : {(float)cpuLoad / 100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}"); Console.WriteLine("----"); Console.WriteLine($"TOTALs: "); Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}"); @@ -162,9 +171,112 @@ private async Task LoopAsync() }); await runner.RunAsync(runOnce: false, ct: cts.Token).ConfigureAwait(false); + + Console.WriteLine(); + int ret = 0; + ret = CheckKPI(statTotalCompleted, statTotalFaulted, statTotalCancelled, gcBytes, tcpConn, devConn, avgRps, stdDevRps, ret); + + if (ret != 0) Console.WriteLine("^^^^^^^^^^^^^^^^^^^\n"); + return ret; } } + private int CheckKPI(ulong statTotalCompleted, ulong statTotalFaulted, ulong statTotalCancelled, long gcBytes, long tcpConn, long devConn, double avgRps, double stdDevRps, int ret) + { + float? expectedDeviceConn = (float)_n * Configuration.Stress.ConnectedDevicesPercentage / 100; + float? expectedTcpConn = (float)_poolSize * Configuration.Stress.TcpConnectionsPercentage / 100; + + if (expectedDeviceConn.HasValue) + { + string status = $"Connected Devices. 
Expected: >={expectedDeviceConn}; Actual: {devConn}."; + if (devConn < expectedDeviceConn) + { + Console.Error.WriteLine($"FAILED KPI: {status}"); + ret = 1; + } + else + { + Console.WriteLine($"PASSED KPI: {status}"); + } + } + + if (expectedTcpConn.HasValue) + { + string status = $"TCP Connections. Expected: ={expectedTcpConn}; Actual: {tcpConn}."; + + if (tcpConn != expectedTcpConn) // Ensure all are connected and no connection leaks exist. + { + Console.Error.WriteLine($"FAILED KPI: {status}"); + ret = 2; + } + else + { + Console.WriteLine($"PASSED KPI: {status}"); + } + } + + if (Configuration.Stress.RequestsPerSecondMinAvg.HasValue) + { + string status = $"RPS Average.Expected: >={Configuration.Stress.RequestsPerSecondMinAvg}; Actual: {avgRps}."; + + if (avgRps < Configuration.Stress.RequestsPerSecondMinAvg) + { + Console.Error.WriteLine($"FAILED KPI: {status}"); + ret = 3; + } + else + { + Console.WriteLine($"PASSED KPI: {status}"); + } + } + + if (Configuration.Stress.RequestsPerSecondMaxStd.HasValue) + { + string status = $"RPS StdDev.Expected: <={ Configuration.Stress.RequestsPerSecondMaxStd}; Actual: { stdDevRps}."; + if (stdDevRps > Configuration.Stress.RequestsPerSecondMaxStd) + { + Console.Error.WriteLine($"FAILED KPI: {status}"); + ret = 4; + } + else + { + Console.WriteLine($"PASSED KPI: {status}"); + } + } + + if (Configuration.Stress.GCMemoryBytes.HasValue) + { + string status = $"GC Memory.Expected: <={GetHumanReadableBytes(Configuration.Stress.GCMemoryBytes.Value)}; Actual: {GetHumanReadableBytes(gcBytes)}."; + if (gcBytes > Configuration.Stress.GCMemoryBytes) + { + Console.Error.WriteLine($"FAILED KPI: {status}"); + ret = 5; + } + else + { + Console.WriteLine($"PASSED KPI: {status}"); + } + } + + if (Configuration.Stress.SuccessRate.HasValue) + { + ulong statTotalOperations = statTotalCompleted + statTotalFaulted + statTotalCancelled; float successRate = statTotalOperations == 0 ? 0f : ((float)statTotalCompleted * 100) / statTotalOperations; // Zero operations is a 0% success rate; 0/0 would be NaN, which compares false and would report PASSED. + string status = $"Success Rate.Expected: >={Configuration.Stress.SuccessRate}; 
Actual: {successRate}."; + + if (Configuration.Stress.SuccessRate > successRate) + { + Console.Error.WriteLine($"FAILED KPI: {status}"); + ret = 6; + } + else + { + Console.WriteLine($"PASSED KPI: {status}"); + } + } + + return ret; + } + private async Task SetupAllAsync() { using (var cts = new CancellationTokenSource(TimeSpan.FromSeconds(_timeSeconds))) @@ -211,12 +323,12 @@ private async Task SetupAllAsync() double totalRequestsPerSec = statTotalCompleted / statTotalSeconds; (double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps); - SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); + SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); Console.WriteLine($"[{_sw.Elapsed}] Setup Statistics:"); Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s"); Console.WriteLine($"Connected : {devConn,10:N0} "); - Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}"); + Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}"); Console.WriteLine("----"); Console.WriteLine($"TOTALs: "); Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}"); @@ -257,13 +369,13 @@ private async Task TeardownAllAsync() double totalRequestsPerSec = statTotalCompleted / statTotalSeconds; (double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps); - SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); + SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); 
Console.WriteLine($"[{_sw.Elapsed}] Teardown Statistics:"); Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s"); Console.WriteLine($"Connected : {devConn,10:N0} "); - Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}"); + Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}"); Console.WriteLine("----"); Console.WriteLine($"TOTALs: "); Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}"); diff --git a/e2e/stress/IoTClientPerf/Program.cs b/e2e/stress/IoTClientPerf/Program.cs index 7a570610d2..f6d67a0f70 100644 --- a/e2e/stress/IoTClientPerf/Program.cs +++ b/e2e/stress/IoTClientPerf/Program.cs @@ -1,11 +1,10 @@ // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. +using Microsoft.Azure.Devices.E2ETests.Scenarios; using System; using System.Collections.Generic; -using System.Diagnostics; using System.Globalization; -using System.Net.NetworkInformation; namespace Microsoft.Azure.Devices.E2ETests { @@ -16,9 +15,14 @@ private static Dictionary>( - "Generate the IoT Hub configuration required for the test (creates multiple devices).", + "Generate the IoT Hub configuration required for the test (creates multiple devices). Uses -a (auth type) and -n (device count).", (c) => {return new GenerateIotHubConfigTest(c);})}, + {"import_iothub_config", + new Tuple>( + "Imports the IoT Hub configuration from the Azure Blob URI specified in the IOTHUB_IMPORTEXPORT_BLOB_URI environment variable. 
(No other argument used).", + (c) => {return new ImportIotHubConfigTest(c);})}, + { "device_all", new Tuple>( "Devices connecting to IoT Hub then using multiple features.", @@ -49,21 +53,6 @@ private static Dictionary {return new DeviceMethodTest(c);}) }, - { "device_d2c_noretry", - new Tuple>( - "Like device_d2c but will disable retries and create a new DeviceClient when the previous enters a faulted state.", - (c) => {return new DeviceD2CNoRetry(c);})}, - - { "device_c2d_noretry", - new Tuple>( - "Like device_c2d but will disable retries and create a new DeviceClient when the previous enters a faulted state.", - (c) => {return new DeviceC2DNoRetry(c);})}, - - { "device_methods_noretry", - new Tuple>( - "Like device_methods but will disable retries and create a new DeviceClient when the previous enters a faulted state.", - (c) => {return new DeviceMethodsNoRetry(c);})}, - {"service_c2d", new Tuple>( "ServiceClient sending events to devices through IoT Hub.", @@ -253,18 +242,32 @@ public static int Main(string[] args) i, scenarioFactory); + int ret = 0; + try { - runner.RunTestAsync().GetAwaiter().GetResult(); + ret = runner.RunTestAsync().GetAwaiter().GetResult(); } - finally + catch (Exception ex) { - Console.Write("Writing output . . . "); - resultWriter.FlushAsync().GetAwaiter().GetResult(); - Console.WriteLine("OK"); + Console.WriteLine(ex); + ret = -1; + } + + Console.Write("Writing output . . . "); + resultWriter.FlushAsync().GetAwaiter().GetResult(); + Console.WriteLine("OK"); + + if (ret == 0) + { + Console.WriteLine("Test PASSED."); + } + else + { + Console.WriteLine("Test FAILED. 
See logs above for reason."); } - return 0; + return ret; } } } diff --git a/e2e/stress/IoTClientPerf/Properties/launchSettings.json b/e2e/stress/IoTClientPerf/Properties/launchSettings.json index 56d71f712c..96a7702b47 100644 --- a/e2e/stress/IoTClientPerf/Properties/launchSettings.json +++ b/e2e/stress/IoTClientPerf/Properties/launchSettings.json @@ -2,7 +2,7 @@ "profiles": { "IoTClientPerf": { "commandName": "Project", - "commandLineArgs": "-o s:\\tmp\\device.csv -t 60 -n 10 -f device_d2c" + "commandLineArgs": "-o device.csv -t 60 -n 10 -f device_d2c" } } } \ No newline at end of file diff --git a/e2e/stress/IoTClientPerf/Reporting/ResultWriter.cs b/e2e/stress/IoTClientPerf/Reporting/ResultWriter.cs index dfce46db5f..a2a52fbe70 100644 --- a/e2e/stress/IoTClientPerf/Reporting/ResultWriter.cs +++ b/e2e/stress/IoTClientPerf/Reporting/ResultWriter.cs @@ -8,12 +8,15 @@ namespace Microsoft.Azure.Devices.E2ETests public abstract class ResultWriter { protected string _header; + private const string NullInstance = "(null)"; public ResultWriter(string header = null) { _header = header; } + public static string IdOf(object value) => value != null ? value.GetType().Name + "#" + GetHashCode(value) : NullInstance; + public Task WriteAsync(TelemetryMetrics m) { return WriteLineAsync(m.ToString()); @@ -22,5 +25,7 @@ public Task WriteAsync(TelemetryMetrics m) public abstract Task FlushAsync(); protected abstract Task WriteLineAsync(string s); + + private static int GetHashCode(object value) => value?.GetHashCode() ?? 
0; } } diff --git a/e2e/stress/IoTClientPerf/Reporting/ResultWriterFile.cs b/e2e/stress/IoTClientPerf/Reporting/ResultWriterFile.cs index 9b09d5e0eb..46e6d545c4 100644 --- a/e2e/stress/IoTClientPerf/Reporting/ResultWriterFile.cs +++ b/e2e/stress/IoTClientPerf/Reporting/ResultWriterFile.cs @@ -11,7 +11,7 @@ namespace Microsoft.Azure.Devices.E2ETests { public class ResultWriterFile : ResultWriter { - private const long MaximumFileSize = (long)2 * 1024 * 1024 * 1024; + private const long MaximumFileSize = (long)1 * 1024 * 1024 * 1024; private const int FileBufferBytes = 100 * 1024 * 1024; private StreamWriter _writer; private SemaphoreSlim _semaphore = new SemaphoreSlim(1); diff --git a/e2e/stress/IoTClientPerf/Reporting/SystemMetrics.cs b/e2e/stress/IoTClientPerf/Reporting/SystemMetrics.cs index f3ea358bd0..3e97ed2167 100644 --- a/e2e/stress/IoTClientPerf/Reporting/SystemMetrics.cs +++ b/e2e/stress/IoTClientPerf/Reporting/SystemMetrics.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; +using System.ComponentModel; using System.Diagnostics; using System.Net.NetworkInformation; using System.Text; @@ -12,10 +13,10 @@ namespace Microsoft.Azure.Devices.E2ETests { public static class SystemMetrics { - private const int RefreshIntervalMs = 1000; - private static readonly Stopwatch _sw = new Stopwatch(); - private static double s_lastTotalCpuUsageMs = 0.0; - private static int s_cpuPercent; + private const int RefreshIntervalMs = 500; + private static readonly Stopwatch s_sw = new Stopwatch(); + private static TimeSpan s_lastProcCpuUsageMs = TimeSpan.Zero; + private static int s_cpuLoad; private static long s_totalMemoryBytes; private static long s_lastGcBytes; private static long s_lastTcpConnections; @@ -23,12 +24,12 @@ public static class SystemMetrics private static long s_devicesConnected; - private static object s_lock = new object(); + private static readonly object s_lock = new object(); - public static void GetMetrics(out int cpuPercent, out long 
memoryBytes, out long gcBytes, out long tcpConn, out long devicesConn) + public static void GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devicesConn) { EnsureUpToDate(); - cpuPercent = s_cpuPercent; + cpuLoad = s_cpuLoad; memoryBytes = s_totalMemoryBytes; gcBytes = s_lastGcBytes; tcpConn = s_lastTcpConnections; @@ -52,15 +53,23 @@ public static void TcpFilterPort(int port) private static void UpdateCpuUsage() { - var proc = Process.GetCurrentProcess(); - double currentTotalCpuUsageMs = proc.TotalProcessorTime.TotalMilliseconds / Environment.ProcessorCount; - double timeDeltaMs = _sw.Elapsed.TotalMilliseconds; + TimeSpan elapsed = s_sw.Elapsed; + Process proc = Process.GetCurrentProcess(); + + if ((elapsed.Ticks != 0) && (s_lastProcCpuUsageMs != TimeSpan.Zero)) + { - double usedTimeDeltaMs = currentTotalCpuUsageMs - s_lastTotalCpuUsageMs; - if (timeDeltaMs > 0.1) s_cpuPercent = (int)(usedTimeDeltaMs * 100 / timeDeltaMs); - if (s_cpuPercent > 100) s_cpuPercent = 100; + TimeSpan currentTotalCpuUsageMs = proc.TotalProcessorTime; + TimeSpan usedTimeDelta = currentTotalCpuUsageMs - s_lastProcCpuUsageMs; - s_lastTotalCpuUsageMs = currentTotalCpuUsageMs; + s_cpuLoad = (int)(((double)usedTimeDelta.Ticks / elapsed.Ticks) * 100); + } + else + { + s_cpuLoad = -1; + } + + s_lastProcCpuUsageMs = proc.TotalProcessorTime; } private static void UpdateTotalMemoryBytes() @@ -91,16 +100,20 @@ private static void UpdateTCPConnections() private static void EnsureUpToDate() { - if (!_sw.IsRunning || _sw.ElapsedMilliseconds > RefreshIntervalMs) + if (!s_sw.IsRunning) + { + s_sw.Start(); + } + else if (s_sw.ElapsedMilliseconds > RefreshIntervalMs) { lock (s_lock) { - UpdateCpuUsage(); UpdateGCMemoryBytes(); UpdateTCPConnections(); UpdateTotalMemoryBytes(); + UpdateCpuUsage(); - _sw.Restart(); + s_sw.Restart(); } } } diff --git a/e2e/stress/IoTClientPerf/Reporting/TelemetryMetrics.cs b/e2e/stress/IoTClientPerf/Reporting/TelemetryMetrics.cs 
index 2397de6701..dc35706437 100644 --- a/e2e/stress/IoTClientPerf/Reporting/TelemetryMetrics.cs +++ b/e2e/stress/IoTClientPerf/Reporting/TelemetryMetrics.cs @@ -12,6 +12,7 @@ public class TelemetryMetrics public const string DeviceOperationCreate = "device_create"; public const string DeviceOperationOpen = "device_open"; public const string DeviceOperationClose = "device_close"; + public const string DeviceOperationDispose = "device_dispose"; public const string DeviceOperationSend = "device_send"; public const string DeviceOperationReceive = "device_receive"; public const string DeviceOperationMethodEnable = "device_method_enable"; @@ -30,7 +31,7 @@ public class TelemetryMetrics private static string s_configString; // Contains all Config* parameters. public int? Id; - public string OperationType; // e.g. OpenAsync / SendAsync, etc + private string OperationType; // e.g. OpenAsync / SendAsync, etc public double? ScheduleTime; public double? ExecuteTime; public string ErrorMessage; @@ -62,6 +63,14 @@ public static string GetHeader() "ErrorMessage"; } + public void Clear(string operationType) + { + OperationType = operationType; + ScheduleTime = null; + ExecuteTime = null; + ErrorMessage = null; + } + public static void SetStaticConfigParameters( string runId, int timeSeconds, @@ -87,9 +96,9 @@ public override string ToString() Add(sb, ScheduleTime); Add(sb, ExecuteTime); - SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); + SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn); - Add(sb, cpuPercent); + Add(sb, cpuLoad); Add(sb, memoryBytes); Add(sb, gcBytes); Add(sb, tcpConn); diff --git a/e2e/stress/IoTClientPerf/Scenarios/DeviceAllNoRetry.cs b/e2e/stress/IoTClientPerf/Scenarios/DeviceAllNoRetry.cs index 3bb06b2e0b..c062df15dd 100644 --- a/e2e/stress/IoTClientPerf/Scenarios/DeviceAllNoRetry.cs +++ 
b/e2e/stress/IoTClientPerf/Scenarios/DeviceAllNoRetry.cs @@ -1,6 +1,9 @@ // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. +using Microsoft.Azure.Devices.Client.Exceptions; +using System; +using System.Diagnostics; using System.Threading; using System.Threading.Tasks; @@ -8,6 +11,8 @@ namespace Microsoft.Azure.Devices.E2ETests { public class DeviceAllNoRetry : DeviceClientScenario { + private const int DelaySecondsAfterFailure = 1; + private readonly SemaphoreSlim _lock = new SemaphoreSlim(1); private Task _sendTask; private Task _receiveTask; private Task _waitForMethodTask; @@ -28,14 +33,37 @@ public override async Task SetupAsync(CancellationToken ct) public override async Task RunTestAsync(CancellationToken ct) { - SetupTasks(ct); - Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _sendTask, _receiveTask, _waitForMethodTask).ConfigureAwait(false); - - if (completedTask == _waitForDisconnectTask) + try { - DisposeDevice(); - await SetupAsync(ct).ConfigureAwait(false); + await _lock.WaitAsync().ConfigureAwait(false); SetupTasks(ct); + + Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _sendTask, _receiveTask, _waitForMethodTask).ConfigureAwait(false); + + if (completedTask == _waitForDisconnectTask) + { + await DisposeDevice().ConfigureAwait(false); + + try + { + // Drain current operations. Method will not be notified in any way of the disconnect. 
+ await Task.WhenAll(_sendTask, _receiveTask).ConfigureAwait(false); + } + catch (IotHubException) { } + catch (OperationCanceledException) { } + + _waitForDisconnectTask = null; + _sendTask = null; + _receiveTask = null; + _waitForMethodTask = null; + + await Task.Delay(DelaySecondsAfterFailure * 1000).ConfigureAwait(false); + await SetupAsync(ct).ConfigureAwait(false); + } + } + finally + { + _lock.Release(); } } diff --git a/e2e/stress/IoTClientPerf/Scenarios/DeviceC2DNoRetry.cs b/e2e/stress/IoTClientPerf/Scenarios/DeviceC2DNoRetry.cs deleted file mode 100644 index 8fac2e3b14..0000000000 --- a/e2e/stress/IoTClientPerf/Scenarios/DeviceC2DNoRetry.cs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. - -using System.Threading; -using System.Threading.Tasks; - -namespace Microsoft.Azure.Devices.E2ETests -{ - public class DeviceC2DNoRetry : DeviceClientScenario - { - private Task _receiveTask; - private Task _waitForDisconnectTask; - - - public DeviceC2DNoRetry(PerfScenarioConfig config) : base(config) - { - } - - public override async Task SetupAsync(CancellationToken ct) - { - await CreateDeviceAsync().ConfigureAwait(false); - DisableRetry(); - await OpenDeviceAsync(ct).ConfigureAwait(false); - } - - public override async Task RunTestAsync(CancellationToken ct) - { - SetupTasks(ct); - Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _receiveTask).ConfigureAwait(false); - - if (completedTask == _waitForDisconnectTask) - { - DisposeDevice(); - await SetupAsync(ct).ConfigureAwait(false); - SetupTasks(ct); - } - } - - private void SetupTasks(CancellationToken ct) - { - if (_waitForDisconnectTask == null || _waitForDisconnectTask.IsCompleted) _waitForDisconnectTask = WaitForDisconnectedAsync(ct); - if (_receiveTask == null || _receiveTask.IsCompleted) _receiveTask = ReceiveMessageAsync(ct); - } - - public override Task 
TeardownAsync(CancellationToken ct) - { - return CloseAsync(ct); - } - } -} diff --git a/e2e/stress/IoTClientPerf/Scenarios/DeviceClientScenario.cs b/e2e/stress/IoTClientPerf/Scenarios/DeviceClientScenario.cs index 4e5b136ea6..4ea0e23ac2 100644 --- a/e2e/stress/IoTClientPerf/Scenarios/DeviceClientScenario.cs +++ b/e2e/stress/IoTClientPerf/Scenarios/DeviceClientScenario.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using Microsoft.Azure.Devices.Client; +using Microsoft.Azure.Devices.Client.Exceptions; using System; using System.Diagnostics; using System.Runtime.ExceptionServices; @@ -17,41 +18,35 @@ public abstract class DeviceClientScenario : PerfScenario private DeviceClient _dc; // Shared by Create, Open and Send - private TelemetryMetrics _m = new TelemetryMetrics(); - private Stopwatch _sw = new Stopwatch(); + private readonly TelemetryMetrics _m = new TelemetryMetrics(); + private readonly Stopwatch _sw = new Stopwatch(); // Separate metrics and time calculation for operations that can be parallelized. 
- private TelemetryMetrics _mRecv = new TelemetryMetrics(); - private Stopwatch _swRecv = new Stopwatch(); + private readonly TelemetryMetrics _mRecv = new TelemetryMetrics(); + private readonly Stopwatch _swRecv = new Stopwatch(); private const string TestMethodName = "PerfTestMethod"; - private TelemetryMetrics _mMethod = new TelemetryMetrics(); - private Stopwatch _swMethod = new Stopwatch(); - private SemaphoreSlim _methodSemaphore = new SemaphoreSlim(0); + private readonly TelemetryMetrics _mMethod = new TelemetryMetrics(); + private readonly Stopwatch _swMethod = new Stopwatch(); + private readonly SemaphoreSlim _methodSemaphore = new SemaphoreSlim(0); private static readonly MethodResponse s_methodResponse = new MethodResponse(200); - private TelemetryMetrics _mConnectionStatus = new TelemetryMetrics(); - private SemaphoreSlim _connectionStatusChangedSemaphore = new SemaphoreSlim(0); - private SemaphoreSlim _waitForDisconnectSemaphore = new SemaphoreSlim(0); + private readonly TelemetryMetrics _mConnectionStatus = new TelemetryMetrics(); + private readonly SemaphoreSlim _connectionStatusChangedSemaphore = new SemaphoreSlim(1); + private readonly SemaphoreSlim _waitForDisconnectSemaphore = new SemaphoreSlim(0, 1); private bool _connected; - private byte[] _messageBytes; + private readonly byte[] _messageBytes; - private bool _pooled; - private int _poolSize; + private readonly bool _pooled; + private readonly int _poolSize; public DeviceClientScenario(PerfScenarioConfig config) : base(config) { _m.Id = _id; - _mRecv.Id = _id; - _mRecv.OperationType = TelemetryMetrics.DeviceOperationReceive; - _mMethod.Id = _id; - _mConnectionStatus.Id = _id; - _mConnectionStatus.ExecuteTime = null; - _mConnectionStatus.ScheduleTime = null; _messageBytes = new byte[_sizeBytes]; @@ -64,7 +59,7 @@ public DeviceClientScenario(PerfScenarioConfig config) : base(config) protected async Task CreateDeviceAsync() { _sw.Restart(); - _m.OperationType = 
TelemetryMetrics.DeviceOperationCreate; + _m.Clear(TelemetryMetrics.DeviceOperationCreate); ITransportSettings transportSettings = null; @@ -108,7 +103,6 @@ protected async Task CreateDeviceAsync() _dc.SetConnectionStatusChangesHandler(OnConnectionStatusChanged); _m.ExecuteTime = _sw.ElapsedMilliseconds; - _m.ScheduleTime = null; // sync operation await _writer.WriteAsync(_m).ConfigureAwait(false); } @@ -132,8 +126,13 @@ private async void OnConnectionStatusChanged(ConnectionStatus status, Connection _connected = false; } - _mConnectionStatus.OperationType = TelemetryMetrics.DeviceStateDisconnected; - _waitForDisconnectSemaphore.Release(); + _mConnectionStatus.Clear(TelemetryMetrics.DeviceStateDisconnected); + try + { + _waitForDisconnectSemaphore.Release(); + } + catch (SemaphoreFullException) { } + break; case ConnectionStatus.Connected: if (!_connected) @@ -142,7 +141,7 @@ private async void OnConnectionStatusChanged(ConnectionStatus status, Connection _connected = true; } - _mConnectionStatus.OperationType = TelemetryMetrics.DeviceStateConnected; + _mConnectionStatus.Clear(TelemetryMetrics.DeviceStateConnected); break; case ConnectionStatus.Disconnected_Retrying: if (_connected) @@ -151,8 +150,14 @@ private async void OnConnectionStatusChanged(ConnectionStatus status, Connection _connected = false; } - _mConnectionStatus.OperationType = TelemetryMetrics.DeviceStateDisconnectedRetrying; - _waitForDisconnectSemaphore.Release(); + _mConnectionStatus.Clear(TelemetryMetrics.DeviceStateDisconnectedRetrying); + + try + { + _waitForDisconnectSemaphore.Release(); + } + catch (SemaphoreFullException) { } + break; case ConnectionStatus.Disabled: if (_connected) @@ -161,14 +166,14 @@ private async void OnConnectionStatusChanged(ConnectionStatus status, Connection _connected = false; } - _mConnectionStatus.OperationType = TelemetryMetrics.DeviceStateDisconnected; + _mConnectionStatus.Clear(TelemetryMetrics.DeviceStateDisconnected); break; default: - 
_mConnectionStatus.OperationType = TelemetryMetrics.DeviceStateUnknown; + _mConnectionStatus.Clear(TelemetryMetrics.DeviceStateUnknown); break; } - _mConnectionStatus.ErrorMessage = $"ConnectionStatus: {status} reason: {reason}"; + _mConnectionStatus.ErrorMessage = $"ConnectionStatus: {status} reason: {reason} id: {ResultWriter.IdOf(_dc)}"; await _writer.WriteAsync(_mConnectionStatus).ConfigureAwait(false); } finally @@ -184,8 +189,8 @@ protected Task WaitForDisconnectedAsync(CancellationToken ct) protected async Task OpenDeviceAsync(CancellationToken ct) { - _m.OperationType = TelemetryMetrics.DeviceOperationOpen; - _m.ScheduleTime = null; + _m.Clear(TelemetryMetrics.DeviceOperationOpen); + _sw.Restart(); try { @@ -197,7 +202,7 @@ protected async Task OpenDeviceAsync(CancellationToken ct) } catch (Exception ex) { - _m.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_m, ex); throw; } finally @@ -209,8 +214,8 @@ protected async Task OpenDeviceAsync(CancellationToken ct) protected async Task SendMessageAsync(CancellationToken ct) { - _m.OperationType = TelemetryMetrics.DeviceOperationSend; - _m.ScheduleTime = null; + _m.Clear(TelemetryMetrics.DeviceOperationSend); + _sw.Restart(); try @@ -224,7 +229,7 @@ protected async Task SendMessageAsync(CancellationToken ct) } catch (Exception ex) { - _m.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_m, ex); throw; } finally @@ -236,7 +241,7 @@ protected async Task SendMessageAsync(CancellationToken ct) protected async Task ReceiveMessageAsync(CancellationToken ct) { - _mRecv.ScheduleTime = null; + _mRecv.Clear(TelemetryMetrics.DeviceOperationReceive); _swRecv.Restart(); try @@ -253,7 +258,7 @@ protected async Task ReceiveMessageAsync(CancellationToken ct) } catch (Exception ex) { - _mRecv.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_mRecv, ex); throw; } finally @@ -265,8 +270,7 @@ protected async Task ReceiveMessageAsync(CancellationToken ct) 
protected async Task EnableMethodsAsync(CancellationToken ct) { - _mMethod.ScheduleTime = null; - _mMethod.OperationType = TelemetryMetrics.DeviceOperationMethodEnable; + _mMethod.Clear(TelemetryMetrics.DeviceOperationMethodEnable); _swMethod.Restart(); try @@ -279,7 +283,7 @@ protected async Task EnableMethodsAsync(CancellationToken ct) } catch (Exception ex) { - _mMethod.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_mMethod, ex); throw; } finally @@ -297,8 +301,7 @@ private Task MethodHandlerAsync(MethodRequest methodRequest, obj protected async Task WaitForMethodAsync(CancellationToken ct) { - _mMethod.ScheduleTime = null; - _mMethod.OperationType = TelemetryMetrics.DeviceOperationMethodCalled; + _mMethod.Clear(TelemetryMetrics.DeviceOperationMethodCalled); _swMethod.Restart(); try @@ -311,7 +314,7 @@ protected async Task WaitForMethodAsync(CancellationToken ct) } catch (Exception ex) { - _mMethod.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_mMethod, ex); throw; } finally @@ -325,8 +328,7 @@ protected async Task CloseAsync(CancellationToken ct) { if (_dc == null) return; - _m.ScheduleTime = null; - _m.OperationType = TelemetryMetrics.DeviceOperationClose; + _m.Clear(TelemetryMetrics.DeviceOperationClose); _sw.Restart(); try @@ -335,7 +337,7 @@ protected async Task CloseAsync(CancellationToken ct) } catch (Exception ex) { - _m.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_m, ex); throw; } finally @@ -345,9 +347,42 @@ protected async Task CloseAsync(CancellationToken ct) } } - protected void DisposeDevice() + private void SetErrorMessage(TelemetryMetrics m, Exception ex) + { + m.ErrorMessage = $"{ex.GetType().Name} id: {ResultWriter.IdOf(_dc)} - {ex.Message}"; + if (IsFatalException(ex)) + { + throw new ParallelRunFatalException(ExceptionDispatchInfo.Capture(ex)); + } + } + + private bool IsFatalException(Exception ex) + { + // List of known exceptions: + if (ex is 
IotHubCommunicationException || /* Expected during fault injection if no retry policy or the retry policy expired.*/ + ex is ObjectDisposedException) /* Expected during fault injection, in the no-retry case as the DeviceClient is thrown away and reconstructed during pending operations.*/ + { + return false; + } + + if (ex is IotHubException) + { + // AMQP-only, expected during faults in the no-retry case: + if (ex.Message == "Device is now offline." && + (_transport == Client.TransportType.Amqp || _transport == Client.TransportType.Amqp_Tcp_Only || _transport == Client.TransportType.Amqp_WebSocket_Only)) + { + return false; + } + } + + return true; + } + + protected async Task DisposeDevice() { + _m.Clear(TelemetryMetrics.DeviceOperationDispose); _dc.Dispose(); + await _writer.WriteAsync(_m).ConfigureAwait(false); } } } diff --git a/e2e/stress/IoTClientPerf/Scenarios/DeviceD2CNoRetry.cs b/e2e/stress/IoTClientPerf/Scenarios/DeviceD2CNoRetry.cs deleted file mode 100644 index 258b900113..0000000000 --- a/e2e/stress/IoTClientPerf/Scenarios/DeviceD2CNoRetry.cs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
- -using System.Threading; -using System.Threading.Tasks; - -namespace Microsoft.Azure.Devices.E2ETests -{ - public class DeviceD2CNoRetry : DeviceClientScenario - { - private Task _sendTask; - private Task _waitForDisconnectTask; - - - public DeviceD2CNoRetry(PerfScenarioConfig config) : base(config) - { - } - - public override async Task SetupAsync(CancellationToken ct) - { - await CreateDeviceAsync().ConfigureAwait(false); - DisableRetry(); - await OpenDeviceAsync(ct).ConfigureAwait(false); - } - - public override async Task RunTestAsync(CancellationToken ct) - { - SetupTasks(ct); - Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _sendTask).ConfigureAwait(false); - - if (completedTask == _waitForDisconnectTask) - { - DisposeDevice(); - await SetupAsync(ct).ConfigureAwait(false); - SetupTasks(ct); - } - } - - private void SetupTasks(CancellationToken ct) - { - if (_waitForDisconnectTask == null || _waitForDisconnectTask.IsCompleted) _waitForDisconnectTask = WaitForDisconnectedAsync(ct); - if (_sendTask == null || _sendTask.IsCompleted) _sendTask = SendMessageAsync(ct); - } - - public override Task TeardownAsync(CancellationToken ct) - { - return CloseAsync(ct); - } - } -} diff --git a/e2e/stress/IoTClientPerf/Scenarios/DeviceMethodsNoRetry.cs b/e2e/stress/IoTClientPerf/Scenarios/DeviceMethodsNoRetry.cs deleted file mode 100644 index 56ae25dbd8..0000000000 --- a/e2e/stress/IoTClientPerf/Scenarios/DeviceMethodsNoRetry.cs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
- -using System.Threading; -using System.Threading.Tasks; - -namespace Microsoft.Azure.Devices.E2ETests -{ - public class DeviceMethodsNoRetry : DeviceClientScenario - { - private Task _waitForMethodTask; - private Task _waitForDisconnectTask; - - public DeviceMethodsNoRetry(PerfScenarioConfig config) : base(config) - { - } - - public override async Task SetupAsync(CancellationToken ct) - { - await CreateDeviceAsync().ConfigureAwait(false); - DisableRetry(); - await OpenDeviceAsync(ct).ConfigureAwait(false); - await EnableMethodsAsync(ct).ConfigureAwait(false); - } - - public override async Task RunTestAsync(CancellationToken ct) - { - SetupTasks(ct); - Task completedTask = await Task.WhenAny(_waitForDisconnectTask, _waitForMethodTask).ConfigureAwait(false); - - if (completedTask == _waitForDisconnectTask) - { - DisposeDevice(); - await SetupAsync(ct).ConfigureAwait(false); - SetupTasks(ct); - } - } - - private void SetupTasks(CancellationToken ct) - { - if (_waitForDisconnectTask == null || _waitForDisconnectTask.IsCompleted) _waitForDisconnectTask = WaitForDisconnectedAsync(ct); - if (_waitForMethodTask == null || _waitForMethodTask.IsCompleted) _waitForMethodTask = WaitForMethodAsync(ct); - } - - public override Task TeardownAsync(CancellationToken ct) - { - return CloseAsync(ct); - } - } -} diff --git a/e2e/stress/IoTClientPerf/Scenarios/DeviceOneD2CTest.cs b/e2e/stress/IoTClientPerf/Scenarios/DeviceOneD2CTest.cs index 030f039993..ab5ff708cf 100644 --- a/e2e/stress/IoTClientPerf/Scenarios/DeviceOneD2CTest.cs +++ b/e2e/stress/IoTClientPerf/Scenarios/DeviceOneD2CTest.cs @@ -42,7 +42,7 @@ public override async Task SetupAsync(CancellationToken ct) private async Task CreateDeviceAsync() { _sw.Restart(); - _m.OperationType = TelemetryMetrics.DeviceOperationCreate; + _m.Clear(TelemetryMetrics.DeviceOperationCreate); ITransportSettings transportSettings = null; @@ -79,8 +79,7 @@ private async Task CreateDeviceAsync() protected async Task 
OpenDeviceAsync(CancellationToken ct) { ExceptionDispatchInfo exInfo = null; - _m.OperationType = TelemetryMetrics.DeviceOperationOpen; - _m.ScheduleTime = null; + _m.Clear(TelemetryMetrics.DeviceOperationOpen); _sw.Restart(); try { @@ -110,8 +109,7 @@ public override Task RunTestAsync(CancellationToken ct) protected async Task SendMessageAsync(CancellationToken ct) { ExceptionDispatchInfo exInfo = null; - _m.OperationType = TelemetryMetrics.DeviceOperationSend; - _m.ScheduleTime = null; + _m.Clear(TelemetryMetrics.DeviceOperationSend); _sw.Restart(); try diff --git a/e2e/stress/IoTClientPerf/Scenarios/GenerateIotHubConfigTest.cs b/e2e/stress/IoTClientPerf/Scenarios/GenerateIotHubConfigTest.cs index 05c751cd2f..f20e319f83 100644 --- a/e2e/stress/IoTClientPerf/Scenarios/GenerateIotHubConfigTest.cs +++ b/e2e/stress/IoTClientPerf/Scenarios/GenerateIotHubConfigTest.cs @@ -24,11 +24,10 @@ static GenerateIotHubConfigTest() Console.WriteLine("2. Manually upload the `devices.txt` file to the BLOB. (One way is to use the Azure Portal.)"); Console.WriteLine("3. Get a container SAS key:"); Console.WriteLine($"\taz storage container generate-sas -n --account-name --account-key --permissions dlrw --expiry {(DateTime.Now.AddDays(1)).ToString("yyyy-MM-dd")} "); + Console.WriteLine("\tAlternatively use Microsoft Azure Storage Explorer to generate a full-access (IoT Hub needs both read and write), short lived SAS token URI."); Console.WriteLine("4. Import into IoT Hub:"); - Console.WriteLine("\taz iot hub device-identity import --hub-name --input-blob-container-uri \"https://.blob.core.windows.net/?(KEY_GENERATED_STEP_3)\" --output-blob-container-uri "); - Console.WriteLine("5. 
Monitor job progress:"); - Console.WriteLine("\taz iot hub job list --hub-name "); - Console.WriteLine("----------------------"); + Console.WriteLine("\tSet environment variable IOTHUB_IMPORTEXPORT_BLOB_URI="); + Console.WriteLine("\tIoTClientPerf -f import_iothub_config"); } public GenerateIotHubConfigTest(PerfScenarioConfig config) : base(config) diff --git a/e2e/stress/IoTClientPerf/Scenarios/HarnessBaseline.cs b/e2e/stress/IoTClientPerf/Scenarios/HarnessBaseline.cs index 4fbf7ccf8c..d387028dcc 100644 --- a/e2e/stress/IoTClientPerf/Scenarios/HarnessBaseline.cs +++ b/e2e/stress/IoTClientPerf/Scenarios/HarnessBaseline.cs @@ -24,7 +24,7 @@ public HarnessBaseline(PerfScenarioConfig config) : base(config) public override async Task SetupAsync(CancellationToken ct) { _sw.Restart(); - _m.OperationType = "baseline_setup"; + _m.Clear("baseline_setup"); await Task.Delay(100).ConfigureAwait(false); _m.ExecuteTime = _sw.ElapsedMilliseconds; _m.ScheduleTime = null; // sync operation @@ -34,7 +34,7 @@ public override async Task SetupAsync(CancellationToken ct) public override async Task RunTestAsync(CancellationToken ct) { _sw.Restart(); - _m.OperationType = "baseline_run"; + _m.Clear("baseline_run"); await Task.Delay(100).ConfigureAwait(false); _m.ExecuteTime = _sw.ElapsedMilliseconds; _m.ScheduleTime = null; // sync operation @@ -44,7 +44,7 @@ public override async Task RunTestAsync(CancellationToken ct) public override async Task TeardownAsync(CancellationToken ct) { _sw.Restart(); - _m.OperationType = "baseline_teardown"; + _m.Clear("baseline_teardown"); await Task.Delay(100).ConfigureAwait(false); _m.ExecuteTime = _sw.ElapsedMilliseconds; _m.ScheduleTime = null; // sync operation diff --git a/e2e/stress/IoTClientPerf/Scenarios/ImportIotHubConfigTest.cs b/e2e/stress/IoTClientPerf/Scenarios/ImportIotHubConfigTest.cs new file mode 100644 index 0000000000..a6f6edd10e --- /dev/null +++ b/e2e/stress/IoTClientPerf/Scenarios/ImportIotHubConfigTest.cs @@ -0,0 +1,67 @@ + +// 
Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using Microsoft.Azure.Devices.Client.Exceptions; +using System; +using System.Collections.Generic; +using System.Text; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Azure.Devices.E2ETests.Scenarios +{ + public class ImportIotHubConfigTest : PerfScenario + { + public ImportIotHubConfigTest(PerfScenarioConfig c) : base(c) { } + + public override Task RunTestAsync(CancellationToken ct) + { + throw new OperationCanceledException(); + } + + public override async Task SetupAsync(CancellationToken ct) + { + if (_id != 0) return; + + try + { + + using (var registryManager = RegistryManager.CreateFromConnectionString(Configuration.IoTHub.ConnectionString)) + { + JobProperties importJob = await registryManager.ImportDevicesAsync( + Configuration.Stress.ImportExportBlobUri, + Configuration.Stress.ImportExportBlobUri).ConfigureAwait(false); + + // Wait until job is finished + while (true) + { + importJob = await registryManager.GetJobAsync(importJob.JobId).ConfigureAwait(false); + Console.WriteLine($"\rImport job '{importJob.JobId}' Status: {importJob.Status} Progress: {importJob.Progress}%] "); + + if (importJob.Status == JobStatus.Completed) return; + else if (importJob.Status == JobStatus.Failed || importJob.Status == JobStatus.Cancelled) + { + string error = $"Import job '{importJob.JobId}' failed ({importJob.Progress}% done). 
Status: {importJob.Status}, Reason: {importJob.FailureReason}"; + throw new IotHubException(error, isTransient: false); + } + + await Task.Delay(5000).ConfigureAwait(false); + } + } + } + catch (Exception ex) + { + Console.WriteLine($"\n\n{ex.Message}\n\n"); + await Task.Delay(10000).ConfigureAwait(false); + + throw; + } + } + + public override Task TeardownAsync(CancellationToken ct) + { + return Task.CompletedTask; + } + } +} diff --git a/e2e/stress/IoTClientPerf/Scenarios/ServiceClientScenarios.cs b/e2e/stress/IoTClientPerf/Scenarios/ServiceClientScenarios.cs index ef1d586d32..7cd69a736b 100644 --- a/e2e/stress/IoTClientPerf/Scenarios/ServiceClientScenarios.cs +++ b/e2e/stress/IoTClientPerf/Scenarios/ServiceClientScenarios.cs @@ -1,8 +1,11 @@ // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. +using Microsoft.Azure.Devices.Common.Exceptions; using System; using System.Diagnostics; +using System.Globalization; +using System.Net.WebSockets; using System.Runtime.ExceptionServices; using System.Threading; using System.Threading.Tasks; @@ -20,6 +23,11 @@ public abstract class ServiceClientScenario : PerfScenario // Separate metrics and time calculation for operations that can be parallelized. 
private const string TestMethodName = "PerfTestMethod"; private const int MethodPassStatus = 200; + private const int MethodConnectionTimeoutSeconds = 30; + private const int MethodResponseTimeoutSeconds = 30; + + private const int C2DExpiryTimeSeconds = 90; + private TelemetryMetrics _mMethod = new TelemetryMetrics(); private Stopwatch _swMethod = new Stopwatch(); @@ -42,13 +50,32 @@ public ServiceClientScenario(PerfScenarioConfig config) : base(config) protected void CreateServiceClient() { - if (_id == 0) s_sc = ServiceClient.CreateFromConnectionString(Configuration.IoTHub.ConnectionString); + if (_id != 0) return; + s_sc?.Dispose(); + + switch (_transport) + { + case Client.TransportType.Amqp_WebSocket_Only: + s_sc = ServiceClient.CreateFromConnectionString(Configuration.IoTHub.ConnectionString, TransportType.Amqp_WebSocket_Only); + break; + case Client.TransportType.Amqp_Tcp_Only: + s_sc = ServiceClient.CreateFromConnectionString(Configuration.IoTHub.ConnectionString, TransportType.Amqp); + break; + + case Client.TransportType.Amqp: + case Client.TransportType.Http1: + case Client.TransportType.Mqtt: + case Client.TransportType.Mqtt_WebSocket_Only: + case Client.TransportType.Mqtt_Tcp_Only: + default: + s_sc = ServiceClient.CreateFromConnectionString(Configuration.IoTHub.ConnectionString); + break; + } } protected async Task OpenServiceClientAsync(CancellationToken ct) { - _m.OperationType = TelemetryMetrics.ServiceOperationOpen; - _m.ScheduleTime = null; + _m.Clear(TelemetryMetrics.ServiceOperationOpen); _sw.Restart(); try { @@ -58,9 +85,15 @@ protected async Task OpenServiceClientAsync(CancellationToken ct) _sw.Restart(); await t.ConfigureAwait(false); } + catch (NullReferenceException ex) // TODO #708 - ServiceClient AMQP will continuously fail with NullRefException after fault. 
+ { + CreateServiceClient(); + SetErrorMessage(_m, ex); + throw; + } catch (Exception ex) { - _m.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_m, ex); throw; } finally @@ -72,22 +105,28 @@ protected async Task OpenServiceClientAsync(CancellationToken ct) protected async Task SendMessageAsync(CancellationToken ct) { - _m.OperationType = TelemetryMetrics.ServiceOperationSend; - _m.ScheduleTime = null; + _m.Clear(TelemetryMetrics.ServiceOperationSend); _sw.Restart(); try { var message = new Message(_messageBytes); + message.ExpiryTimeUtc = DateTime.UtcNow + TimeSpan.FromSeconds(C2DExpiryTimeSeconds); Task t = s_sc.SendAsync(Configuration.Stress.GetDeviceNameById(_id, _authType), message); _m.ScheduleTime = _sw.ElapsedMilliseconds; _sw.Restart(); await t.ConfigureAwait(false); } + catch (NullReferenceException ex) // TODO #708 - ServiceClient AMQP will continuously fail with NullRefException after fault. + { + CreateServiceClient(); + SetErrorMessage(_m, ex); + throw; + } catch (Exception ex) { - _m.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_m, ex); throw; } finally @@ -99,15 +138,17 @@ protected async Task SendMessageAsync(CancellationToken ct) protected async Task CallMethodAsync(CancellationToken ct) { - _mMethod.ScheduleTime = null; - _mMethod.OperationType = TelemetryMetrics.ServiceOperationMethodCall; + _mMethod.Clear(TelemetryMetrics.ServiceOperationMethodCall); _swMethod.Restart(); try { string deviceId = Configuration.Stress.GetDeviceNameById(_id, _authType); - var methodCall = new CloudToDeviceMethod(TestMethodName); + var methodCall = new CloudToDeviceMethod( + methodName: TestMethodName, + responseTimeout: TimeSpan.FromSeconds(MethodResponseTimeoutSeconds), + connectionTimeout: TimeSpan.FromSeconds(MethodConnectionTimeoutSeconds)); methodCall.SetPayloadJson(_methodPayload); Task t = s_sc.InvokeDeviceMethodAsync(Configuration.Stress.GetDeviceNameById(_id, _authType), methodCall); _mMethod.ScheduleTime 
= _swMethod.ElapsedMilliseconds; @@ -118,12 +159,12 @@ protected async Task CallMethodAsync(CancellationToken ct) // Check method result. if (result.Status != MethodPassStatus) { - throw new InvalidOperationException($"IoTPerfClient: Status: {result.Status} Payload:{result.GetPayloadAsJson()}"); + throw new MethodCallFailedException($"IoTPerfClient: Status: {result.Status} Payload:{result.GetPayloadAsJson()}"); } } catch (Exception ex) { - _mMethod.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_mMethod, ex); throw; } finally @@ -133,12 +174,34 @@ protected async Task CallMethodAsync(CancellationToken ct) } } + private void SetErrorMessage(TelemetryMetrics m, Exception ex) + { + m.ErrorMessage = $"{ex.GetType().Name} id: {ResultWriter.IdOf(s_sc)} - {ex.Message}"; + if (IsFatalException(ex)) + { + throw new ParallelRunFatalException(ExceptionDispatchInfo.Capture(ex)); + } + } + + private bool IsFatalException(Exception ex) + { + // List of known exceptions: + if (ex is MethodCallFailedException || /* Method call exception */ + ex is DeviceNotFoundException || /* Returned when device is offline (during fault injection). */ + ex is DeviceMaximumQueueDepthExceededException || /* caused by the device not consuming C2D messages */ + ex is IotHubCommunicationException) /* POST operation timed out. 
*/ + { + return false; + } + + return true; + } + protected async Task CloseAsync(CancellationToken ct) { if (s_sc == null) return; - _m.ScheduleTime = null; - _m.OperationType = TelemetryMetrics.ServiceOperationClose; + _m.Clear(TelemetryMetrics.ServiceOperationClose); _sw.Restart(); try @@ -147,7 +210,7 @@ protected async Task CloseAsync(CancellationToken ct) } catch (Exception ex) { - _m.ErrorMessage = $"{ex.GetType().Name} - {ex.Message}"; + SetErrorMessage(_m, ex); throw; } finally @@ -156,5 +219,10 @@ protected async Task CloseAsync(CancellationToken ct) await _writer.WriteAsync(_m).ConfigureAwait(false); } } + + public class MethodCallFailedException : Exception + { + public MethodCallFailedException(string message) : base(message) { } + } } } diff --git a/e2e/stress/IoTClientPerf/scripts/blockPortToHub.ps1 b/e2e/stress/IoTClientPerf/scripts/blockPortToHub.ps1 new file mode 100644 index 0000000000..5c9c63528b --- /dev/null +++ b/e2e/stress/IoTClientPerf/scripts/blockPortToHub.ps1 @@ -0,0 +1,67 @@ +#Requires -RunAsAdministrator + +<# +.SYNOPSIS + Blocks a port using Windows Firewall +.DESCRIPTION + Blocks a port for a specific IoT Hub using Windows Firewall. +.PARAMETER + +.INPUTS + +.OUTPUTS + .log> + +.EXAMPLE + +#> + +param ( + [string] $IotHubHostName = $null, + [int] $BlockDurationSeconds = 10, + [switch] $cleanOnly = $false +) + +if (-not $cleanOnly) +{ + if ($IotHubHostName -eq $null) + { + throw "-IotHubHostName is required." + } + + Write-Host -NoNewline "Getting IP address for $IotHubHostName . . . 
" + $resolveResponse = Resolve-DnsName -Name $IotHubHostName -Type A + $ipaddress = ($resolveResponse | Where-Object {$_.Type -eq "A"}).IPAddress + Write-Host $ipaddress +} + +$err = 0 + +try +{ + if (-not $cleanOnly) + { + Write-Host "`tBlocking HTTPS" + New-NetFirewallRule -DisplayName "IoTClientPerf HTTPS $($ipaddress):443" -Protocol TCP -Action Block -Direction Outbound -RemoteAddress $ipaddress -RemotePort 443 -ErrorAction Stop | Out-Null + Write-Host "`tBlocking AMQPS" + New-NetFirewallRule -DisplayName "IoTClientPerf MQTTS $($ipaddress):8883" -Protocol TCP -Action Block -Direction Outbound -RemoteAddress $ipaddress -RemotePort 8883 -ErrorAction Stop | Out-Null + Write-Host "`tBlocking MQTTS" + New-NetFirewallRule -DisplayName "IoTClientPerf AMQPS $($ipaddress):5671" -Protocol TCP -Action Block -Direction Outbound -RemoteAddress $ipaddress -RemotePort 5671 -ErrorAction Stop | Out-Null + + Write-Host -NoNewLine "Waiting $BlockDurationSeconds seconds . . ." + Start-Sleep $BlockDurationSeconds + Write-Host "Done" + } +} +catch +{ + Write-Error "An error occured: $_." + $err = 1 +} +finally +{ + Write-Host "Removing all firewall rules..." 
+ Get-NetFirewallRule | Where-Object {$_.DisplayName -like "IoTClientPerf*"} | ForEach-Object { Write-Host "`t$($_.DisplayName)"; Remove-NetFirewallRule -DisplayName ($_.DisplayName) -ErrorAction Continue } +} + +exit $err diff --git a/e2e/stress/IoTClientPerf/scripts/runClient.ps1 b/e2e/stress/IoTClientPerf/scripts/runClient.ps1 index 7370801637..62c0e9636c 100644 --- a/e2e/stress/IoTClientPerf/scripts/runClient.ps1 +++ b/e2e/stress/IoTClientPerf/scripts/runClient.ps1 @@ -18,24 +18,73 @@ Param( $clients = 100, $protocol = "amqp", $connections = 10, - $outputFile = "client.csv", - $durationSeconds = 300 + $outputFile = "device$(Get-Date -format "yyyy-MM-dd'T'HH-mm").csv", + $durationSeconds = 300, + $scenario = "device_all", + [switch] $fault = $false, + $faultStartDelaySeconds = 60, + $faultDurationSeconds = 30 ) +function Test-Administrator +{ + $user = [Security.Principal.WindowsIdentity]::GetCurrent(); + (New-Object Security.Principal.WindowsPrincipal $user).IsInRole([Security.Principal.WindowsBuiltinRole]::Administrator) +} + $host.ui.RawUI.WindowTitle = "Azure IoT SDK: Device Stress" +$fileName = [io.path]::GetFileNameWithoutExtension($outputFile) +$filePath = [io.path]::GetDirectoryName($outputFile) +if ($filePath -eq "") { + $filePath = pwd +} + +if ($fault -and (-not (Test-Administrator))) +{ + Write-Error "Fault injection requires administrator rights. 
Run elevated or without -fault" + exit 1 +} + +Write-Host -ForegroundColor Cyan "`nDEVICE scenario: $scenario`n" +$out = Join-Path $filePath "$fileName.$($scenario).csv" + +$proc_device = Start-Process -NoNewWindow dotnet -ArgumentList "run --no-build -c Release -- -t $durationSeconds -o $out -p $protocol -n $clients -c $connections -f $scenario -s 2048" -PassThru -RedirectStandardError "$out.err" +$handle = $proc_device.Handle # Workaround to ensure we have the exit code -Write-Host -ForegroundColor Cyan "`nDEVICE: C2D`n" -& dotnet run --no-build -c Release -- -t $durationSeconds -o $outputFile -p $protocol -n $clients -c $connections -f device_c2d +if ($fault) +{ + $hubHostName = $Env:IOTHUB_CONN_STRING_CSHARP.Split(';')[0].Split('=')[1] + $scriptPath = $PSScriptRoot + + Write-Host -ForegroundColor Magenta "Fault requested after $faultStartDelaySeconds for $hubHostName" + Start-Sleep $faultStartDelaySeconds + $proc_fault = Start-Process -NoNewWindow powershell -ArgumentList "$(Join-Path $scriptPath 'blockPortToHub') -IotHubHostName $hubHostName -BlockDurationSeconds $faultDurationSeconds" -PassThru + $handle2 = $proc_fault.Handle # Workaround to ensure we have the exit code +} +Wait-Process $proc_device.Id -Write-Host -ForegroundColor Cyan "`nDEVICE: Methods`n" -& dotnet run --no-build -c Release -- -t $durationSeconds -o $outputFile -p $protocol -n $clients -c $connections -f device_method +$err = 0 +if ($proc_device.ExitCode -ne 0) +{ + Write-Host -ForegroundColor Red "ERROR: DeviceClient failed with exit code: $($proc_device.ExitCode)" + $err = $proc_device.ExitCode + foreach ($file in (ls *.err)) + { + Write-Host -ForegroundColor Red "ERRORS $file" + cat $file + Write-Host + } +} -Write-Host -ForegroundColor Cyan "`nDEVICE: All`n" -& dotnet run --no-build -c Release -- -t $durationSeconds -o $outputFile -p $protocol -n $clients -c $connections -f device_all +if ($fault -and ($proc_fault.ExitCode -ne 0)) +{ + Write-Host -ForegroundColor Red "ERROR: 
FaultInjection failed with exit code: $($proc_fault.ExitCode)" + $err = $proc_fault.ExitCode +} +rm -ErrorAction Continue "$out.err" -Write-Host -ForegroundColor Cyan "`nDEVICE: D2C`n" -& dotnet run --no-build -c Release -- -t $durationSeconds -o $outputFile -p $protocol -n $clients -c $connections -f device_d2c +exit $err diff --git a/e2e/stress/IoTClientPerf/scripts/runServer.ps1 b/e2e/stress/IoTClientPerf/scripts/runServer.ps1 index 39a5ba28b2..d12dab2fab 100644 --- a/e2e/stress/IoTClientPerf/scripts/runServer.ps1 +++ b/e2e/stress/IoTClientPerf/scripts/runServer.ps1 @@ -18,47 +18,130 @@ Param( $clients = 100, $protocol = "amqp", $connections = 10, - $outputFile = "service.csv", - $durationSeconds = 1800, - $type = $null + $outputFile = "service$(Get-Date -format "yyyy-MM-dd'T'HH-mm").csv", + $durationSeconds = 300, + $type = $null, + $nowait = $false ) $fileName = [io.path]::GetFileNameWithoutExtension($outputFile) $filePath = [io.path]::GetDirectoryName($outputFile) +if ($filePath -eq "") { + $filePath = pwd +} if ($type -eq $null) { + # Main + $scriptPath = $PSScriptRoot $scriptName = $MyInvocation.MyCommand.Name $script = Join-Path $scriptPath $scriptName Write-Host "Root: $PSScriptRoot name: $scriptName" - Start-Process powershell -ArgumentList "$script -clients $clients -protocol $protocol -connections $connections -outputFile $outputFile -durationSeconds $durationSeconds -type methods" - Start-Process powershell -ArgumentList "$script -clients $clients -protocol $protocol -connections $connections -outputFile $outputFile -durationSeconds $durationSeconds -type c2d" + Write-Host -NoNewline "`t Starting Methods..." 
+ $proc_method = Start-Process -NoNewWindow powershell -ArgumentList "$script -clients $clients -protocol $protocol -connections $connections -outputFile $outputFile -durationSeconds $durationSeconds -type methods -nowait $nowait" -PassThru + $handle1 = $proc_method.Handle # Workaround to ensure we have the exit code + Write-Host "PID $($proc_method.Id)" + + Write-Host -NoNewline "`t Starting C2D..." + $proc_c2d = Start-Process -NoNewWindow powershell -ArgumentList "$script -clients $clients -protocol $protocol -connections $connections -outputFile $outputFile -durationSeconds $durationSeconds -type c2d -nowait $nowait" -PassThru + $handle2 = $proc_c2d.Handle # Workaround to ensure we have the exit code + Write-Host "PID $($proc_c2d.Id)" + + Write-Host -NoNewline "`t Waiting for processes to finish..." + Wait-Process -Id ($proc_method.Id, $proc_c2d.Id) + Write-Host "Done" + + $err = 0 + if ($proc_method.ExitCode -ne 0) + { + Write-Host -ForegroundColor Red "ERROR: ServiceClient Methods failed with exit code: $($proc_method.ExitCode)" + $err = $proc_method.ExitCode - exit + foreach ($file in (ls *.err)) + { + if ($file.Name -match "method") + { + Write-Host -ForegroundColor Red "ERRORS $file" + cat $file + Write-Host + } + } + } + + if ($proc_c2d.ExitCode -ne 0) + { + Write-Host -ForegroundColor Red "ERROR: ServiceClient C2D failed with exit code: $($proc_c2d.ExitCode)" + $err = $proc_c2d.ExitCode + + foreach ($file in (ls *.err)) + { + if ($file.Name -match "c2d") + { + Write-Host -ForegroundColor Red "ERRORS $file" + cat $file + Write-Host + } + } + } + + rm -ErrorAction Continue *.err + + exit $err #One of the methods or c2d error codes } elseif ($type -eq "methods") { + # Methods Fork + $host.ui.RawUI.WindowTitle = "Azure IoT SDK: Service Stress [Methods]" Write-Host -ForegroundColor Cyan "`nSERVICE: Methods`n" - $out = Join-Path $filePath "$fileName.method.csv" $scenario = "service_method" + $out = Join-Path $filePath "$fileName.$($scenario).csv" + if 
(Test-Path $out) + { + rm $out + } + } elseif ($type -eq "c2d") { + # C2D Fork + $host.ui.RawUI.WindowTitle = "Azure IoT SDK: Service Stress [C2D]" Write-Host -ForegroundColor Cyan "`nSERVICE: C2D`n" - $out = Join-Path $filePath "$fileName.c2d.csv" $scenario = "service_c2d" + $out = Join-Path $filePath "$fileName.$($scenario).csv" + + if (Test-Path $out) + { + rm $out + } } else { Write-Error "Unknown test type $type". } +# Fork (C2D/Methods) +$proc_sevice = Start-Process -NoNewWindow dotnet -ArgumentList "run --no-build -c Release -- -t $durationSeconds -o $out -p $protocol -n $clients -c $connections -f $scenario -s 2048" -PassThru -RedirectStandardError "$out.err" +$handle3 = $proc_sevice.Handle # Workaround to ensure we have the exit code +Wait-Process -Id $proc_sevice.Id + +if ($proc_sevice.ExitCode -ne 0) +{ + Write-Host -ForegroundColor Red "ERROR: ServiceClient failed with exit code: $($proc_sevice.ExitCode)" + $err = $proc_sevice.ExitCode + + Write-Host -ForegroundColor Red "ERRORS:" + cat "$out.err" +} + +if (-not $nowait) +{ + Read-Host "Press ENTER to close" +} -& dotnet run --no-build -c Release -- -t $durationSeconds -o $out -p $protocol -n $clients -c $connections -f $scenario -Read-Host "Press ENTER to close" +exit $proc_sevice.ExitCode diff --git a/e2e/stress/IoTClientPerf/scripts/test.ps1 b/e2e/stress/IoTClientPerf/scripts/test.ps1 new file mode 100644 index 0000000000..04aba569f6 --- /dev/null +++ b/e2e/stress/IoTClientPerf/scripts/test.ps1 @@ -0,0 +1,3 @@ +Start-Process -NoNewWindow powershell -ArgumentList "dir" +Start-Process -NoNewWindow powershell -ArgumentList "dir" + diff --git a/e2e/stress/IoTClientPerf/scripts/zipLogs.ps1 b/e2e/stress/IoTClientPerf/scripts/zipLogs.ps1 new file mode 100644 index 0000000000..e2fe6919a7 --- /dev/null +++ b/e2e/stress/IoTClientPerf/scripts/zipLogs.ps1 @@ -0,0 +1,19 @@ +$dataFolder = "./" +$files = dir (Join-Path $dataFolder "*.csv") + +$i = 0 +foreach ($file in $files) +{ + $i++ + $p = $i / $files.Length * 
100 + Write-Progress -PercentComplete $p -Activity "Compressing log files" + + $outFile = "$($file).zip" + + if (-not (Test-Path $outFile)) + { + Compress-Archive -Path $file -DestinationPath $outFile + } +} + +$src = Join-Path $dataFolder "*.zip" diff --git a/e2e/test/E2ETests.csproj b/e2e/test/E2ETests.csproj index d8cf2201ba..a601cc783f 100644 --- a/e2e/test/E2ETests.csproj +++ b/e2e/test/E2ETests.csproj @@ -8,9 +8,6 @@ Microsoft.Azure.Devices.E2ETests Microsoft.Azure.Devices.E2ETests $(MSBuildProjectDirectory)\..\.. - - - $(RootDir)\common\test diff --git a/vsts/vstsPerf.yaml b/vsts/vstsPerf.yaml new file mode 100644 index 0000000000..a859f7a61c --- /dev/null +++ b/vsts/vstsPerf.yaml @@ -0,0 +1,169 @@ +name: $(BuildID)_$(BuildDefinitionName)_$(SourceBranchName)_$(Date:yyyyMMdd)$(Rev:.r) +resources: +- repo: self + clean: true +#variables: + #SIM_DEVICE_COUNT: 100 + #SIM_PROTOCOL: amqp + #SIM_CONNECTIONS: 10 + #SIM_DURATION_SECONDS: 120 + #SIM_SCENARIO: device_all_noretry + #SIM_FAULT: yes + #SIM_FAULT_START_DELAY: 30 + #SIM_FAULT_DURATION: 30 + #KPI_CONNECTED_DEVICES_PERCENTAGE: 100 + #KPI_TCP_CONNECTIONS_PERCENTAGE: 100 + #KPI_RPS_AVG_MIN: 1500 + #KPI_RPS_STD_MAX: 2000 + #KPI_GC_MEM_MAX: 600000000 + #KPI_SUCCESS_DEVICE: 95 + #KPI_SUCCESS_SERVICE: 65 + #AZURE_IOT_LOCALPACKAGES: + +phases: + +### DeviceSim box ### +- phase: DEVICECLIENT_SIM + displayName: DeviceClientSim + + condition: succeeded() + queue: + name: csharp-perf + timeoutInMinutes: 180 + steps: + + - powershell: | + cd ./e2e/stress/IoTClientPerf/ + ./scripts/blockPortToHub.ps1 -cleanOnly + displayName: Remove all test firewall rules + + + - script: | + dotnet new + cd e2e\stress\IoTClientPerf\ + dotnet build -c Release + displayName: build + env: + AZURE_IOT_LOCALPACKAGES: $(AZURE_IOT_LOCALPACKAGES) + + - powershell: | + cd ./e2e/stress/IoTClientPerf/ + + if ($env:SIM_FAULT -eq "yes") + { + ./scripts/runClient.ps1 -clients $env:SIM_DEVICE_COUNT -protocol $env:SIM_PROTOCOL -connections 
$env:SIM_CONNECTIONS -durationSeconds $env:SIM_DURATION_SECONDS -scenario $env:SIM_SCENARIO -fault -faultStartDelaySeconds $env:SIM_FAULT_START_DELAY -faultDurationSeconds $env:SIM_FAULT_DURATION + } + else + { + ./scripts/runClient.ps1 -clients $env:SIM_DEVICE_COUNT -protocol $env:SIM_PROTOCOL -connections $env:SIM_CONNECTIONS -durationSeconds $env:SIM_DURATION_SECONDS -scenario $env:SIM_SCENARIO + } + + displayName: runDeviceClient + env: + IOTHUB_CONN_STRING_CSHARP: $(IOTHUB-CONN-STRING-CSHARP) + IOTHUB_X509_PFX_CERTIFICATE: $(IOTHUB-X509-PFX-CERTIFICATE) + IOTHUB_DEVICE_CONN_STRING: $(IOTHUB-DEVICE-CONN-STRING) + IOTHUB_DEVICE_CONN_STRING2: $(IOTHUB-DEVICE-CONN-STRING2) + SIM_DEVICE_COUNT: $(SIM_DEVICE_COUNT) + SIM_PROTOCOL: $(SIM_PROTOCOL) + SIM_CONNECTIONS: $(SIM_CONNECTIONS) + SIM_DURATION_SECONDS: $(SIM_DURATION_SECONDS) + SIM_SCENARIO: $(SIM_SCENARIO) + SIM_FAULT: $(SIM_FAULT) + SIM_FAULT_START_DELAY: $(SIM_FAULT_START_DELAY) + SIM_FAULT_DURATION: $(SIM_FAULT_DURATION) # read by the script above as -faultDurationSeconds; distinct from the start delay + IOTHUB_PERF_CONNECTED_PERCENTAGE: $(KPI_CONNECTED_DEVICES_PERCENTAGE) + IOTHUB_PERF_TCP_PERCENTAGE: $(KPI_TCP_CONNECTIONS_PERCENTAGE) + IOTHUB_PERF_RPS_MIN_AVG: $(KPI_RPS_AVG_MIN) + IOTHUB_PERF_RPS_MAX_STD: $(KPI_RPS_STD_MAX) + IOTHUB_PERF_GC_MEM_BYTES_MAX: $(KPI_GC_MEM_MAX) + IOTHUB_PERF_SUCCESS_RATE_PERCENTAGE: $(KPI_SUCCESS_DEVICE) + + - powershell: | + cd ./e2e/stress/IoTClientPerf/ + ./scripts/zipLogs.ps1 + displayName: zipLogs + condition: always() + + - task: CopyFiles@2 + displayName: 'Copy files to the artifacts folder' + inputs: + SourceFolder: '$(Build.SourcesDirectory)' + + Contents: '**/*.zip' + + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + condition: always() + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: perflogs_device' + inputs: + ArtifactName: perflogs_device + + condition: always() + +### ServiceSim box ### +- phase: SERVICECLIENT_SIM + displayName: ServiceClientSim + + condition: succeeded() + queue: + name: csharp-perf + 
timeoutInMinutes: 180 + steps: + - powershell: | + cd ./e2e/stress/IoTClientPerf/ + ./scripts/blockPortToHub.ps1 -cleanOnly + displayName: Remove all test firewall rules + + - script: | + dotnet new + cd e2e\stress\IoTClientPerf\ + dotnet build -c Release + displayName: build + env: + AZURE_IOT_LOCALPACKAGES: $(AZURE_IOT_LOCALPACKAGES) + + - powershell: | + cd ./e2e/stress/IoTClientPerf/ + ./scripts/runServer.ps1 -clients $env:SIM_DEVICE_COUNT -durationSeconds $env:SIM_DURATION_SECONDS + displayName: runServiceClient + env: + IOTHUB_CONN_STRING_CSHARP: $(IOTHUB-CONN-STRING-CSHARP) + IOTHUB_X509_PFX_CERTIFICATE: $(IOTHUB-X509-PFX-CERTIFICATE) + IOTHUB_DEVICE_CONN_STRING: $(IOTHUB-DEVICE-CONN-STRING) + IOTHUB_DEVICE_CONN_STRING2: $(IOTHUB-DEVICE-CONN-STRING2) + SIM_DEVICE_COUNT: $(SIM_DEVICE_COUNT) + SIM_PROTOCOL: $(SIM_PROTOCOL) + SIM_CONNECTIONS: $(SIM_CONNECTIONS) + SIM_DURATION_SECONDS: $(SIM_DURATION_SECONDS) + SIM_SCENARIO: $(SIM_SCENARIO) + SIM_FAULT: $(SIM_FAULT) + SIM_FAULT_START_DELAY: $(SIM_FAULT_START_DELAY) + SIM_FAULT_DURATION: $(SIM_FAULT_DURATION) # fault duration is its own setting, distinct from SIM_FAULT_START_DELAY + IOTHUB_PERF_SUCCESS_RATE_PERCENTAGE: $(KPI_SUCCESS_SERVICE) + + - powershell: | + cd ./e2e/stress/IoTClientPerf/ + ./scripts/zipLogs.ps1 + displayName: zipLogs + condition: always() + + - task: CopyFiles@2 + displayName: 'Copy files to the artifacts folder' + inputs: + SourceFolder: '$(Build.SourcesDirectory)' + + Contents: '**/*.zip' + + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + condition: always() + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: perflogs_service' + inputs: + ArtifactName: perflogs_service + + condition: always()