Skip to content

Commit

Permalink
Stress/perf test automation: devops yaml, fault injection via Windows
Browse files Browse the repository at this point in the history
Firewall, adding KPI and exception faulting.
  • Loading branch information
CIPop committed Oct 1, 2019
1 parent 9b8897c commit 27c745e
Show file tree
Hide file tree
Showing 27 changed files with 866 additions and 324 deletions.
37 changes: 37 additions & 0 deletions e2e/stress/IoTClientPerf/Configuration.Stress.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Globalization;
using System.Security.Cryptography.X509Certificates;

namespace Microsoft.Azure.Devices.E2ETests
Expand All @@ -25,6 +26,36 @@ public static partial class Stress

// Certificate with private key, obtained from Configuration.IoTHub the first time Value is read.
private static Lazy<X509Certificate2> s_cert =
    new Lazy<X509Certificate2>(() => Configuration.IoTHub.GetCertificateWithPrivateKey());

/// <summary>
/// Gets the import/export blob URI, read from the
/// <c>IOTHUB_IMPORTEXPORT_BLOB_URI</c> configuration value.
/// </summary>
public static string ImportExportBlobUri
{
    get { return GetValue("IOTHUB_IMPORTEXPORT_BLOB_URI"); }
}

/// <summary>
/// Gets the percentage of devices the runner expects to be connected once the test has ended,
/// read from the <c>IOTHUB_PERF_CONNECTED_PERCENTAGE</c> configuration value.
/// </summary>
/// <value>The parsed integer, or <c>null</c> when the configured text is not a valid integer.</value>
public static long? ConnectedDevicesPercentage
{
    get
    {
        string raw = GetValue("IOTHUB_PERF_CONNECTED_PERCENTAGE", "");
        return ParseNullable(raw);
    }
}

/// <summary>
/// Gets the percentage of TCP connections the runner expects to be open once the test has ended,
/// read from the <c>IOTHUB_PERF_TCP_PERCENTAGE</c> configuration value.
/// </summary>
/// <value>The parsed integer, or <c>null</c> when the configured text is not a valid integer.</value>
public static long? TcpConnectionsPercentage
{
    get
    {
        string raw = GetValue("IOTHUB_PERF_TCP_PERCENTAGE", "");
        return ParseNullable(raw);
    }
}

/// <summary>
/// Gets the minimum acceptable average of requests per second measured by the test,
/// read from the <c>IOTHUB_PERF_RPS_MIN_AVG</c> configuration value.
/// </summary>
/// <value>The parsed integer, or <c>null</c> when the configured text is not a valid integer.</value>
public static long? RequestsPerSecondMinAvg
{
    get
    {
        string raw = GetValue("IOTHUB_PERF_RPS_MIN_AVG", "");
        return ParseNullable(raw);
    }
}

/// <summary>
/// Gets the maximum acceptable standard deviation of requests per second measured by the test,
/// read from the <c>IOTHUB_PERF_RPS_MAX_STD</c> configuration value.
/// </summary>
/// <value>The parsed integer, or <c>null</c> when the configured text is not a valid integer.</value>
public static long? RequestsPerSecondMaxStd
{
    get
    {
        string raw = GetValue("IOTHUB_PERF_RPS_MAX_STD", "");
        return ParseNullable(raw);
    }
}

/// <summary>
/// Gets the maximum acceptable amount of GC memory, in bytes, after the test has ended,
/// read from the <c>IOTHUB_PERF_GC_MEM_BYTES_MAX</c> configuration value.
/// </summary>
/// <value>The parsed integer, or <c>null</c> when the configured text is not a valid integer.</value>
public static long? GCMemoryBytes
{
    get
    {
        string raw = GetValue("IOTHUB_PERF_GC_MEM_BYTES_MAX", "");
        return ParseNullable(raw);
    }
}

public static string GetDeviceNameById(int id, string authType)
{
return $"{NamePrefix}_{authType}_{id}";
Expand All @@ -43,6 +74,12 @@ public static string GetConnectionStringById(int id, string authType)
// NOTE(review): s_key2 is declared outside the visible part of this file; this getter
// simply exposes its Value — confirm what the key represents against that declaration.
public static string Key2
{
    get { return s_key2.Value; }
}

/// <summary>
/// Gets the certificate held by the lazily initialized <c>s_cert</c> field.
/// </summary>
public static X509Certificate2 Certificate
{
    get { return s_cert.Value; }
}

/// <summary>
/// Converts the text of an optional numeric setting into a nullable <see cref="long"/>.
/// </summary>
/// <param name="s">The raw setting text; may be <c>null</c> or empty.</param>
/// <returns>
/// The parsed value, or <c>null</c> when <paramref name="s"/> is not a valid integer.
/// </returns>
private static long? ParseNullable(string s)
{
    // Settings are machine-written, so parse with the invariant culture: the outcome must not
    // depend on the regional settings of whichever machine runs the test.
    if (long.TryParse(s, NumberStyles.Integer, CultureInfo.InvariantCulture, out long parsed))
    {
        return parsed;
    }

    return null;
}
}
}
}
22 changes: 15 additions & 7 deletions e2e/stress/IoTClientPerf/IoTClientPerf.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,27 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.1</TargetFramework>
<TargetFramework>netcoreapp2.2</TargetFramework>
<RootNamespace>Microsoft.Azure.Devices.E2ETests</RootNamespace>
<RootDir>$(MSBuildProjectDirectory)\..\..\..</RootDir>
<CommonTest>$(RootDir)\common\test</CommonTest>
</PropertyGroup>

<ItemGroup>
<Compile Include="..\..\..\common\test\Configuration.cs" Link="Configuration.cs" />
<Compile Include="..\..\..\common\test\Configuration.IoTHub.cs" Link="Configuration.IoTHub.cs" />
<Compile Include="$(CommonTest)\Configuration.cs" Link="Configuration.cs" />
<Compile Include="$(CommonTest)\Configuration.IoTHub.cs" Link="Configuration.IoTHub.cs" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\iothub\device\src\Microsoft.Azure.Devices.Client.csproj" />
<ProjectReference Include="..\..\..\iothub\service\src\Microsoft.Azure.Devices.csproj" />
<ProjectReference Include="..\..\..\shared\src\Microsoft.Azure.Devices.Shared.csproj" />
<ItemGroup Condition=" '$(AZURE_IOT_LOCALPACKAGES)' == '' ">
<ProjectReference Include="$(RootDir)\iothub\device\src\Microsoft.Azure.Devices.Client.csproj" />
<ProjectReference Include="$(RootDir)\iothub\service\src\Microsoft.Azure.Devices.csproj" />
<ProjectReference Include="$(RootDir)\shared\src\Microsoft.Azure.Devices.Shared.csproj" />
</ItemGroup>

<ItemGroup Condition=" '$(AZURE_IOT_LOCALPACKAGES)' != '' ">
<PackageReference Include="Microsoft.Azure.Devices" Version="1.*" />
<PackageReference Include="Microsoft.Azure.Devices.Shared" Version="1.*" />
<PackageReference Include="Microsoft.Azure.Devices.Client" Version="1.*" />
</ItemGroup>

</Project>
46 changes: 40 additions & 6 deletions e2e/stress/IoTClientPerf/ParallelRun.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.ExceptionServices;
using System.Threading;
using System.Threading.Tasks;

Expand Down Expand Up @@ -43,10 +45,15 @@ public ParallelRun(

public async Task RunAsync(bool runOnce, CancellationToken ct)
{
int cursor_left, cursor_top;
cursor_left = Console.CursorLeft;
cursor_top = Console.CursorTop;
int cursor_left = 0, cursor_top = 0;

try
{
cursor_left = Console.CursorLeft;
cursor_top = Console.CursorTop;
}
catch (IOException) { /* Avoid "The handle is invalid" exception in DevOps */ }

int actualParallel = Math.Min(_parallelOperations, _tests.Length);
int currentInstance = 0;

Expand Down Expand Up @@ -82,6 +89,14 @@ public async Task RunAsync(bool runOnce, CancellationToken ct)
break;
case TaskStatus.Faulted:
statInterimFaulted++;
foreach (Exception ex in finished.Exception.InnerExceptions)
{
if (ex is ParallelRunFatalException)
{
// Crash the process to simplify analysis. Recover original stack.
((ParallelRunFatalException)ex).ThrowInner();
}
}
break;
case TaskStatus.RanToCompletion:
statInterimCompleted++;
Expand All @@ -103,9 +118,13 @@ public async Task RunAsync(bool runOnce, CancellationToken ct)
double statInterimSeconds = statInterimSw.Elapsed.TotalSeconds;
statTotalCompleted += statInterimCompleted;

Console.SetCursorPosition(cursor_left, cursor_top);
cursor_left = Console.CursorLeft;
cursor_top = Console.CursorTop;
try
{
Console.SetCursorPosition(cursor_left, cursor_top);
cursor_left = Console.CursorLeft;
cursor_top = Console.CursorTop;
}
catch (IOException) { /* Avoid "The handle is invalid" exception in DevOps */ }

_updateStatistics(statInterimCompleted, statInterimFaulted, statInterimCancelled, statInterimSeconds);
if (drain) Console.Write("Waiting for tasks to finish...\r");
Expand Down Expand Up @@ -142,4 +161,19 @@ public async Task RunAsync(bool runOnce, CancellationToken ct)
}
}
}

/// <summary>
/// Wraps an exception that must abort the whole run instead of being counted as a faulted task.
/// RunAsync looks for this type among a faulted task's inner exceptions and calls
/// <see cref="ThrowInner"/> to rethrow the captured exception.
/// </summary>
public class ParallelRunFatalException : Exception
{
    private readonly ExceptionDispatchInfo _exceptionDispatchInfo;

    /// <summary>
    /// Creates the wrapper around a captured exception.
    /// </summary>
    /// <param name="innerExceptionDispatchInfo">The captured exception, including its original stack.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="innerExceptionDispatchInfo"/> is <c>null</c>.
    /// </exception>
    public ParallelRunFatalException(ExceptionDispatchInfo innerExceptionDispatchInfo)
        : base(
            "A fatal error occurred during a parallel test run.",
            // Expose the captured exception as InnerException so logs and debuggers show the cause
            // even when ThrowInner is never reached; reject null here rather than failing later.
            (innerExceptionDispatchInfo ?? throw new ArgumentNullException(nameof(innerExceptionDispatchInfo))).SourceException)
    {
        _exceptionDispatchInfo = innerExceptionDispatchInfo;
    }

    /// <summary>
    /// Rethrows the captured exception while preserving its original stack trace.
    /// </summary>
    public void ThrowInner()
    {
        _exceptionDispatchInfo.Throw();
    }
}
}
77 changes: 62 additions & 15 deletions e2e/stress/IoTClientPerf/PerfTestRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ public class PerfTestRunner
private readonly int _timeSeconds;
private readonly Func<PerfScenarioConfig, PerfScenario> _scenarioFactory;

private PerfScenario[] _tests;
private Stopwatch _sw = new Stopwatch();
private readonly PerfScenario[] _tests;
private readonly Stopwatch _sw = new Stopwatch();

public PerfTestRunner(
ResultWriter writer,
Expand Down Expand Up @@ -80,8 +80,9 @@ private void FilterTcpStatistics()
}
}

public async Task RunTestAsync()
public async Task<int> RunTestAsync()
{
int ret = 0;
_sw.Restart();

try
Expand All @@ -92,14 +93,16 @@ public async Task RunTestAsync()
catch (OperationCanceledException)
{
Console.WriteLine($"Setup FAILED (timeout:{_sw.Elapsed})");
ret = 1;
return ret;
}

_sw.Restart();
Console.WriteLine();

try
{
await LoopAsync().ConfigureAwait(false);
ret = await LoopAsync().ConfigureAwait(false);
}
catch (OperationCanceledException)
{
Expand All @@ -111,16 +114,22 @@ public async Task RunTestAsync()

await TeardownAllAsync().ConfigureAwait(false);
Console.WriteLine("Done. ");

return ret;
}

private async Task LoopAsync()
private async Task<int> LoopAsync()
{
using (var cts = new CancellationTokenSource(TimeSpan.FromSeconds(_timeSeconds)))
{
ulong statTotalCompleted = 0;
ulong statTotalFaulted = 0;
ulong statTotalCancelled = 0;
double statTotalSeconds = 0.0;
int cpuLoad = 0;
long memoryBytes = 0, gcBytes = 0, tcpConn = 0, devConn = 0;
double avgRps = 0.0, stdDevRps = 0.0;

List<double> statRps = new List<double>();

var runner = new ParallelRun(
Expand All @@ -145,23 +154,61 @@ private async Task LoopAsync()
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;
double totalTransferPerSec = totalRequestsPerSec * _messageSizeBytes;

(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
(avgRps, stdDevRps) = CalculateAvgAndStDev(statRps);
double avgBps = avgRps * _messageSizeBytes;
double stdDevBps = stdDevRps * _messageSizeBytes;
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);

SystemMetrics.GetMetrics(out cpuLoad, out memoryBytes, out gcBytes, out tcpConn, out devConn);
Console.WriteLine($"[{_sw.Elapsed}] Loop Statistics:");
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(avgRps)}/s ");
Console.WriteLine($"Throughput: {GetHumanReadableBytes(transferPerSec)}/s Avg: {GetHumanReadableBytes(avgBps)}/s +/-StdDev: {GetHumanReadableBytes(stdDevBps)}/s ");
Console.WriteLine($"Connected : {devConn,10:N0} ");
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine("----");
Console.WriteLine($"TOTALs: ");
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
Console.WriteLine($"Data : {GetHumanReadableBytes(statTotalCompleted * (ulong)_messageSizeBytes)} ");
});

await runner.RunAsync(runOnce: false, ct: cts.Token).ConfigureAwait(false);

Console.WriteLine();
int ret = 0;
float? expectedDeviceConn = (float)_n * Configuration.Stress.ConnectedDevicesPercentage / 100;
float? expectedTcpConn = (float)_poolSize * Configuration.Stress.TcpConnectionsPercentage / 100;

if (expectedDeviceConn.HasValue && (devConn < expectedDeviceConn))
{
Console.Error.WriteLine($"FAILED KPI: Connected Devices. Expected: >{expectedDeviceConn}; Actual: {devConn}.");
ret = 1;
}

if (expectedTcpConn.HasValue && (tcpConn != expectedTcpConn)) // Ensure all are connected and no connection leaks exist.
{
Console.Error.WriteLine($"FAILED KPI: TCP Connections. Expected: ={expectedTcpConn}; Actual: {tcpConn}.");
ret = 2;
}

if (Configuration.Stress.RequestsPerSecondMinAvg.HasValue && (avgRps < Configuration.Stress.RequestsPerSecondMinAvg))
{
Console.Error.WriteLine($"FAILED KPI: RPS Average. Expected: >{Configuration.Stress.RequestsPerSecondMinAvg}; Actual: {avgRps}.");
ret = 3;
}

if (Configuration.Stress.RequestsPerSecondMaxStd.HasValue && (stdDevRps > Configuration.Stress.RequestsPerSecondMaxStd))
{
Console.Error.WriteLine($"FAILED KPI: RPS StdDev. Expected: <{Configuration.Stress.RequestsPerSecondMaxStd}; Actual: {stdDevRps}.");
ret = 4;
}

if (Configuration.Stress.GCMemoryBytes.HasValue && (gcBytes > Configuration.Stress.GCMemoryBytes))
{
Console.Error.WriteLine($"FAILED KPI: GC Memory. Expected: <{GetHumanReadableBytes(Configuration.Stress.GCMemoryBytes.Value)}; Actual: {GetHumanReadableBytes(gcBytes)}.");
ret = 5;
}

if (ret != 0) Console.WriteLine("^^^^^^^^^^^^^^^^^^^\n");
return ret;
}
}

Expand Down Expand Up @@ -211,12 +258,12 @@ private async Task SetupAllAsync()
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;

(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);

Console.WriteLine($"[{_sw.Elapsed}] Setup Statistics:");
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
Console.WriteLine($"Connected : {devConn,10:N0} ");
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine("----");
Console.WriteLine($"TOTALs: ");
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
Expand Down Expand Up @@ -257,13 +304,13 @@ private async Task TeardownAllAsync()
double totalRequestsPerSec = statTotalCompleted / statTotalSeconds;

(double avgRps, double stdDevRps) = CalculateAvgAndStDev(statRps);
SystemMetrics.GetMetrics(out int cpuPercent, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);
SystemMetrics.GetMetrics(out int cpuLoad, out long memoryBytes, out long gcBytes, out long tcpConn, out long devConn);


Console.WriteLine($"[{_sw.Elapsed}] Teardown Statistics:");
Console.WriteLine($"RPS : {requestsPerSec,10:N2} R/s Avg: {avgRps,10:N2} R/s +/-StdDev: {stdDevRps,10:N2} R/s");
Console.WriteLine($"Connected : {devConn,10:N0} ");
Console.WriteLine($"CPU : {cpuPercent,10:N2}% Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine($"CPU Load : {(float)cpuLoad/100,10:N2} Mem: {GetHumanReadableBytes(memoryBytes)} GC_Mem: {GetHumanReadableBytes(gcBytes)} TCP: {tcpConn,4:N0}");
Console.WriteLine("----");
Console.WriteLine($"TOTALs: ");
Console.WriteLine($"Requests : Completed: {statTotalCompleted,10:N0} Faulted: {statTotalFaulted,10:N0} Cancelled: {statTotalCancelled,10:N0}");
Expand Down
Loading

0 comments on commit 27c745e

Please sign in to comment.