Skip to content

Commit

Permalink
Add codegen flag 'replaceInvalidUtf8' to allow server SDK to replace …
Browse files Browse the repository at this point in the history
…invalid UTF-8 characters with �
  • Loading branch information
Fahad Zubair committed Feb 4, 2025
1 parent b36447e commit 8a00258
Show file tree
Hide file tree
Showing 8 changed files with 300 additions and 82 deletions.
11 changes: 11 additions & 0 deletions .changelog/1738663351.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
applies_to:
- server
authors:
- drganjoo
references: []
breaking: false
new_feature: true
bug_fix: false
---
Allow invalid UTF-8 characters to be replaced with �
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import software.amazon.smithy.model.traits.EnumTrait
import software.amazon.smithy.model.traits.SparseTrait
import software.amazon.smithy.model.traits.TimestampFormatTrait
import software.amazon.smithy.rust.codegen.core.rustlang.Attribute
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency
import software.amazon.smithy.rust.codegen.core.rustlang.RustWriter
import software.amazon.smithy.rust.codegen.core.rustlang.Writable
import software.amazon.smithy.rust.codegen.core.rustlang.escape
Expand Down Expand Up @@ -102,30 +101,30 @@ class JsonParserGenerator(
ReturnSymbolToParse(codegenContext.symbolProvider.toSymbol(shape), false)
},
private val customizations: List<JsonParserCustomization> = listOf(),
smithyJsonWithFeatureFlag: RuntimeType = RuntimeType.smithyJson(codegenContext.runtimeConfig),
) : StructuredDataParserGenerator {
private val model = codegenContext.model
private val symbolProvider = codegenContext.symbolProvider
private val runtimeConfig = codegenContext.runtimeConfig
private val codegenTarget = codegenContext.target
private val smithyJson = CargoDependency.smithyJson(runtimeConfig).toType()
private val protocolFunctions = ProtocolFunctions(codegenContext)
private val builderInstantiator = codegenContext.builderInstantiator()
private val codegenScope =
arrayOf(
"Error" to smithyJson.resolve("deserialize::error::DeserializeError"),
"expect_blob_or_null" to smithyJson.resolve("deserialize::token::expect_blob_or_null"),
"expect_bool_or_null" to smithyJson.resolve("deserialize::token::expect_bool_or_null"),
"expect_document" to smithyJson.resolve("deserialize::token::expect_document"),
"expect_number_or_null" to smithyJson.resolve("deserialize::token::expect_number_or_null"),
"expect_start_array" to smithyJson.resolve("deserialize::token::expect_start_array"),
"expect_start_object" to smithyJson.resolve("deserialize::token::expect_start_object"),
"expect_string_or_null" to smithyJson.resolve("deserialize::token::expect_string_or_null"),
"expect_timestamp_or_null" to smithyJson.resolve("deserialize::token::expect_timestamp_or_null"),
"json_token_iter" to smithyJson.resolve("deserialize::json_token_iter"),
"Error" to smithyJsonWithFeatureFlag.resolve("deserialize::error::DeserializeError"),
"expect_blob_or_null" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_blob_or_null"),
"expect_bool_or_null" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_bool_or_null"),
"expect_document" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_document"),
"expect_number_or_null" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_number_or_null"),
"expect_start_array" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_start_array"),
"expect_start_object" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_start_object"),
"expect_string_or_null" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_string_or_null"),
"expect_timestamp_or_null" to smithyJsonWithFeatureFlag.resolve("deserialize::token::expect_timestamp_or_null"),
"json_token_iter" to smithyJsonWithFeatureFlag.resolve("deserialize::json_token_iter"),
"Peekable" to RuntimeType.std.resolve("iter::Peekable"),
"skip_value" to smithyJson.resolve("deserialize::token::skip_value"),
"skip_to_end" to smithyJson.resolve("deserialize::token::skip_to_end"),
"Token" to smithyJson.resolve("deserialize::Token"),
"skip_value" to smithyJsonWithFeatureFlag.resolve("deserialize::token::skip_value"),
"skip_to_end" to smithyJsonWithFeatureFlag.resolve("deserialize::token::skip_to_end"),
"Token" to smithyJsonWithFeatureFlag.resolve("deserialize::Token"),
"or_empty" to orEmptyJson(),
*preludeScope,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package software.amazon.smithy.rust.codegen.core.testutil
import software.amazon.smithy.build.PluginContext
import software.amazon.smithy.model.Model
import software.amazon.smithy.model.node.ObjectNode
import software.amazon.smithy.model.node.ToNode
import software.amazon.smithy.rust.codegen.core.smithy.RuntimeConfig
import software.amazon.smithy.rust.codegen.core.util.runCommand
import java.io.File
Expand Down Expand Up @@ -50,87 +51,78 @@ data class IntegrationTestParams(
sealed class AdditionalSettings {
abstract fun toObjectNode(): ObjectNode

abstract class CoreAdditionalSettings protected constructor(val settings: List<AdditionalSettings>) : AdditionalSettings() {
override fun toObjectNode(): ObjectNode {
val merged =
settings.map { it.toObjectNode() }
.reduce { acc, next -> acc.merge(next) }

return ObjectNode.builder()
.withMember("codegen", merged)
companion object {
private fun Map<String, Any>.toCodegenObjectNode(): ObjectNode =
ObjectNode.builder()
.withMember(
"codegen",
ObjectNode.builder().apply {
forEach { (key, value) ->
when (value) {
is Boolean -> withMember(key, value)
is Number -> withMember(key, value)
is String -> withMember(key, value)
is ToNode -> withMember(key, value)
else -> throw IllegalArgumentException("Unsupported type for key $key: ${value::class}")
}
}
}.build(),
)
.build()
}

abstract class Builder<T : CoreAdditionalSettings> : AdditionalSettings() {
protected val settings = mutableListOf<AdditionalSettings>()
}

fun generateCodegenComments(debugMode: Boolean = true): Builder<T> {
settings.add(GenerateCodegenComments(debugMode))
return this
}
abstract class CoreAdditionalSettings protected constructor(
private val settings: Map<String, Any>,
) : AdditionalSettings() {
override fun toObjectNode(): ObjectNode = settings.toCodegenObjectNode()

abstract fun build(): T
abstract class Builder<T : CoreAdditionalSettings> : AdditionalSettings() {
protected val settings = mutableMapOf<String, Any>()

override fun toObjectNode(): ObjectNode = build().toObjectNode()
}
fun generateCodegenComments(debugMode: Boolean = true) =
apply {
settings["debugMode"] = debugMode
}

// Core settings that are common to both Servers and Clients should be defined here.
data class GenerateCodegenComments(val debugMode: Boolean) : AdditionalSettings() {
override fun toObjectNode(): ObjectNode =
ObjectNode.builder()
.withMember("debugMode", debugMode)
.build()
override fun toObjectNode(): ObjectNode = settings.toCodegenObjectNode()
}
}
}

class ClientAdditionalSettings private constructor(settings: List<AdditionalSettings>) :
AdditionalSettings.CoreAdditionalSettings(settings) {
class Builder : CoreAdditionalSettings.Builder<ClientAdditionalSettings>() {
override fun build(): ClientAdditionalSettings = ClientAdditionalSettings(settings)
}

// Additional settings that are specific to client generation should be defined here.

companion object {
fun builder() = Builder()
}
}

class ServerAdditionalSettings private constructor(settings: List<AdditionalSettings>) :
AdditionalSettings.CoreAdditionalSettings(settings) {
class Builder : CoreAdditionalSettings.Builder<ServerAdditionalSettings>() {
fun publicConstrainedTypes(enabled: Boolean = true): Builder {
settings.add(PublicConstrainedTypes(enabled))
return this
class ServerAdditionalSettings private constructor(
settings: Map<String, Any>,
) : AdditionalSettings.CoreAdditionalSettings(settings) {
class Builder : CoreAdditionalSettings.Builder<ServerAdditionalSettings>() {
fun publicConstrainedTypes(enabled: Boolean = true) =
apply {
settings["publicConstrainedTypes"] = enabled
}

fun addValidationExceptionToConstrainedOperations(enabled: Boolean = true): Builder {
settings.add(AddValidationExceptionToConstrainedOperations(enabled))
return this
fun addValidationExceptionToConstrainedOperations(enabled: Boolean = true) =
apply {
settings["addValidationExceptionToConstrainedOperations"] = enabled
}

override fun build(): ServerAdditionalSettings = ServerAdditionalSettings(settings)
}
fun replaceInvalidUtf8(enabled: Boolean = true) =
apply {
settings["replaceInvalidUtf8"] = enabled
}
}

private data class PublicConstrainedTypes(val enabled: Boolean) : AdditionalSettings() {
override fun toObjectNode(): ObjectNode =
ObjectNode.builder()
.withMember("publicConstrainedTypes", enabled)
.build()
}
companion object {
fun builder() = Builder()
}
}

private data class AddValidationExceptionToConstrainedOperations(val enabled: Boolean) : AdditionalSettings() {
override fun toObjectNode(): ObjectNode =
ObjectNode.builder()
.withMember("addValidationExceptionToConstrainedOperations", enabled)
.build()
}
class ClientAdditionalSettings private constructor(
settings: Map<String, Any>,
) : AdditionalSettings.CoreAdditionalSettings(settings) {
class Builder : CoreAdditionalSettings.Builder<ClientAdditionalSettings>()

companion object {
fun builder() = Builder()
}
companion object {
fun builder() = Builder()
}
}

/**
* Run cargo test on a true, end-to-end, codegen product of a given model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ data class ServerCodegenConfig(
*/
val experimentalCustomValidationExceptionWithReasonPleaseDoNotUse: String? = defaultExperimentalCustomValidationExceptionWithReasonPleaseDoNotUse,
val addValidationExceptionToConstrainedOperations: Boolean = DEFAULT_ADD_VALIDATION_EXCEPTION_TO_CONSTRAINED_OPERATIONS,
val replaceInvalidUtf8: Boolean = DEFAULT_REPLACE_INVALID_UTF8,
) : CoreCodegenConfig(
formatTimeoutSeconds, debugMode,
) {
Expand All @@ -105,6 +106,7 @@ data class ServerCodegenConfig(
private const val DEFAULT_IGNORE_UNSUPPORTED_CONSTRAINTS = false
private val defaultExperimentalCustomValidationExceptionWithReasonPleaseDoNotUse = null
private const val DEFAULT_ADD_VALIDATION_EXCEPTION_TO_CONSTRAINED_OPERATIONS = false
private const val DEFAULT_REPLACE_INVALID_UTF8 = false

fun fromCodegenConfigAndNode(
coreCodegenConfig: CoreCodegenConfig,
Expand All @@ -117,6 +119,7 @@ data class ServerCodegenConfig(
ignoreUnsupportedConstraints = node.get().getBooleanMemberOrDefault("ignoreUnsupportedConstraints", DEFAULT_IGNORE_UNSUPPORTED_CONSTRAINTS),
experimentalCustomValidationExceptionWithReasonPleaseDoNotUse = node.get().getStringMemberOrDefault("experimentalCustomValidationExceptionWithReasonPleaseDoNotUse", defaultExperimentalCustomValidationExceptionWithReasonPleaseDoNotUse),
addValidationExceptionToConstrainedOperations = node.get().getBooleanMemberOrDefault("addValidationExceptionToConstrainedOperations", DEFAULT_ADD_VALIDATION_EXCEPTION_TO_CONSTRAINED_OPERATIONS),
replaceInvalidUtf8 = node.get().getBooleanMemberOrDefault("replaceInvalidUtf8", DEFAULT_REPLACE_INVALID_UTF8),
)
} else {
ServerCodegenConfig(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ fun jsonParserGenerator(
codegenContext: ServerCodegenContext,
httpBindingResolver: HttpBindingResolver,
jsonName: (MemberShape) -> String,
additionalParserCustomizations: List<JsonParserCustomization> = listOf(),
additionalParserCustomizations: List<JsonParserCustomization> = listOf()
): JsonParserGenerator =
JsonParserGenerator(
codegenContext,
Expand All @@ -144,6 +144,15 @@ fun jsonParserGenerator(
listOf(
ServerRequestBeforeBoxingDeserializedMemberConvertToMaybeConstrainedJsonParserCustomization(codegenContext),
) + additionalParserCustomizations,
smithyJsonWithFeatureFlag =
if (codegenContext.settings.codegenConfig.replaceInvalidUtf8) {
CargoDependency.smithyJson(codegenContext.runtimeConfig)
.copy(features = setOf("replace-invalid-utf8"))
.toType()
} else {
RuntimeType.smithyJson(codegenContext.runtimeConfig)
},
,
)

class ServerAwsJsonProtocol(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package software.amazon.smithy.rust.codegen.server.smithy

import org.junit.jupiter.api.Test
import software.amazon.smithy.rust.codegen.core.rustlang.rust
import software.amazon.smithy.rust.codegen.core.rustlang.rustTemplate
import software.amazon.smithy.rust.codegen.core.testutil.IntegrationTestParams
import software.amazon.smithy.rust.codegen.core.testutil.ServerAdditionalSettings
import software.amazon.smithy.rust.codegen.core.testutil.asSmithyModel
import software.amazon.smithy.rust.codegen.core.testutil.testModule
import software.amazon.smithy.rust.codegen.server.smithy.testutil.serverIntegrationTest

internal class ReplaceInvalidUtf8Test {
val model =
"""
namespace test
use aws.protocols#restJson1
@restJson1
service SampleService {
operations: [SampleOperation]
}
@http(uri: "/operation", method: "PUT")
operation SampleOperation {
input := {
x : String
}
}
""".asSmithyModel(smithyVersion = "2")

@Test
fun `invalid utf8 should be replaced if the codegen flag is set`() {
serverIntegrationTest(
model,
IntegrationTestParams(
additionalSettings =
ServerAdditionalSettings
.builder()
.replaceInvalidUtf8(true)
.toObjectNode(),
),
) { _, rustCrate ->
rustCrate.testModule {
rustTemplate(
"""
##[tokio::test]
async fn test_utf8_replaced() {
let body = r##"{ "x" : "\ud800" }"##;
let request = http::Request::builder()
.method("POST")
.uri("/operation")
.header("content-type", "application/json")
.body(hyper::Body::from(body))
.expect("failed to build request");
let result = crate::protocol_serde::shape_sample_operation::de_sample_operation_http_request(request).await;
assert!(
result.is_ok(),
"Invalid utf8 should have been replaced. {result:?}"
);
assert_eq!(
result.unwrap().x.unwrap(),
"�",
"payload should have been replaced with �."
);
}
""",
)
}
}
}

@Test
fun `invalid utf8 should be rejected if the codegen flag is not set`() {
serverIntegrationTest(
model,
) { _, rustCrate ->
rustCrate.testModule {
rustTemplate(
"""
##[tokio::test]
async fn test_invalid_utf8_raises_an_error() {
let body = r##"{ "x" : "\ud800" }"##;
let request = http::Request::builder()
.method("POST")
.uri("/operation")
.header("content-type", "application/json")
.body(hyper::Body::from(body))
.expect("failed to build request");
let result = crate::protocol_serde::shape_sample_operation::de_sample_operation_http_request(request).await;
assert!(
result.is_err(),
"invalid utf8 characters should not be allowed by default {result:?}"
);
}
""",
)
}
}
}
}
10 changes: 8 additions & 2 deletions rust-runtime/aws-smithy-json/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
[package]
name = "aws-smithy-json"
version = "0.61.2"
authors = ["AWS Rust SDK Team <aws-sdk-rust@amazon.com>", "John DiSanti <jdisanti@amazon.com>"]
version = "0.62.0"
authors = [
"AWS Rust SDK Team <aws-sdk-rust@amazon.com>",
"John DiSanti <jdisanti@amazon.com>",
]
description = "Token streaming JSON parser for smithy-rs."
edition = "2021"
license = "Apache-2.0"
Expand All @@ -20,3 +23,6 @@ targets = ["x86_64-unknown-linux-gnu"]
cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
rustdoc-args = ["--cfg", "docsrs"]
# End of docs.rs metadata

[features]
replace-invalid-utf8 = []
Loading

0 comments on commit 8a00258

Please sign in to comment.