diff --git a/0_abstract.typ b/0_abstract.typ new file mode 100644 index 0000000..d39f3bf --- /dev/null +++ b/0_abstract.typ @@ -0,0 +1,3 @@ +Here goes the abstract + +#lorem(100) \ No newline at end of file diff --git a/1_introduction.typ b/1_introduction.typ index ceb38c1..3444b11 100644 --- a/1_introduction.typ +++ b/1_introduction.typ @@ -1,14 +1,14 @@ #import "imports.typ": * = Introduction -== Motivation +== Motivation Continuous improvements in quantum computing pose an incalculable risk to encryption algorithms used today. -It is not clear when quantum computers will be capable of breaking today's encryption algorithms, or even if they will ever be able to. +It is unclear when quantum computers will be capable of breaking today's encryption algorithms or even if they will ever be able to. Still, it is necessary to develop new ciphers and protocols already, as it takes much time and research to become confident in the security of a new cryptographic system. One of the most important encryption protocols used today is @tls. Websites use it to protect the integrity, authenticity, and confidentiality of the communication with the browser. -About 83~% of all page loads worldwide -- and even 94~% in the USA -- are secured by #gls("https", long: false), which is based on @tls, according to Firefox telemetry data from October 2024 @firefox_telemetry. +According to Firefox telemetry data from October~2024, about 83~% of all page loads worldwide — and even 94~% in the USA — are secured by #gls("https", long: false), which is based on @tls~@firefox_telemetry. Besides @https, more protocols use @tls to secure the communication. For instance, #gls("imap", long: false) and #gls("smtp", long: false) used for e-mail exchange and #gls("ldap", long: false), which is often used as a central place to store credentials in a corporate network, have secured variants that build on @tls. 
@@ -17,41 +17,55 @@ The first is the most critical, as messages stored today can be decrypted retroa The @ietf Internet-Draft "Hybrid key exchange in TLS~1.3"~@tls1.3_hybrid provides a solution to that, and browsers are in the process of rolling it out already~@chrome_kyber @firefox_125_nightly. Between 12~% and 20~% of the connections are already protected that way~@cloudflare_radar. The second part of the @pq transition -- server authentication -- is covered by @kemtls. -It mitigates the issue that @pq secure signatures are a lot bigger than their classical counterparts. +It mitigates that #gls("pq")-secure signatures are much bigger than their classical counterparts. As the name suggests, @kemtls ensures server authentication using @pq safe @kem:pl instead of signatures. As current @pq @kem:pl encapsulations are about half as big as @pq signatures, @kemtls reduces the message size compared to the naive replacement of classical signature with @pq signatures~@kem_tls. -The last of the three parts is about securing the certification process that ties domain names to a long-living private key. -The current infrastructure uses certificates issued by trusted #glspl("ca") which attests the link between domain name and private key. +The last of the three parts concerns securing the certification process that binds domain names to a long-living private key. +The current infrastructure uses certificates issued by trusted #glspl("ca"), which attests to the link between the domain name and private key. These certificates comprise numerous signatures, which would significantly increase the size of the certificate if naively substituted with their @pq counterparts. Big certificates increase the data transferred during @tls handshakes, resulting in a worse performance or even broken connections due to non-standard conform implementations that worked fine so far. 
@david_adrian_tldrfail_2023 -To avoid big certificates, the @ietf Internet-Draft "Merkle Tree Certificates for TLS"~@rfc_mtc proposes a new architecture for certificate infrastructures. +To avoid large certificates, the @ietf Internet-Draft "Merkle Tree Certificates for TLS"~@rfc_mtc proposes a new architecture for certificate infrastructures. It uses Merkle Trees and Merkle Tree inclusion proofs to reduce the size of messages exchanged during @tls handshakes. -The architecture is designed for most common use-cases, but has a reduced scope compared to the current certificate infrastructure. -Thus, the proposed architecture is meant as an additional optimization to the current certificate infrastructure and not as a substitution. +The architecture is designed for most common use cases but has a reduced scope compared to the current certificate infrastructure. +Thus, the proposed architecture is meant to enhance the current certificate infrastructure rather than replace it. -== Our Contributions -This work analyzes the Internet-Draft for @mtc:pl in terms of the number of bytes transferred during the @tls handshake and implements the necessary changes in a @tls stack for the first time. +== Our Contributions +This work analyzes the Internet-Draft for @mtc:pl regarding the computational effort and number of bytes transferred during the @tls handshake and implements the necessary changes in a @tls stack for the first time. First, we compare the size of @tls handshake messages in a classical, X.509-based @pki with the message size of the proposed @mtc architecture. -We do this for both, classical, non-@pq secure signature schemes, and with the @pq signature schemes that the @nist recently specified. -We show that the @mtc architecture is more size efficient in all cases, and handles the big sizes of @pq signatures a lot better than an X.509-based setup. -Furthermore, @mtc setup requires a new update channel, as @rp:pl must regularly refresh their roots of trust. 
-Based on different assumptions derived from statistics in the current @pki, we estimate the size of these updates. +We do this for both classical, non-@pq secure signature schemes and with the @pq signature schemes that the @nist recently specified. +We show that the @mtc architecture is more size efficient in all cases and handles the big sizes of @pq signatures much better than an X.509-based setup. +Furthermore, the @mtc setup requires a new update channel, as @rp:pl must regularly refresh their roots of trust. +We estimate and interpret the size of these updates based on different assumptions derived from statistics in the current @pki. +Further, we estimate the CPU cycles clients use to verify @mtc and X.509 certificates and compare them with each other. -As a second contribution, we created the first @tls implementation that is compatible with the @mtc architecture. +As a second contribution, we created the first @tls implementation compatible with the @mtc architecture. We based our implementation on the popular #emph[Rustls] library and modified it to deal with two new negotiation mechanisms. -These negation mechanisms become necessary to allow client and server to agree on the @mtc certificate type and a specific trust anchor. +These negotiation mechanisms become necessary to allow the client and server to agree on the @mtc certificate type and a specific trust anchor. -In addition, we developed a library for verifying @mtc:pl and integrated it into the Rustls library. +In addition, we developed a library to verify @mtc:pl and integrated it into the Rustls library. This demonstrates that the @mtc Internet-Draft works in practice, and we confirmed that the negotiation mechanisms maintain interoperability with the existing certificate infrastructure. During the implementation process, we encountered some difficulties with the specification. -For all the problems we found, we contributed fixes and additionally added some improvements. 
-For example, we did encounter incorrect test vectors caused by a 16-bit instead of 8-bit length prefix. -We corrected these in the Internet-Draft and in the @ca implementation that produced these test vectors. +We contributed fixes and added some improvements for all the problems we found. +For example, we encountered incorrect test vectors caused by using a 16-bit rather than the specified 8-bit length prefix. +We corrected these in the Internet-Draft and the @ca implementation that produced these test vectors. Besides the fixes, we incorporated a new trust anchor negotiation mechanism into the proposed standard and implemented the required changes in the provided @ca implementation. -Moreover, we proposed a length prefix in the embedding of the @mtc in the @tls `Certificate` message to allow parsing the @tls certificate message without depending on an external state. +Moreover, we proposed a length prefix for the @mtc embedding in the @tls `Certificate` message to allow parsing the @tls certificate message without depending on an external state. This length prefix will be incorporated in the next pre-release of the standard. -Beyond that, we suggested a standard file structure of @mtc related files for @tls clients and servers, based on how certificate files are organized on modern Linux-based computer systems nowadays, incorporating the changed needs that arise with the use of @mtc. +Beyond that, we suggested a standard file structure of #gls("mtc")-related files for @tls clients and servers, based on how certificate files are organized on modern Linux-based computer systems nowadays, incorporating the changed needs that arise with the use of @mtc. == Outline +We start this work with an Introduction containing the Motivation (@sec:motivation) and Our Contributions (@sec:our_contributions). +Afterward, @sec:preliminaries introduces the preliminaries required for this work. 
+This includes @sec:merkle_trees, which explains the basics of Merkle Trees, followed by @sec:pki, which provides an overview of the current #gls("pki", long: true), including #gls("ocsp", long: false), #gls("acme", long: false), and #gls("ct", long: true). +Moreover, in @sec:tls, we recap the relevant parts of a @tls handshake and briefly present an optimized version called @kemtls. +As a last preliminary, @sec:pq_signatures lists some classical and #gls("pq", long: true) secure signature schemes with their performance metrics. +Next, @sec:mtc introduces the #gls("mtc", long: true) architecture and contains sections that provide details about the #gls("ca", long: true) (@sec:mtc_ca), the Transparency Service (@sec:mtc_ts), and the negotiation of @mtc in @tls (@sec:negotiation_tls). +After explaining the @mtc architecture, @sec:mtc_pki_comparison compares the @mtc with the X.509 architecture. +We split specific considerations into four sections. +@sec:certificate_size compares the certificate sizes and @sec:update_size investigates the size requirements of the new update mechanism. +Moreover, in @sec:file_structure, we propose a common file structure for devices that support @mtc, which we base on the file structure commonly used for X.509-related files. +The last comparison, in @sec:cpu, focuses on the CPU cycles associated with using @mtc or X.509 certificates. +Further, @sec:development summarizes the development process of the Rustls-based @mtc server and client implementation and contributions to the standardization process. +Lastly, @sec:conclusion summarizes the findings and provides items that deserve further attention. \ No newline at end of file diff --git a/2_preliminaries.typ b/2_preliminaries.typ index d556931..5e4715e 100644 --- a/2_preliminaries.typ +++ b/2_preliminaries.typ @@ -5,16 +5,16 @@ = Preliminaries This section provides information relevant to understanding the architecture of #gls("mtc", long: true) and its implications. 
It starts with a reminder of Merkle Trees and continues with an explanation of the present #gls("pki", long: true), including its building blocks, the #gls("acme", long: true), the #gls("ocsp", long: true), and the #gls("ct", long: true) design. -Afterward, this section provides a summary of the @tls protocol and the optimization @kemtls, and ends with a list of relevant #gls("pq", long: true) secure signature algorithms. +Afterward, this section summarizes the @tls protocol and the optimization @kemtls, and ends with a list of relevant #gls("pq", long: true) secure signature algorithms. -== Merkle Trees +== Merkle Trees Merkle Trees, also known as Hash Trees, are binary trees with the property that each inner node is the result of a cryptographic hash function on its child nodes. Merkle Trees are tamper-evident and enable efficient verification of whether an element is included in the tree. The term "tamper-evident" refers to the inability to add, delete, or modify information contained within the Merkle Tree without changing the root node. -An efficient verification means that, given the information and a proof, one can easily verify that the information is contained in the root hash. +An efficient verification means that, given the information and proof, one can easily verify that the information is contained in the root hash. -As a reminder: A hash function takes an arbitrary length input and produces a fix-length output. -In the following, we will use $h = H(x)$ to denote that $h$ is the result of applying the hash function $H$ in the input $x$. +As a reminder, a hash function takes an arbitrary length input and produces a fixed-length output. +In the following, we will use $h = H(x)$ to denote that $h$ results from applying the hash function $H$ to the input $x$. In addition, a cryptographic hash function typically has three properties: Collision resistance, first preimage resistance, and second preimage resistance. 
Collision resistance means that it is hard to find any two inputs $x eq.not x'$ that result in the same hash output $H(x) = H(x')$. First preimage resistance means that it is hard to find an input $x$ that produces a given hash output $h$. @@ -26,23 +26,23 @@ Hard in this context means that something is computationally infeasible, i.e., c #figure( merkle_tree, caption: [Visualization of a Merkle Tree with an inclusion proof for information $x_1$. - The inclusion proof consists of the yellow marked $h_0$ and $h_5$ node, which allows a verifier to recalculate the red, thick path up to the rood node. + The inclusion proof consists of the yellow marked $h_0$ and $h_5$ node, which allows a verifier to recalculate the red, thick path up to the root node. ]) @fig:merkle_tree shows an example tree for the information $x_0$, $x_1$, $x_2$ and $x_3$. The leaf nodes contain the hash of the information as $h_i = H_1(x_i)$. -Each of the inner nodes is the result of the hash function $H_2(h_i, h_(i+1))$ with the content of the two child nodes. -That way, one can build an inclusion proof to the root node without reveling any other information. -The inclusion proof contains the sibling hash for each node along the way to traverse up to the root note, so $h_0$ and $h_5$ in the example in @fig:merkle_tree. +Each inner node is the result of the hash function $H_2(h_i, h_(i+1))$ with the content of the two child nodes. +That way, one can build an inclusion proof to the root node without revealing any other information. +The inclusion proof contains the sibling hash for each node traversing up to the root node, so $h_0$ and $h_5$ in the example in @fig:merkle_tree. Please note that the leaf and internal node use two different hash functions, $H_1$ and $H_2$. -This is to ensure that an internal node can never be interpreted as leaf node. -This would allow constructing multiple Merkle Trees with the same root hash~@merkle_tree_second_preimage. 
-In practice, it is enough to slightly alter the hash function, such as by prepending a single domain separator byte which is different for the leaf and internal nodes~@rfc_ct. +This ensures that an internal node can never be interpreted as a leaf node. +This would allow the construction of multiple Merkle Trees with the same root hash~@merkle_tree_second_preimage. +In practice, it is enough to slightly alter the hash function, such as by prepending a single domain separator byte, which is different for the leaf and internal nodes~@rfc_ct. The internal hash function $H_2$ takes two arguments, even though hash functions generally only take a single input. -However, there are multiple ways to circumvent this restriction, for example, by concatenating the two inputs into one. +However, there are multiple ways to circumvent this restriction, such as concatenating the two inputs into one. -As long as the used hash function is collision resistant, it is not possible to alter, add, or delete any information included in the Merkle Tree without changing the hash of the root node~@handbook_applied_crypto[Section~13.4.1]. +As long as the used hash function is collision-resistant, it is impossible to alter, add, or delete any information included in the Merkle Tree without changing the hash of the root node~@handbook_applied_crypto[Section~13.4.1]. Instead, it is possible to add information to the tree and create a consistency proof showing that only specific data was added, but nothing else has changed in the tree. This property can be used to build logs that are verifiably append-only~@rfc_ct. @@ -61,13 +61,13 @@ This property can be used to build logs that are verifiably append-only~@rfc_ct. // - Securing local networks and smart card authentication // - Restricted access to #glspl("vpn") -A #gls("pki", long: true) is a crucial part of ensuring security in various digital systems. +A #gls("pki", long: true) is crucial to ensuring security in various digital systems. 
Its core functionality is to bind a cryptographic public key to a set of verified information~@rfc_pki. Typically, this information represents an identity such as a domain name, company name, or the name of a natural person. -However, in some cases, the verified information instead contains permissions, or ownership, without an identity. -An example of that is the @rpki, which is used to secure @bgp announcements and therefore harden the security of routing on the internet @rfc_rpki. +However, in some cases, the verified information instead contains permissions or ownership without an identity. +An example is the @rpki, which is used to secure @bgp announcements and, therefore, harden the security of routing on the internet~@rfc_rpki. -The verified information and public key are combined with a cryptographic signature of the @ca and form a #emph([certificate]) that way. +The verified information and public key are combined with a cryptographic signature of the @ca to form a #emph([certificate]) that way. The common format to encode the verified information and signature is X.509. Later, a #gls("rp", long: true) can parse the certificate, verify the signature, and trust the signed information given that it trusts the @ca. @@ -76,34 +76,34 @@ To name a few examples: It is used to encrypt and sign emails, to sign documents This work focuses on this last use case, the web browsing. The corresponding @pki infrastructure is often referred to as #emph([WebPKI]). -It combines the domain name and possibly a company name with a public key, such that a @rp can verify it connected to the intended website. +It combines the domain name and possibly a company name with a public key, so an @rp can verify that it is connected to the intended website. This verification is built into the @https protocol, which combines the @http with the @tls security layer. Typically, the @ca does not issue a certificate from its root certificate that is stored as trusted in the @rp:pl. 
-Instead, a @ca uses an intermediate certificate, signed with the root certificate, to sign the so called #gls("ee", long: true) certificate, i.e., the one attesting the link between the domain name and public key. +Instead, a @ca uses an intermediate certificate, signed with the root certificate, to sign the so-called #gls("ee", long: true) certificate, i.e., the one attesting the link between the domain name and public key. Together, the @ee and corresponding intermediate and root certificates are referred to as #emph[certificate chain]. -For security reasons, certificates have only a limited lifetime, especially the @ee in the web ecosystem. -Since 2020 Chrome and Apple enforce new @ee certificates to be valid for at most 398 days @chrome_cert_lifetime @apple_cert_lifetime. -Let's Encrypt, a @ca which is responsible for 57~% of all currently valid certificates, issues certificates for just 90 days @merkle_town @lets_encrypt_cert_lifetime. +For security reasons, certificates have a limited lifetime, especially the @ee in the web ecosystem. +Since 2020, Chrome and Apple have enforced new @ee certificates to be valid for at most 398 days~@chrome_cert_lifetime @apple_cert_lifetime. +Let's Encrypt, a @ca responsible for 57~% of all currently valid certificates, issues certificates for just 90~days~@merkle_town @lets_encrypt_cert_lifetime. Let's Encrypt provides two reasons for their comparably short certificate lifetimes: -They want to limit the damage a miss issuance or key compromise can do, and they want to encourage an automated issuance process, which they see as a crucial part of a widespread @https adoption. +They want to limit the damage a mis-issuance or key compromise can do and encourage an automated issuance process, which they see as a crucial part of widespread @https adoption. Nevertheless, it is important to have a mechanism in place to revoke certificates. 
-This can be necessary if a private key leaked or the assured information is not accurate (anymore). +This can be necessary if a private key leaks or the assured information is not accurate (anymore). There are two mechanisms in place for that: -The @crl:pl are regularly published by the @ca in which all revoked certificates are listed and the #gls("ocsp", long: true) allows @rp:pl to query the @ca if a certificate was revoked in real-time. +The @ca:pl regularly publish @crl:pl, listing all revoked certificates and the #gls("ocsp", long: true) allows @rp:pl to query the @ca if a certificate was revoked in real-time. // As it will become relevant later on, the following section will explain @ocsp a bit more in-depth. === OCSP -@ocsp is meant as an improvement over the classical @crl, as it avoids downloading a list of all blocked certificates occasionally, but instead allows querying a @ca about the status of one specific certificate whenever it is needed. +@ocsp is meant as an improvement over the classical @crl, as it avoids downloading a list of all blocked certificates occasionally but instead allows querying a @ca about the status of one specific certificate whenever it is needed. The @ca includes an #gls("http", long: false) endpoint to an @ocsp responder in the certificates it issues, which @rp:pl such as browsers can query for recent information about whether a certificate is valid~@rfc_ocsp. -In practice, this comes with a couple of issues: Speed, high load on @ca servers, availability, and privacy. +In practice, this comes with a couple of issues: speed, high load on @ca servers, availability, and privacy. Every time a @rp checks a certificate, an additional round trip to the @ocsp responder is required, which slows down the connection by about 30~%~@ocsp_30p_faster. -Moreover, the @ca:pl have to answer these status requests, which results in a high server load and therefore costs. 
-If an @ocsp responder is not reachable, the @rp either cannot connect to the server or has to ignore the failure, called fail-close and fail-open, respectively. -The German health care system showcases that a fail-close approach can be very fragile in practice~@e-rezept. +Moreover, the @ca:pl have to answer these status requests, which results in a high server load and, therefore, costs. +If an @ocsp responder is not reachable, the @rp either cannot connect to the server or has to ignore the failure, which is called fail-close and fail-open, respectively. +The German healthcare system showcases that a fail-close approach can be very fragile in practice~@e-rezept. Browsers opted for a fail-open approach in favor of service availability. This decision, however, limits the benefit of recent information, as an attacker can block access to the @ocsp endpoint and thereby block the information that a certificate was revoked~@ocsp_soft_fail. Furthermore, @ocsp raises privacy concerns, as @ca:pl can build profiles of users based on which certificates they query. @@ -115,21 +115,21 @@ This reduces the load on the @ca, eliminates the need for an additional round tr // Knowing about the existence of a certificate might be less obvious, but is undoubtedly an essential building block to revocation. // The following section explains why this is not self-evident and how to ensure it anyway. -=== ACME +=== ACME As mentioned earlier, present certificate lifetimes are often 90 days, but not longer than 398 days. Short certificate lifetimes require automation to not overload humans with constant certificate renewals. -Additionally, automation facilitates widespread adoption of @https as it lowers the (human) effort -- and consequently costs -- associated. +Additionally, automation facilitates widespread adoption of @https by lowering the (human) effort -- and consequently costs -- associated. 
Therefore, Let's Encrypt initiated the development of the #gls("acme", long: true) in 2015 and started issuing certificates in that highly automated way in the same year~@first_acme. The @acme protocol finally became an #gls("ietf", long: false) standard in 2019~@rfc_acme. Please note that the fully automated @acme mechanism allows for #emph([Domain Validation]) (DV) certificates only. -This means that the @ca verifies that the requestor has effective control over the domain, as opposed to #emph([Organization Validation]) and #emph([Extended Validation]) which require human interaction to verify the authenticity of the requesting organization. -@mtc requires a high degree of automation, so that DV certificates are the only practical certificate type for @mtc. +This means that the @ca verifies that the requestor has effective control over the domain, as opposed to #emph([Organization Validation]) and #emph([Extended Validation]), which require human interaction to verify the authenticity of the requesting organization. +@mtc requires a high degree of automation so DV certificates are the only practical certificate type for @mtc. However, this is only a limited drawback for the applicable scope of @mtc as 93~% of all valid certificates are DV certificates as of 2024-09-21~@merkle_town. -There exist three standardized methods to verify a user requesting a certificate has effective control over the domain; the `HTTP-01`, `DNS-01`, and `TLS-ALPN` challenges. -Each of them generally works by placing a specific challenge value provided by the @ca into a place that only an owner of a domain can do. +Three standardized @acme methods exist to verify that a user requesting a certificate has effective control over the domain: the `HTTP-01`, `DNS-01`, and `TLS-ALPN` challenges. +Each generally works by placing a specific challenge value provided by the @ca in a place that only the domain owner can do. 
As the names suggest, this is either a web page at a specific path, a TXT #gls("dns", long: false) entry, or a specific ALPN protocol in the @tls stack, respectively. // Each of them has different advantages and disadvantages, @@ -145,7 +145,7 @@ As the names suggest, this is either a web page at a specific path, a TXT #gls(" // ) -=== Certificate Transparency +=== Certificate Transparency // - WebPKI contains a lot of trusted CA (as of 21.09.2024: 153 Firefox @firefox_root_store, 135 Chrome @chrome_root_store) // - Response to 2011 attack on DigiNotar // - Any of them could be compromised and issue certificates for any website @@ -155,10 +155,10 @@ As the names suggest, this is either a web page at a specific path, a TXT #gls(" Browser vendors ship a list of @ca:pl which are trusted to issue genuine certificates only. As of November 6, 2024, there are 176 trusted root @ca:pl built into Firefox and 134 in Chrome~@firefox_root_store @chrome_root_store. If only a single @ca misbehaves, this can tremendously impact the security of the whole system. -One infamous example is the security breach of DigiNotar in 2011, which allowed the attacker to listen into the connection of about 300,000 Iranian citizens with Google~@diginotar. +One infamous example is DigiNotar's 2011 security breach, which allowed the attacker to listen into the connections of about 300,000 Iranian citizens with Google~@diginotar. This was possible because the domain owner, i.e., Google, could not know that a certificate was issued in their name. -In such a case, even the best certificate revocation mechanism is meaningless, as there is nobody who could initiate it. -As a direct consequence, Google initiated a program to ensure that all issued certificates must be logged publicly such that a domain owner can recognize maliciously issued certificates and take action retroactively. +In such a case, even the best certificate revocation mechanism is meaningless, as nobody could initiate it. 
+As a direct consequence, Google initiated a program to ensure that all issued certificates are logged publicly so that a domain owner can recognize maliciously issued certificates and take action retroactively. This is referred to as #gls("ct", long: true). #figure( @@ -173,7 +173,7 @@ The pre-certificate breaks the cyclic dependency, that a #gls("ct")-log needs th To issue the final certificate, the @ca must send this pre-certificate to at least two independent #gls("ct")-logs, which will ensure the certificate is logged publicly to an append-only log. In return, each log provides a @sct to the @ca for including it in the final certificate. Subsequently, the @ca returns the certificate with the embedded @sct:pl to the @ap, which can use it whenever a @rp connects thereafter. -At the same time, Monitors are constantly watching the logs and possibly notify a @ap for every certificate issued on their name. +At the same time, Monitors constantly watch the logs and possibly notify an @ap for every certificate issued under their name. In addition to Monitors, there are also Auditors -- not shown in the figure -- that check the consistency of the log. This includes the append-only property, that all certificates are actually logged as promised, and that the log provides the same answers to all clients, independent of the location or other properties~@certificate_transparency. If the answers provided differ, this is called a #emph[split-view] attack. @@ -181,7 +181,7 @@ If the answers provided differ, this is called a #emph[split-view] attack. The functionality of an auditor may be spread over multiple entities. For example, the consistency checks could be performed by the monitors, while they are receiving the added certificates anyway. Additionally, there exists some specific and hardened hardware across the world that checks the log consistency over time and additionally tries to notice any split-view~@verification_transparency_dev @armored_witness. 
-To check that a certificate actually gets included into the log after the log operator sent a @sct is more complex. +It is more complex to check that a certificate actually gets included in the log after the log operator sends an @sct. To detect if a certificate was included in the log, browsers can request inclusion proof of a specific certificate for a Signed Tree Head they trust. Unfortunately, this is difficult to do in practice, as browsers would leak which websites they visited to the log operators. @@ -191,7 +191,7 @@ Chrome and Apple require their trusted root @ca:pl to include at least two indep That way, effectively every certificate must be logged publicly to be of any value. This solves the problem of certificates that are unknown to an @ap, and @ct allows Monitors to analyze certificates and @ca:pl for misbehavior. -== TLS +== TLS // - Standardized by the @ietf // - successor of @ssl, developed by Netscape Communications in the 1990s // - Focus on newest version: @tls 1.3 from #cite(, form: "year") specified in @rfc_tls13 @@ -213,7 +213,7 @@ It improved security and incorporated protocol simplifications and speed improve The following will focus on @tls~1.3 as it is used for 94~% of all @https requests according to Cloudflare Radar on 2024-09-23 and support for @tls 1.1 and older has been dropped by all relevant browsers in 2020~@cloudflare_radar @chrome_drop_tls @firefox_drop_tls @microsoft_drop_tls @apple_drop_tls. -@tls consists of two sub-protocols, the handshake protocol for authentication and negotiation of cryptographic ciphers, followed by the record protocol to transmit the application data. +@tls consists of two sub-protocols: the handshake protocol for authentication and negotiation of cryptographic ciphers, followed by the record protocol for transmitting application data. 
The following concentrates on the handshake protocol and skips some functionality such as client certificates, the usage of previously or out-of-band negotiated keys, and 0-RTT data that can be used to send application data before the handshake is finished. This allows focus on the parts relevant to this thesis. @@ -227,37 +227,37 @@ This allows focus on the parts relevant to this thesis. @tls_handshake_overview illustrates the messages and extensions sent during the @tls handshake and whether they are encrypted. A @tls connection is always initiated by the client through a `ClientHello` message. This message contains extensions with a key share and signature algorithms the client supports. -The server responds with a `ServerHello` message, which contains a key share as an extension as well. +The server responds with a `ServerHello` message, which also includes a key share as an extension. Knowing both key shares, the server and client derive a shared symmetric secret and use it to protect all subsequent messages. The following messages authenticate the server to the client by sending its certificate chain and a `CertificateVerify` message. The `CertificateVerify` message contains the signature over the handshake transcript up to that point. -This proves the server is in the possession of the private key corresponding to the certificate and messages have not been tampered with. +This proves that the server is in possession of the private key corresponding to the certificate and that messages have not been tampered with. The handshake ends with a `Finished` message each side sends and verifies. It contains a @mac over the transcript and thus assures the integrity and authenticity of the handshake. -This @mac is not strictly necessary for the security when performing a full handshake as described in this work, but is essential if protocol optimizations are used which allow using out-of-band or previously negotiated keys, for example~@finished_message_tls13. 
+This @mac is not strictly necessary for security when performing a full handshake, as described in this work, but it is essential if protocol optimizations are used that allow using out-of-band or previously negotiated keys~@finished_message_tls13. After the successful handshake, @tls continues to the record layer to exchange the application data. -=== KEMTLS +=== KEMTLS @kemtls aims to improve the communication cost of @tls handshakes using the fact that the best currently available @pq @kem:pl are smaller than the @pq signatures. Instead of explicitly authenticating the server in the `CertificateVerify` using a signature, @kemtls uses @kem:pl to authenticate the key exchange. -The exact differences with the standard @tls handshake are not relevant for this work, so we refrain from explaining them here. +The exact differences with the standard @tls handshake are not relevant to this work, so we refrain from explaining them here. However, it is relevant to note that a @kemtls handshake requires a @kem public key in the @ee certificate instead of a signature key~@kem_tls. -Hence, the utilization of @kemtls has an impact on the size of the certificate, as shown in @sec:certificate_size. +Hence, the utilization of @kemtls impacts the size of the certificate, as shown in @sec:certificate_size. -== Post-Quantum Signatures +== Post-Quantum Signatures // - Two PQ signatures are standardized by @nist in @fips_204 (ML-DSA, formally known as CRYSTALS-Dilithium) and @fips_205 (SLH-DSA formally known as Sphincs+) // - The third - FN-DSA / Falcon - specified later. It relies on dangerous floating-point arithmetic that produces side-channel leakage. // This section provides a short overview of the @pq signatures available today. // It helps with understanding size and performance considerations later on. 
-@tab:pq_signature_comp shows a comparison of @ecdsa and #gls("rsa", long: false)-2048 as classical signature schemes and the @pq signature schemes selected by the @nist for standardization. +@tab:pq_signature_comp compares @ecdsa and #gls("rsa", long: false)-2048 as classical signature schemes and the @pq signature schemes selected by the @nist for standardization. @mldsa was known as #box[CRYSTALS]-Dilithium and @nist standardized it as FIPS 204 in 2024, together with the @slhdsa as FIPS 205 @fips_204 @fips_205. A @nist draft for the @fndsa is expected in late 2024. -The @nist decided to specify three signature algorithms, as each of them has their benefits and drawbacks. +The @nist decided to specify three signature algorithms, as each of them has its benefits and drawbacks. @mldsa is the recommended algorithm for most applications, as it has reasonable values in all categories. @slhdsa is currently the most trusted algorithm, as it relies on the security of the well-established #gls("sha", long: false)--2 or #gls("sha")--3 hashing algorithms, which would need to be dramatically broken to harm the security of @slhdsa~@sphincs_proposal. This makes @slhdsa a suitable candidate for long-term keys or situations where an upgrade is hard. @@ -274,5 +274,9 @@ Moreover, verifying does not rely on floating-point arithmetic, and even if it d pq_signatures, caption: [ Comparison of selected classical signature schemes with algorithms (to be) standardized by the @nist. - The #box(baseline: 0.2em, height: 1em, image("images/red-alert-icon.svg")) symbols fast, but dangerous floating-point arithmetic~@bas_westerbaan_state_2024. The CPU cycles taken from the SUPERCOP database, measured on a AMD Ryzen 7 7700; 8 x 3800MHz machine~@supercop-asym. The @pq algorithms were not listed in their final versions yet. Therefore, we used benchmarks of preliminary versions that are closest to the final standard.]
+ The #box(baseline: 0.2em, height: 1em, image("images/red-alert-icon.svg")) symbolizes fast, but dangerous floating-point arithmetic~@bas_westerbaan_state_2024. + The CPU cycles were taken from the SUPERCOP database and measured on an AMD Ryzen 7 7700; 8 x 3800MHz machine~@supercop-asym. + The @pq algorithms were not listed in their final versions yet. + Therefore, we used benchmarks of preliminary versions that are closest to the final standard. + ] ) diff --git a/3_mtc.typ b/3_mtc.typ index 8aa5fd6..321c059 100644 --- a/3_mtc.typ +++ b/3_mtc.typ @@ -22,17 +22,17 @@ // This section summarizes the @ietf Internet-Draft that describes #glspl("mtc") for @tls~@rfc_mtc. The motivation to create a new certificate architecture is mainly driven by the large size of @pq signatures. Unfortunately, today's Web@pki relies on signatures in various places not just limited to the @ca signature in the certificate, but also for the embedded @sct for @ct and possibly @ocsp staples for certificate revocation. -Therefore, replacing all these signatures naively results in a big increase in bytes transferred during a @tls handshake, as @sec:certificate_size will show in detail. +Therefore, replacing all these signatures naively results in a significant increase in bytes transferred during a @tls handshake, as @sec:certificate_size will show in detail. To prevent this, the Internet-Draft proposes an architecture that reduces the number of signatures where possible and instead greatly relies on hash functions. Hash functions have the advantage of being computationally lightweight, small, and @pq secure. -On a high level, the idea is to certify a batch of assertions at the same time by building a Merkle Tree. +On a high level, the idea is to certify a batch of assertions simultaneously by building a Merkle Tree. Instead of signing each assertion individually, the @ca signs only the tree head.
-These tree heads get distributed to Transparency Services, which serve a similar goal as the logs in @ct, but additionally provide a channel for @rp:pl to regularly update to the most recent batch tree heads. +These tree heads are distributed to Transparency Services, which serve a similar goal as the logs in @ct, but additionally provide a channel for @rp:pl to regularly update to the most recent batch tree heads. Note that the @mtc proposal does not aim to replace the certificate infrastructure as we know it today; instead, it functions as an optional optimization. -Compared to today's Web@pki it has a reduced scope and assumes more prerequisites to function properly. -@sec:mtc_pki_comparison elaborates on these differences, the following focuses more on the technical details of @mtc. +Compared to today's Web@pki, it has a reduced scope and assumes more prerequisites for functioning properly. +@sec:mtc_pki_comparison elaborates on these differences; the following focuses more on the technical details of @mtc. #figure( mtc_overview(), @@ -47,10 +47,10 @@ Compared to today's Web@pki it has a reduced scope and assumes more prerequisite - A #emph([Monitor]) monitors the transparency services for suspicious or unauthorized certificates. - An #emph([Assertion]) is information that an @ap gets certified by a @ca, i.e., a public key and one or multiple domain name(s) or #gls("ip", long: false) address(es). An #emph([Abridged Assertion]) hashes the public key stored in an assertion to reduce the size, especially for potentially large @pq keys. - A #emph([Batch]) is a collection of assertions that are certified simultaneously. -- The #emph([Batch Duration]) is the time the batch spans. The authors recommend a Batch Duration of one hour, meaning that all assertions collected within this hour are certified at the same time in one Batch. +- The #emph([Batch Duration]) is the time the batch spans. 
The authors recommend a Batch Duration of one hour, meaning that all assertions collected within this hour are certified simultaneously in one Batch. - A #emph([Batch Tree Head]) is the Merkle Tree root node over all assertions of one batch. - An #emph([Inclusion Proof]) is a proof that a certain assertion is included in a batch. The proof consists of the hashes required to rebuild the path up to the Batch Tree Head. -- A #emph([Validity Window]) is the range of consecutive batch tree heads that are valid at a time. +- A #emph([Validity Window]) is the range of consecutive batch tree heads valid at a time. - A #emph([Certificate]) combines an assertion with an inclusion proof. #figure( @@ -61,48 +61,51 @@ Compared to today's Web@pki it has a reduced scope and assumes more prerequisite With this terminology, the following explains the certificate issuance flow depicted in @fig:mtc_overview + First, the @ap requests a certificate from the @ca. - Due to the frequency of that operation, this should be an automated process using the @acme protocol, for example. + Due to its frequency, this operation should be automated using the @acme protocol, for example. + Every time a batch becomes ready, the @ca builds the Merkle Tree, signs the whole Validity Window, which includes the new Batch Tree Head, with a @pq algorithm, and publishes the tree to the Transparency Services. + The @ca also sends the inclusion proof back to the @ap, which can subsequently use it to authenticate against #glspl("rp") that trust this batch. // + The Transparency Services recompute the Merkle Tree to validate the Merkle Tree Head contains exactly what is advertised and validate the signature of the Batch Tree Head. + Monitors mirror all Assertions published to the Transparency Services and check for fraudulent behavior. This can include, but is not limited to, notifying domain owners about certificates issued. 
-+ #glspl("rp") regularly update their trust anchors to the most recent Batch Tree Heads that have been validated by their trusted Transparency Service(s). ++ @rp:pl regularly update their trust anchors to the most recent Batch Tree Heads validated by their trusted Transparency Service(s). + When connecting to an @ap, the @rp signals which trust anchors it supports, i.e., which tree heads it trusts. -+ If the @ap has a valid @mtc certificate, for one of the supported trust anchors, it will send this instead of a classical X.509 certificate. ++ If the @ap has a valid @mtc certificate for one of the supported trust anchors, it will send this instead of a classical X.509 certificate. The following sections elaborate on the responsibilities and objectives of the components involved. -== Certification Authority -A @ca is defined by the following parameters that are publicly known and cannot change. In particular, the Transparency Service trusts certain @ca:pl and uses these parameters to validate the signed validity windows it receives from the @ca:pl. +== Certification Authority +A @ca is defined by the following publicly known parameters that cannot change. +In particular, the Transparency Service trusts certain @ca:pl and uses these parameters to validate the signed validity windows it receives from the @ca:pl. - `hash`: The hash function used to build the Merkle Tree. Currently, only #gls("sha")-256 is supported. - `issuer_id`: A @tai as defined in @rfc_tai. That is a relative @oid under the prefix `1.3.6.1.4.1`. Organizations append their @pen registered at the @iana. -- `public_key`: The public key is used by the Transparency Services to validate the signed validity window. -- `start_time`: Is the issuance time of first batch as POSIX timestamp @posix[pp.~113]. +- `public_key`: The Transparency Services use the public key to validate the signed validity window. +- `start_time`: The issuance time of the first batch as a POSIX timestamp @posix[pp.~113].
- `batch_duration`: The time between two batches given in full seconds. - `lifetime`: The number of seconds a batch is valid. It must be a multiple of `batch_duration`. -- The `validity_window_size` defines the number of tree heads that are valid at the same time. It is calculated as the `lifetime` divided by the `batch_duration`. +- The `validity_window_size` defines the number of tree heads that are valid simultaneously. + It is calculated as the `lifetime` divided by the `batch_duration`. -The authors of the Internet-Draft suggest a `batch_duration` of one hour and a `lifetime` of 14 days. This results in a `validity_window_size` of 336. +The authors of the Internet-Draft suggest a `batch_duration` of one hour and a `lifetime` of 14 days. +This results in a `validity_window_size` of 336. As with the current certificate infrastructure, the @ca is responsible for checking the assertions it certifies. -In particular, the @ca must verify that the @ap requesting the certificate is in effective control over the domain names and @ip addresses the certificate is issued to. -The exact mechanism is outside the scope of this work, but usually involves completing a challenge by setting a DNS record or serving a file at a specific URL. +In particular, the @ca must verify that the @ap requesting the certificate effectively controls the domain names and @ip addresses to which the certificate is issued. +The exact mechanism is outside the scope of this work but usually involves completing a challenge by setting a DNS record or serving a file at a specific URL. All assertions the @ca is willing to certify are accumulated in a batch. A batch is always in one of three states: #emph([pending]), #emph([ready]), or #emph([issued]). A pending batch is not yet due, i.e., the `start_time`~+~`batch_number`~$dot$~`batch_duration` is bigger than the current time. -A batch in the ready state is due according to the current time, but has not yet been issued. 
+A batch in the ready state is due at the current time but has not yet been issued. This will typically be a small time frame in which the @ca builds the Merkle Tree and signs the validity window. Subsequently, the batch transfers to the issued state, i.e., the @ca published the signed validity window and abridged assertions. -As an invariant, all batches before the latest issued one must be issued as well, i.e., no gaps are allowed. +As an invariant, all batches before the latest issued one must be issued as well; no gaps are allowed. Every time a batch becomes ready, the @ca converts all assertions it found to be valid into abridged assertions by hashing the -- possibly large -- signature key in that assertion. Afterward, it builds a Merkle Tree as depicted in @fig:merkle_tree_abridged_assertion. Lastly, the @ca signs a `LabeledValidityWindow` that contains the domain separator `Merkle Tree Crts ValidityWindow\0`, the `issuer_id`, the `batch_number`, and all Merkle Tree root hashes that are currently valid. The domain separator allows the protocol to be extended in the future, if the @ca would need to sign different structs with the same key. -One example could be the introduction of a revocation mechanism that requires the @ca to sign some data with the same key. +One example could be introducing a revocation mechanism that requires the @ca to sign some data with the same key. Signing the entire validity window instead of each tree root individually has two advantages: For one, if a client or Transparency Service is behind more than one Tree Head, only a single signature needs to be transferred instead of multiple, which saves bandwidth and computational effort for the signature verification. The second benefit is that it complicates split-view attacks. 
@@ -122,10 +125,10 @@ A @ca would have to keep the split view for an entire validity window instead of caption: [Example Merkle Tree for three assertions]) -== The Role of the Transparency Service +== The Role of the Transparency Service While transparency was an afterthought in the current certificate infrastructure, it is an integral part of the @mtc architecture. The main task of the Transparency Service is to validate and mirror the signed validity windows produced by @ca:pl and serve them to @rp:pl as well as monitors. -To check a signed validity window, the Transparency Service fetches the latest signed validity window as well as all abridged assertions from a @ca. +To check a signed validity window, the Transparency Service fetches the latest signed validity window and all abridged assertions from a @ca. It then checks that the signature of the validity window matches the public key of that @ca. By rebuilding the Merkle Tree from the abridged assertions, the Transparency Service ensures that the @ca produced certificates for exactly what the @ca serves as abridged assertions. Due to the collision resistance of the hash function, it is computationally infeasible for the @ca to secretly include another assertion, leave one out, or modify an assertion. @@ -133,17 +136,17 @@ Due to the collision resistance of the hash function, it is computationally infe Conceptually, the Transparency Service is a single instance. In practice, though, it should consist of multiple services hosted by independent organizations. This reduces the chance that a @ca can collude with a single Transparency Service to provide a split view. -Also, the authors of the draft imagine that in practice, the browser vendors would run such a Transparency Service for their product and use their update mechanism to frequently provide the most recent tree heads.
+Also, the draft authors imagine that, in practice, browser vendors would run such a Transparency Service for their product and use their update mechanism to frequently provide the most recent tree heads. // == The Transparency Service -- Relying Party link -This link between the Transparency Service and @rp includes one significant design decision: Either, the Transparency Service forwards only the tree heads to the @rp, or it includes the @ca signature as well. -@sec:update_size elaborates on how that influences the amount of data to be distributed. -Omitting the @ca signature does not only significantly reduce update bandwidth but also means that the client does not need to perform @pq signature verification. -Consequently, the @rp must trust the Transparency Service to adequately check the @ca signature, and it requires the @rp to have a secure channel with the Transparency Service. +This link between the Transparency Service and @rp includes one significant design decision: Either the Transparency Service forwards only the tree heads to the @rp, or it includes the @ca signature as well. +@sec:update_size elaborates on how that influences how much data is distributed. +Omitting the @ca signature significantly reduces update bandwidth and eliminates the need for the client to perform @pq signature verification. +Consequently, the @rp must trust the Transparency Service to check the @ca signature adequately, and it requires the @rp to have a secure channel with the Transparency Service. Depending on how this channel is designed, it may require interaction with @pq signatures on the client side nevertheless. Additionally, a malicious Transparency Service could provide a split view to a client without the need to collude with a @ca. -At the same time, if the Transparency Service is run by the browser vendor, it is anyway in the position to decide about with connection to trust. 
+At the same time, if the browser vendor runs the Transparency Service, it is anyway in a position to decide which connection to trust, even without potentially modifying the trusted roots. // - Conceptually one instance, but actually distributed // - Could be the browser vendor or independent @@ -179,8 +182,8 @@ It allows the @rp to send the newest batch tree heads it supports to the @ap, su In particular, this allows the @ap to know which @mtc certificate to send during a certificate rotation. In practice, it is not possible to send the whole list of known trust anchors to an @ap for two main reasons: Size and privacy. -The list of all supported trust anchors is potentially large, keeping in mind that the trust anchor mechanism is not exclusively designed for @mtc, but explicitly also for other mechanisms, such as X.509. -Assuming that just 50 @ca:pl would participate in this mechanism with an average identifier length of four bytes, each `ClientHello` would need to carry 250 additional bytes, 200 for the identifiers and 50 for the encoding with length prefixes. +The list of all supported trust anchors is potentially large, considering that the trust anchor mechanism is not exclusively designed for @mtc but explicitly also for other mechanisms, such as X.509. +Assuming that just 50 @ca:pl would participate in this mechanism with an average identifier length of four bytes, each `ClientHello` would carry 250 additional bytes, 200 for the identifiers and 50 for the encoding with length prefixes. The second concern is that a server can use the detailed information about the client for fingerprinting. Especially with the quickly changing @mtc system, users might have recognizable trust stores, depending on when they pulled the latest tree heads from the Transparency Service. 
@@ -189,4 +192,4 @@ The @ap can create a @svcb @dns record listing all the trust anchors it supports Based on this information, the @rp can decide which trust anchor to offer to the @ap during the handshake. Requiring information from the @dns complicates the deployment, but the #emph[Encrypted Client Hello] relies on a @svcb @dns record as well~@rfc_ech and is deployed in practice already~@firefox_ech@apple_ech@cloudflare_ech@chrome_ech. The second option a @rp has is to guess trust anchors an @ap may support and do a retry if the guess was not correct. -The main downside is that a retry causes an additional round trip, and therefore higher latency. \ No newline at end of file +The main downside is that a retry causes an additional round trip and, therefore, higher latency. \ No newline at end of file diff --git a/4_comparison.typ b/4_comparison.typ index a397627..ed4b0eb 100644 --- a/4_comparison.typ +++ b/4_comparison.typ @@ -4,39 +4,40 @@ = Comparison of MTC with the Current WebPKI -Based on the introduction to @pki in @sec:pki and the explanation of @mtc in @sec:mtc, it becomes obvious that there are significant differences between these architectures. -This chapter presents the results of the analysis we conducted about the differences and the advantages and disadvantages the architectures result in. +The introduction to @pki in @sec:pki and the explanation of @mtc in @sec:mtc make it obvious that there are significant differences between these architectures. +This chapter presents the results of our analysis of the differences and the advantages and disadvantages of the architectures. // #heading("Advantages", level: 4, outlined: false, numbering: none) The most obvious change is the significant reduction of the certificate lifetime. The authors of @mtc propose a lifetime of 14 days. In contrast, as of October 2024, @tls leaf certificates for the Web@pki may be issued for at most 13 months, i.e., 398 days~@chrome_cert_lifetime @apple_cert_lifetime.
-Often they are issued only for 90 days, which is still more than six times as long as the proposed lifetime of @mtc. -At the same time, it is likely that the validity periods of classical certificates will decrease further. +Often, they are issued only for 90 days, which is still more than six times as long as @mtc's proposed lifetime. +At the same time, the validity periods of classical certificates will likely decrease further. In October 2024, Apple published a proposal to the CA/Browser Forum suggesting a gradual reduction of the maximum certificate lifetime to 45 days by September 2027~@apple_45_days_cert. -It is unclear if this proposal will be accepted, but it is clear that the maximum certificate lifetime will only decrease in the future, possibly approaching the certificate lifetime of @mtc. +It is unclear if this proposal will be accepted, but the maximum certificate lifetime will only decrease in the future, possibly approaching the certificate lifetime of @mtc. Another notable difference is that the @mtc draft explicitly ignores certificate revocation. -This is a direct result of the short certificate lifetimes; if certificates live as long as it takes for a revocation to effectively propagate, certificate revocation is not necessary anymore. -Eliminating the need for a revocation mechanism is a clear improvement over the current Web@pki, as it continuously suffers from ineffective revocation mechanisms~@lets_encrypt_new_crl @crl_sets_effectiveness @reddit_ocsp_firefox. -Chrome does not check @ocsp or @crl:pl, but relies on a custom summary called #emph("CRLSets") containing a (small) subset of all revoked certificates curated by Google @chrome_crlsets. -In contrast to that, Firefox does still check @ocsp responses, but the CA/Browser forum changed its recommendation to support @ocsp in their @ca baseline requirements to an optional @ocsp support in version 2.0.1, effective as of March 2024~@cab_ocsp_optional_crl_mandatory. 
+This is a direct result of the short certificate lifetimes; if certificates live as long as it takes for a revocation to propagate effectively, certificate revocation is no longer necessary. +Eliminating the need for a revocation mechanism is a clear improvement over the current Web@pki, which continuously suffers from ineffective revocation mechanisms~@lets_encrypt_new_crl @crl_sets_effectiveness @reddit_ocsp_firefox. +Chrome does not check @ocsp or @crl:pl but relies on a custom summary called #emph("CRLSets"), which contains a (small) subset of all revoked certificates curated by Google~@chrome_crlsets. +In contrast, Firefox does still check @ocsp responses, but the CA/Browser forum changed its recommendation to support @ocsp in their @ca baseline requirements to optional @ocsp support in version 2.0.1, effective as of March 2024~@cab_ocsp_optional_crl_mandatory. As @ocsp entails high operational costs for @ca:pl, it is likely that @ocsp will further lose relevance. Let's Encrypt already announced to end their @ocsp support "as soon as possible"~@lets_encrypt_end_ocsp. -Instead, the CA/Browser forum tightens the requirements for @crl:pl and Mozilla is working on accumulating all revoked certificates into a small list called #emph("CRLite") since 2017, but did not enable this mechanism by default in Firefox as of version 132 from October 2024~@crlite_paper @mozilla_crlite. +Instead, the CA/Browser forum tightens the requirements for @crl:pl, and Mozilla has been working on accumulating all revoked certificates into a small list called #emph("CRLite") since 2017~@crlite_paper @mozilla_crlite. +However, as of version 132 from October 2024, Firefox does not enable this mechanism by default. // Furthermore, certificate transparency is built into @mtc, as opposed to the X.509 certificate infrastructure, where it was added later on. 
// #heading("Disadvantages", level: 4, outlined: false, numbering: none)
A significant downside of @mtc compared to the classical certificate infrastructure is the longer issuance times.
-There are two aspects to this: First, the issuance of the certificate itself takes up to `batch_duration` seconds, i.e., one hour assuming the default values, and second, the time the new tree heads propagate to a relevant number of @rp:pl.
+There are two aspects to this: First, the certificate issuance itself takes up to `batch_duration` seconds, i.e., one hour assuming the default values, and second, the time the new tree heads propagate to a relevant number of @rp:pl.
The first one will not make up for the major part of the difference in practice.
-For both X509 and @mtc certificates, the @ca must validate the @ap has effective control over the domain beforehand.
+For both X.509 and @mtc certificates, the @ca must validate beforehand that the @ap has effective control over the domain.
This validation process often involves @dns record propagation or @http page propagation across multiple servers and data centers, especially for large-scale deployments~@lets_encrypt_challange_types @tls_issuance_delay.
-Therefore, classical certificate issuance can take up to an hour as well, though in optimized configurations it can work a lot faster.
+Therefore, classical certificate issuance can also take up to an hour, though in optimized configurations, it can work much faster.
The second part, the propagation delay of new tree heads to the @rp:pl, is more relevant.
-X.509 certificates are trusted by @rp:pl immediately after they are issued.
-In contrast to that, verifying @mtc:pl requires the @rp to be up-to-date with the batch tree head.
+X.509 certificates are trusted by @rp:pl immediately after issuance.
+In contrast, verifying @mtc:pl requires the @rp to be up-to-date with the batch tree head.
In practice, we do not expect updates from the Transparency Service to the @rp to happen substantially more frequently than every six hours~@mtc_fallback_estimate.
Therefore, the delay until a new @mtc is broadly usable may be up to a few days in the worst case.

@@ -44,16 +45,16 @@ To determine how big the impact of the long issuance delay is, it is helpful to
In such a situation, the Internet-Draft assumes the existence of a fallback mechanism for fast issuance.
This could be an X.509 certificate or another, future mechanism that allows for fast issuance.
The drawback of large certificate chains is only temporary, until @rp:pl updated their trust stores to incorporate the new tree heads, enabling them to utilize the size-efficient @mtc mechanism again.
-There are two main reasons why a fast issuance is required; for a new domain and for an unplanned move of the domain.
+There are two main reasons why a fast issuance is required: a new domain and an unplanned move of the domain.
A scenario in which an expired certificate must be renewed quickly because of a forgotten, manual renewal is very unlikely, as @mtc requires a high level of automation anyway.
-In @mtc_fallback_estimate, Heimberger estimates the likelihood of those fallbacks.
-For that, she uses the fact that all certificates must be logged to a transparency log to be accepted by the major browser, which makes the analysis of all current and expired certificates possible.
+In @mtc_fallback_estimate, L. Heimberger estimates the likelihood of those fallbacks.
+For that, she uses the fact that all certificates must be logged to a transparency log to be accepted by the major browsers, which allows her to analyze all current and expired certificates.
Heimberger divided domains into two categories: Top domains and random domains.
-This is interesting because the most visited websites are more likely to be well maintained than websites that are visited less often. 
-The analysis she performed potentially has a high rate of large positives, but it is useful to have an idea of the order of magnitude anyway.
+This is interesting because the most visited websites are more likely to be well-maintained than those visited less often.
+The analysis she performed potentially has a high rate of false positives, but it is useful to have an idea of the order of magnitude anyway.
Assuming a propagation delay of three days, the top domains have a chance of 0.0004~% of hitting a fallback, while the random domains have a chance of 0.009~%.
-This indicates that the chance of hitting a fallback is very unlikely, and thus the longer issuance delays will barely affect the daily operations.
+This indicates that the chance of hitting a fallback is very low, and thus, the longer issuance delays will barely affect the daily operations.

// - Not a replacement, but an optimization
// - Reduced Scope

@@ -77,39 +78,39 @@ This indicates that the chance of hitting a fallback is very unlikely, and thus

// Correction: Root certs are typically not sent. There may be multiple certificates with the same CN #emoji.face.explode.

On a large scale, every byte saved during a @tls handshake is a relevant reduction, as a handshake takes place before almost every connection.
-Cloudflare published some notable statistics regarding the number of bytes transferred from server to client.
+Cloudflare published a notable statistic regarding the number of bytes transferred from server to client.
Their statistic only considers QUIC connections, as they likely originate from browsers.
-This fits nicely, as the @mtc architecture is mainly designed for browser-like applications as well.
-For non-resumed QUIC connections, the median number of transferred bytes is 7.8~kB and the average is 395~kB.
+This fits nicely, as the @mtc architecture is also mainly designed for browser-like applications.
+For non-resumed QUIC connections, the median number of transferred bytes is 7.8~kB, and the average is 395~kB. The big difference between the median and average indicates that a few data-heavy connections heavily influence the average, while there is a high volume of small connections. -This allows the rough estimate that about 40~% of the bytes transferred from the server to the client are for the certificate chain in at least half of the non-resumed QUIC connections. +This allows a rough estimate that about 40~% of the bytes transferred from the server to the client are for the certificate chain in at least half of the non-resumed QUIC connections. Therefore, we investigate the main improvement of @mtc over classical X.509 certificates in this section, namely the size reduction of the @tls handshake. -Initially, we focus on the authentication related cryptographic material exchanged during the handshake. -This means, we do not include the bytes that encode the domain name, key usage constraints, validity timestamps, and similar. -We do also ignore the bytes required to establish a shared key used for the record layer, which is used for the encryption and authentication of the payload messages. +Initially, we focus on the authentication-related cryptographic material exchanged during the handshake. +This means we do not include the bytes that encode the domain name, key usage constraints, validity timestamps, and similar. +We also ignore the bytes required to establish a shared key for the record layer, which encrypts and authenticates the payload messages. Hence, an X.509 handshake contains the following components: -One signature for active authentication of the handshake, two signatures for @sct:pl, optionally one signature for an @ocsp staple, one signature of the intermediate @ca on the @ee certificate, and one signature of the root @ca on the intermediate @ca. -In addition, the @ee and intermediate certificate contain one public key each. 
+One signature for active handshake authentication, two signatures for @sct:pl, optionally one signature for an @ocsp staple, one signature of the intermediate @ca on the @ee certificate, and one signature of the root @ca on the intermediate @ca.
+In addition, the @ee and intermediate certificates contain one public key each.
The root certificate is typically not sent to the @rp, as it is expected to know it already.
-Summing this up, we count six signatures and two public keys.
+To sum this up, we count six signatures and two public keys.
The last case in @tab:x509_size, marked in yellow, is a special case.
-It uses @kemtls and therefore sends a key encapsulation instead of a signature and stores the public key of the @kem in the certificate instead of a public key for signature generation.
+It uses @kemtls and, therefore, sends a key encapsulation instead of a signature and stores the public key of the @kem in the certificate instead of a public key for signature generation.
For our analysis, we ignore this fact, as it serves the same objective, namely actively authenticating the handshake.
-@tab:x509_size contains one optimistic and one conservative but realistic estimate for each, a @pq and non @pq secure setup.
+@tab:x509_size contains one optimistic and one conservative but realistic estimate for each of a #gls("pq")-secure and a non-#gls("pq")-secure setup.
Additionally, it contains one setup for @kemtls.
The optimistic estimate assumes the usage of 256-bit @ecdsa signatures and keys across the whole chain.
About 24~% of all currently active certificates are issued for an @ecdsa key, with about 53~% using a 384-bit and 47~% using a 256-bit key length.
The remaining 76~% of all current @ee certificates use an RSA algorithm. @merkle_town
For the root @ca:pl stored in the Firefox root program, the numbers are a bit different.
44~% (78) use a 4096-bit RSA key, 26~% (46) use a 2048-bit RSA key, 27~% use a 384-bit @ecdsa key and only 2~% (4) use a 256-bit @ecdsa key @firefox_root_store. -Without telemetry data from browsers, it is not possible to judge which are the most common combinations just from the percentage of certificates issued and the configuration of root @ca:pl, as there is a big imbalance on which @ca:pl and certificates are heavily used and which are not. +Without browser telemetry data, it is not possible to judge the most common combinations just from the percentage of certificates issued and the configuration of root @ca:pl, as there is a big imbalance between which @ca:pl and certificates are heavily used and which are not. We tried to get an impression by manually checking the certificate chains for the top 10 domains according to Cloudflare Radar @cloudflare_radar_domains. The results in @tab:top_10_signatures show that the landscape of used signatures is diverse. The significance is very limited, though, as five of the ten top domains do not serve a website and are purely used for @api calls or to serve static assets like pictures and videos (root-servers.net, googleapis.com, gstatic.com, tiktokcdn.com, amazonaws.com). -The remaining five domains serve a website, but it seems likely that the majority of calls still originates from @api calls, which may use different certificate chains. -Moreover, the server may adopt the presented certificate based on a specific subdomain, the user agent, and other factors, which further complicates a holistic view. +The remaining five domains serve a website, but it seems likely that the majority of calls still originate from @api calls, which may use different certificate chains. +Moreover, the server may adopt the presented certificate based on a specific subdomain, the user agent, and other factors, further complicating a holistic view. Nevertheless, we are convinced that the chosen combinations represent adequate examples. 
The signatures for @sct:pl are more uniform.

@@ -122,20 +123,20 @@ From the number of columns, it becomes obvious that a @mtc contains way less asy
The certificate contains a single key used to protect the integrity of the handshake.
Together with the length of the inclusion proof, they determine the size of the authentication related cryptography.
The size of the inclusion proof logarithmically depends on the number of certificates in a batch.
-To estimate the size for the inclusion proof, we checked the number of active certificates for the biggest @ca, Let's Encrypt.
-According to their statistics, there exists about 420 million active certificates in October 2024, which matches with observations based on certificate transparency logs @merkle_town @lets_encrypt_stats.
+To estimate the size of the inclusion proof, we checked the number of active certificates for the biggest @ca, Let's Encrypt.
+According to their statistics, there were about 420 million active certificates in October 2024, which matches observations based on certificate transparency logs~@merkle_town @lets_encrypt_stats.
The logs further show that there are around one billion active certificates in total.
For the first estimate of the proof length, we take Let's Encrypt's recommendation to renew certificates every 60 days.
Knowing that certificates issued by Let's Encrypt are always valid for 90 days, we can deduce that there exist around $420 dot 10^6 dot 60/90 = 280 dot 10^6$ authenticating parties using the services of Let's Encrypt.
In a @mtc setup, @ap:pl are recommended to renew their certificates every ten days.
Assuming that a batch lasts for one hour, each batch contains $(280 dot 10^6)/(10 dot 24) = 1.16 dot 10^6$ certificates.
To accommodate this number of assertions, the Merkle Tree requires $ceil(log_2 (1.16 dot 10^6)) = 21$ levels, resulting in a proof length of 21 hashes.
-The current draft only allows #gls("sha")-256 as hashing algorithm and also future iterations are unlikely to extend the length of the digest, even if changing the algorithm. +The current draft only allows #gls("sha")-256 as the hashing algorithm, and future iterations are unlikely to extend the length of the digest, even if the algorithm is changed. Therefore, the proof length for this scenario is $21 dot 32 "bytes" = 672 "bytes"$. -The second scenario indicates a worst-case scenario, assuming a big increase in @ap:pl or centralization to few certificate authorities. +The second scenario indicates a worst-case scenario, assuming a big increase in @ap:pl or centralization to a few certificate authorities. We map the one billion currently active certificates to one billion @ap:pl, which is very conservative as it ignores the transition periods, which we considered in the first scenario. -Assuming again that each @ap renews their certificate every ten days and a batch size of one hour, the above calculation results in a proof length of $832 "bytes"$. +Assuming again that each @ap renews its certificate every ten days and that the batch size is one hour, the above calculation results in a proof length of $832 "bytes"$. It is interesting to realize that for every doubling of @ap:pl, the proof size grows by 32 bytes, as the tree depth grows logarithmically. // - 78 RSA 4096 bits @@ -169,8 +170,8 @@ It is interesting to realize that for every doubling of @ap:pl, the proof size g Comparing @tab:x509_size and @tab:bikeshed_size reveals the size advantages of @mtc, especially when using @pq algorithms. Focusing on the classical case first: In the best X.509 case, when using only 256-bit @ecdsa for all signatures, @mtc performs slightly worse in terms of the number of authentication bytes. 
-While the X.509 case requires 448 authentication-related bytes, @mtc requires 768~bytes, which is an absolute difference of 320~bytes, i.e., the X.509 certificate requires 41.67~% fewer bytes than the @mtc. -Comparing @mtc to a mostly #gls("rsa", long: false)-based certificate, @mtc demonstrates it advantages, as the X.509 certificate grows to 1,728~bytes. +While the X.509 case requires 448 authentication-related bytes, @mtc requires 768~bytes, an absolute difference of 320~bytes, i.e., the X.509 certificate requires 41.67~% fewer bytes than the @mtc. +Comparing @mtc to a mostly #gls("rsa", long: false)-based certificate, @mtc demonstrates its advantages, as the X.509 certificate grows to 1,728~bytes. An @mtc certificate with the same #gls("rsa", long: false)-2048-bit algorithm and the conservative estimate of one billion active @ap:pl requires 384~bytes, or 22~% less. Moving on to the @pq algorithms, the drastic improvement of @mtc shows up. @@ -178,7 +179,7 @@ Compared to the best X.509 case using only @mldsa signatures, @mtc saves 12,740~ Moreover, it seems realistic that a @ca would use @slhdsa instead of @mldsa due to its higher security guarantees. This further increases the advantage of @mtc to 80.05~% or 79.79~%, saving 18,176 or 18,016 bytes, respectively. When replacing the @mldsa key and signature with @mlkem, the handshake is 1,460 bytes smaller, independent of @mtc or X.509. -Nevertheless, the relative gain of @kemtls is bigger for @mtc as it exchanges fewer bytes in the baseline scenario. +Nevertheless, the relative gain of @kemtls is bigger for @mtc, as it exchanges fewer bytes in the baseline scenario. // - In the best classical case, X.509 contains less authentication bytes // - Compared to a realistic setup, @mtc with classical crypto already saves about 1000 bytes. 
@@ -190,10 +191,10 @@ Nevertheless, the relative gain of @kemtls is bigger for @mtc as it exchanges fe // TODO Mention that there are also other size improvements due to the Bikeshed certificate format In addition to size improvements related to authentication cryptography, @mtc brings additional size improvements by using a new certificate format. X.509 is based on @asn1 and certificates are transferred in @der encoding. -The @mtc Internet-Draft defines a new certificate format called Bikeshed certificate. +The @mtc Internet-Draft defines a new certificate format called #emph[Bikeshed] certificate. The name is meant as a placeholder, and the authors aim to replace it before it potentially becomes a standard. -@der uses a type-length-value encoding, meaning that each value in the certificate explicitly has a type and length encoded. -The encoding of @mtc, on the contrary, is more efficient because types and lengths of fields are implicit, i.e., fixed, where possible. +@der uses a type-length-value encoding, meaning that each value in the certificate explicitly has a type and length encoded. +On the contrary, the encoding of @mtc is more efficient because the types and lengths of fields are implicit, i.e., fixed, where possible. Besides the encoding, the Bikeshed certificate type saves bytes by omitting information that is superfluous in the new setting. The following fields are not stored in a Bikeshed certificate, that are not already covered by the size considerations above: - Not before timestamp @@ -205,36 +206,36 @@ The following fields are not stored in a Bikeshed certificate, that are not alre - Subject and authority key identifier To give an example: -The certificate chain for `www.google.com`#footnote([SHA-256 fingerprint \ `37:9A:80:C9:25:2C:66:A1:BB:89:D6:C0:C8:83:33:39:55:1D:E6:0F:D3:75:58:5C:F9:A3:18:37:03:57:A0:D6`]) has 2,486 bytes in @der format. 
+The certificate chain for `www.google.com`#footnote([SHA-256 fingerprint \ `37:9A:80:C9:25:2C:66:A1:BB:89:D6:C0:C8:83:33:39:55:1D:E6:0F:D3:75:58:5C:F9:A3:18:37:03:57:A0:D6`]) has 2,486 bytes in the @der format. The chain contains 256-bit ECDSA, RSA-2048, and RSA-4096 bit keys and signatures. -Summing them up, the authentication related bytes transmitted in the certificate chain result in 1,248~bytes. +Summing them up, the authentication-related bytes transmitted in the certificate chain result in 1,248~bytes. Note that this does not contain the @ocsp staple or handshake signature included in @tab:x509_size as they are not included in the certificate chain itself. -In comparison, a comparable Bikeshed certificate with a 256-bit ECDSA key would contain 704 authentication related bytes, assuming 280 million active @ap:pl. +In comparison, a comparable Bikeshed certificate with a 256-bit ECDSA key would contain 704 authentication-related bytes, assuming 280 million active @ap:pl. The full certificate would be 785 bytes in size. -Thus, the X.509 certificate chain has an overhead of 1,238 bytes or 99~% while the Bikeshed certificate has an overhead of 81 bytes or 12~%. +Thus, the X.509 certificate chain has an overhead of 1,238 bytes, or 99~%, while the Bikeshed certificate has an overhead of 81 bytes, or 12~%. Even though we only analyzed a single example that closely, this indicates that the X.509/@asn1 format produces a significant overhead, that can be reduced by introducing a new certificate format. -An analysis of the certificate chains of the top websites provides shows that certificates are often even bigger than our example. +An analysis of the certificate chains of the top websites shows that certificates are often even bigger than our example. #cite(, form: "author") investigated the size of certificate chains sent by roughly 75,000 of the Tranco top sites, "A Research-Oriented Top Sites Ranking Hardened Against Manipulation"~@tranco. 
It reveals that the 5#super[th] percentile of certificate chains is 2308~bytes big, and the median certificate chain has even 4032~bytes. Applying existing certificate compression algorithms, this reduces to 1619~bytes and 3243~bytes, respectively~@dennis_cert_size. -Consequently, even though X.509 certificates require less authentication-related bytes if they completely rely on the size efficient @ecdsa algorithm, the inclusion of additional attributes and inefficient encoding result in that @mtc is smaller in practice; for @pq algorithms, but also for classical algorithms. +Consequently, even though X.509 certificates require fewer authentication-related bytes if they completely rely on the size-efficient @ecdsa algorithm, the inclusion of additional attributes and inefficient encoding result in @mtc being smaller in practice, not only for @pq algorithms but also for classical algorithms. == Update Mechanism Considerations As with many optimizations, one does not get the results from @sec:certificate_size without a trade. The @mtc architecture requires the @rp to regularly update the tree heads it trusts, as shown in Step 5 of @fig:mtc_overview. To pull the updates, the @rp regularly requires a connection to the Transparency Service. -This update mechanism is the reason the @mtc architecture cannot replace all X.509 based @pki:pl. +This update mechanism is the reason the @mtc architecture cannot replace all X.509-based @pki:pl. For the @tls use cases, the updates are feasible. There are a few common use cases for @tls: For public websites served via @https and visited by a browser, the browser requires a connection to the internet anyway. For public @api:pl, the connecting @rp requires an internet connection as well. -For services hosted in a corporate network that does not allow connections to the public internet, it should not be a problem either. -In this case, corporate networks should use an internal @ca, instead of relying on public @ca:pl. 
-On the contrary, non @tls use cases that rely on X.509 certificates are not covered by the @mtc architecture, which is also clearly stated in the Internet-Draft. +// It should not be a problem for services hosted in a corporate network that does not allow connections to the public Internet, either. +// In this case, corporate networks should use an internal @ca, instead of relying on public @ca:pl. +On the contrary, non-@tls use cases that rely on X.509 certificates are not covered by the @mtc architecture, which is also clearly stated in the Internet-Draft. For example, smart cards could not regularly update the stored certificates, as they do not have an internet connection. Signing documents or code would not work either, as an entity verifying the signature would need to remember all trusted batch tree heads of the past. -In other words, validating the signature produced by the certified key should happen temporally close to the certificate issuance, in the order of few weeks at most. +In other words, validating the signature produced by the certified key should happen temporally close to the certificate issuance, in the order of a few weeks at most. // - @rp:pl have to be updated regularly. // - Requires a constant connection to the internet and validation of the certificate temporally close to certificate issuance @@ -247,45 +248,46 @@ In other words, validating the signature produced by the certified key should ha For the use cases that allow a regular update, an important metric for the update mechanism is the amount of data that needs to be transferred from the Transparency Service to the @rp:pl. We base this estimation on a web surfing use case with the following assumptions: We assume 150 trusted root @ca:pl, which is somewhere between the number of @ca:pl currently in the root store of Firefox and Chrome @firefox_root_store @chrome_root_store. 
-Furthermore, we assume each @ca uses a batch duration of one hour and lifetime of 14 days, as recommended in the Internet-Draft @rfc_mtc[Section 5.1].
-According to a recent post by O’Brien, working in the Chrome Security team at Google, Chrome strives for an update frequency of six hours or less~@mtc_fallback_estimate.
+Furthermore, we assume each @ca uses a batch duration of one hour and a lifetime of 14~days, as recommended in the Internet-Draft~@rfc_mtc[Section 5.1].
+According to a recent post by D. O’Brien, working in the Chrome Security team at Google, Chrome strives for an update frequency of six hours or less~@mtc_fallback_estimate.
Therefore, we assume six hours as the default browser update frequency for @mtc tree heads.
Lastly, we assume each @ca to use #gls("slhdsa")-128s to sign their validity window as the security guarantees for this algorithm are better compared to @mldsa, which is relevant for a long-lasting key.
In addition to the basic assumptions, the update size depends on what exactly a @rp pulls from the Transparency Service.
The straightforward way is to regularly pull all signed validity windows of all trusted root @ca:pl.
-Each validity window contains 7,856 bytes for the signature, 4 bytes for the batch number, and $24 dot 14 dot 32 = 10,752$ bytes for the three heads.
-Multiplying this with 150 trusted @ca:pl, each update transfers around 2.8 Megabyte, independent of the update cadence.
+Each validity window contains 7,856 bytes for the signature, 4~bytes for the batch number, and $24 dot 14 dot 32 = 10,752$~bytes for the tree heads.
+Multiplying this with 150 trusted @ca:pl, each update transfers around 2.8 megabytes, independent of the update cadence.
As an optimization, the transfer could only contain the tree heads that the @rp does not know yet.
This reduces the bytes transferred for the tree heads to $6 dot 32 = 192$ bytes if a @rp updates exactly every six hours.
The signature would match the most recent batch number transferred, as it covers all valid batch tree heads anyway.
-In other words: The Transparency Service does not need to transfer one signature for each batch tree head, but only one per update per @ca.
+In other words, the Transparency Service does not need to transfer one signature for each batch tree head but only one per update per @ca.
Together, this results in $150 dot (7,856 + 4 + 192) approx 1.2$~megabytes for each update every six hours.
During a day, that accumulates to 4.8 megabytes per @rp.

-A more extreme optimization requires full trust into the update mechanism and Transparency Service.
+A more extreme optimization requires full trust in the update mechanism and Transparency Service.
In such circumstances, the update can omit the @ca signatures and save significant update bandwidth that way.
For a six-hour update interval, each update contains $150 dot (4 + 192) = 29.4$ kilobytes, adding up to 117.6 kilobytes per day.
Compared to transferring the signatures, this saves 97.6~% in update bandwidth.
The shorter the update interval, the more advantageous it is to omit the signature, as it needs to be transferred once per update.

As mentioned, omitting the @ca signatures requires trust in the Transparency Service and update mechanism.
-It is important to note that the Transparency Service that the browser uses to retrieve its updates is likely operated by the browser vendor.
+It is important to note that the browser vendor likely operates the Transparency Service that the browser uses to retrieve its updates.
In practice, users must trust their browser vendor in the first place to not build in any backdoors or install untrusted @ca:pl.
To mitigate potential damage from this trust relation, a browser vendor could set up a verifiable, transparent update log that all updates must be pushed to before they can be installed by the browser.
-A similar setup -- namely firmware transparency -- is described as part of the Tillian project containing software to build a transparency log, mostly used for #gls("ct", long: true) @trillian_firmware_transparency.
+A similar setup -- namely firmware transparency -- is described as part of the Trillian project containing software to build a transparency log, mostly used for #gls("ct", long: true)~@trillian_firmware_transparency.
However, the precise realization is not straightforward as the present transparency log implementations rely on classical signatures such as @rsa or @ecdsa.
Additionally, the mechanism to bootstrap the updates requires some engineering, as it cannot be assumed that the browser knows recent @mtc roots that could be used to set up a #gls("tls")-based update connection.
Potentially, the update mechanism requires large X.509 certificates with @pq cryptography, at least in some cases.

-All the updates sizes scale linearly with the number of active @ca:pl.
-This means, if we assume only 15 @ca:pl that support the @mtc architecture, we can reduce the estimates on the update size for all scenarios by 90~% to 280~kilobyte for a full update, 120~kilobytes for an update every six hours including @ca signatures, and only three kilobytes for an update every six hours without the @ca signatures.
+All the update sizes scale linearly with the number of active @ca:pl.
+This means that if we assume only 15 @ca:pl that support the @mtc architecture, we can reduce the estimates on the update size for all scenarios by 90~% to 280~kilobytes for a full update, 120~kilobytes for an update every six hours including @ca signatures, and only three kilobytes for an update every six hours without the @ca signatures.
@tab:mtc_update_size provides an overview of the numbers mentioned in the above text.
Assuming only 15~@ca:pl is likely a reasonable estimate, as only the biggest @ca:pl are presumably willing to invest the necessary resources in such a fundamental change.
@mtc serves purely as an optimization of the existing X.509 ecosystem.
Thus, small @ca:pl that do not participate can still issue working certificates.
-These @ca:pl often exist because of various industry and government policies, which are generally covered by X.509 certificates and will likely not update to include @mtc for the foreseeable feature.
+These @ca:pl often exist because of various industry and government policies, which are generally covered by X.509 certificates.
+For the foreseeable future, these policies will likely not be updated to include @mtc.

#figure(
  update_mechanism_size,
@@ -294,37 +296,38 @@ These @ca:pl often exist because of various industry and government policies, wh
To evaluate the size estimates, it is helpful to set them in relation to the bandwidth required for browser updates nowadays.
Therefore, we inspected the update size and frequency for Chrome and Firefox.
-@tab:chrome_release and @tab:firefox_release in the Appendix provide an overview about the recent release cadence for Chrome and Firefox, respectively.
-On average, Chrome releases a minor update every week, and a major release roughly every four weeks.
-Likewise, Firefox releases a major update every four weeks, but the minor updates are slightly less frequent with ten days in between on average.
-For Firefox, @tab:firefox_release contains also the update sizes for subsequent updates, which average to 13.2~MB.
+@tab:chrome_release and @tab:firefox_release in the Appendix provide an overview of the recent release cadence for Chrome and Firefox, respectively.
+On average, Chrome releases a minor update every week and a major release roughly every four weeks.
+Likewise, Firefox releases a major update every four weeks, but the minor updates are slightly less frequent, with ten days in between on average.
+For Firefox, @tab:firefox_release also contains the update sizes for subsequent updates, averaging to 13.2~MB.
At the same time, Google claims that a minor update takes approximately 3~MB to 5~MB and a major update takes approximately 10~MB to 15~MB~@chrome_update_size. As a result, we assume that the update bandwidth per Firefox user is about 1.3~MB on a daily average. Similarly, the update bandwidth per Chrome user is about $3 dot 4 "MB" + 1 dot 13 "MB" = 25 "MB"$ per 28~days, i.e., approximately 900~kB per day. -Comparing the bandwidth for current browser updates with the bandwidth for the tree heads reveals that the updates are of reasonable size, especially in the early days of an @mtc ecosystem. +Comparing the bandwidth for current browser updates with the bandwidth for the tree heads reveals that the updates are of reasonable size. +// , especially in the early days of an @mtc ecosystem. Starting with the first row of @tab:mtc_update_size, it becomes clear that the naive update mechanism to send the full validity window including the signature is impractical for 150 active @ca:pl. -For a six-hour update cadence, the update mechanism needs to transfer 11~MB per user and day, which is a full order of magnitude larger than the present update mechanism transfers. -For a smaller ecosystem with 15 @ca:pl even this naive approach could work out. +For a six-hour update cadence, the update mechanism must transfer 11~MB per user and day, a whole order of magnitude more than the present update transfers. +For a smaller ecosystem with 15 @ca:pl, even this naive approach could work out. The update capacity for the @mtc update mechanism would be similarly dimensioned as for the present browser update. In other words, the update bandwidth would need to be doubled. -Nevertheless, the second row shows that the straightforward optimization to distribute only data unknown to the client reduces the required bandwidth significantly. 
-While an @mtc ecosystem with 150 @ca:pl would still require about five times more bandwidth, an ecosystem with 15 @ca:pl adds only 50~% more update bandwidth to the already existing update mechanisms. -Additionally, the daily update sizes in @tab:mtc_update_size assume that an update really happens every six hours. -In practice, the schedule might aim for this update frequency, but updates will not happen that frequently, for example because a computer is turned off during the night, or because of a metered connection. +Nevertheless, the second row shows that the straightforward optimization of distributing only data unknown to the client significantly reduces the required bandwidth. +While an @mtc ecosystem with 150 @ca:pl would still require about five times more bandwidth, an ecosystem with 15 @ca:pl adds only 50~% more update bandwidth to existing update mechanisms. +Additionally, the daily update sizes in @tab:mtc_update_size assume that an update actually happens every six hours. +In practice, the schedule might aim for this update frequency, but updates will not happen that frequently, for example, because a computer is turned off during the night or because of a metered connection. This results in less data being transferred over the day. The third row, which moves the signature checks to the Transparency Service, shows that the update size can be marginal. Even for 150~@ca:pl, this scenario adds only 9~% to the regular Firefox update bandwidth. -As discussed earlier, this requires trust in the Transparency Service and update mechanism. -As the Transparency Service will likely be operated by the browser vendor, there already exists a trust relation, such that omitting the signature checks in the client device seems reasonable. +As discussed, this requires trust in the Transparency Service and update mechanism. 
+As the browser vendor will likely operate the Transparency Service, there already exists a trust relation, such that omitting the signature checks in the client device seems reasonable. -One additional interesting observation, when reflecting on the update sizes, is to compare it with the data transferred for an ordinary page visit. -Therefore, we visited four pages we assumed for a daily usage and measured the transferred data for an initial page load without user interaction while being logged in already. -The results are not representative, but nevertheless serve as an indication. -Loading the webpage of Gmail and Outlook transferred 6.6~MB and 10.6~MB, respectively. +When reflecting on the update sizes, one additional interesting observation is to compare them with the data transferred for an ordinary page visit. +Therefore, we visited four pages we assumed for daily usage and measured the transferred data for an initial page load without user interaction while already logged in. +The results are not representative but nevertheless serve as an indication. +Loading the web pages of Gmail and Outlook transferred 6.6~MB and 10.6~MB, respectively. Loading The New York Times webpage (not logged in) and Google search transferred 2.7~MB and 0.8~MB, respectively. -This demonstrates that even the worst-case scenario with a full update of 150~@ca:pl every six hours transfers just as many data as a single page visit at Outlook, and should therefore not cause serious problems from an end-user perspective. +This demonstrates that even the worst-case scenario with a full update of 150~@ca:pl every six hours transfers just as much data as a single page visit at Outlook and should, therefore, not cause serious problems from an end-user perspective. 
@@ -342,34 +345,34 @@ This demonstrates that even the worst-case scenario with a full update of 150~@c // - Times the number of CAs: $150 dot 18,608 = 2,791,200 => 2.7 "MB"$ // - Daily update without the signatures: == Common File Structure -Besides small update sizes, it is desirable to store @mtc related data on a common place on an @os. +Besides small update sizes, it is desirable to store #gls("mtc")-related data in a common place on an @os. Having a common place for certificates on a single machine has multiple advantages. Firstly, it reduces the number of updates required in the @mtc architecture. Instead of every application pulling its updates, the @os can take care of it for various applications that depend on up-to-date tree heads. Furthermore, applications do not have to implement the update logic themselves. -This does save development resources and reduces the attack surface as there exist fewer different implementations. +This saves development resources and reduces the attack surface as there exist fewer different implementations. -Nowadays, Linux based operating systems such as Debian, RHEL, or Android store certificates on a well-known location for other programs to access it @go_root_store. +Nowadays, Linux-based operating systems such as Debian, RHEL, or Android store certificates in a well-known location for other programs to access~@go_root_store. // Debian, as an example, provides the trusted root certificates as a normal system package, which can be updated with the built-in package manager @debian_ca_certificates. We use the X.509 file structure of Debian as an inspiration to bring up a common file structure. @fig:mtc_client_file_tree shows the file structure we propose for a @rp. The absolute path (`/etc/ssl/mtc`) might vary per distribution. The structure thereafter is more interesting. -We suggest that each @ca lives in its own subdirectory, with the Issuer ID as the directory name. 
-The Issuer ID for @mtc:pl is an @oid, so directory names would look like `123.54.2`.
-The directory contains the @ca parameters, the root hashes of the validity window and optionally the signature of the validity window.
-As mentioned above, the signature is not necessary if the @rp trusts the Transparency Service and update mechanism.
-In this case, the Transparency Service is not operated by a browser vendor, but maybe by the @os vendor.
-Still, the argument remains that a user needs to trust its @os vendor either way and may therefore skip synchronizing the signature.
+We suggest that each @ca lives in its own subdirectory, with the Issuer ID as the directory name.
+The Issuer ID for @mtc:pl is an @oid so that directory names would look like `123.54.2`.
+The directory contains the @ca parameters, the validity window's root hashes, and, optionally, the validity window's signature.
+As mentioned above, the signature is unnecessary if the @rp trusts the Transparency Service and update mechanism.
+In this case, the Transparency Service is not operated by a browser vendor but may be operated by the @os vendor.
+Still, the argument remains that a user needs to trust its @os vendor regardless and may, therefore, skip synchronizing the signature.

In the proposed structure, the validity window contains the same data as specified in the Internet-Draft, namely the batch number and the hashes of all valid tree heads.
The @ca parameters contain the following information:
- The issuer ID, i.e., the @oid of the @ca.
- The signature scheme used to sign the validity windows.
- The public key of the @ca. It must match the signature scheme.
-- The proof type used for inclusion proof in the certificates. As of now, the only option is a #gls("sha")-256 based Merkle Tree inclusion proof.
+- The proof type used for inclusion proof in the certificates. Currently, the only option is a #gls("sha")-256-based Merkle Tree inclusion proof.
- The start time of the @ca, i.e., the time the @ca was set up. This is required to calculate the validity of a certificate based on its batch number. -- The batch duration. This is required to calculate the validity of a certificate based on its batch number as well. -- The validity window size. Again, This is required to calculate the validity of a certificate based on its batch number. +- The batch duration. This is also required to calculate a certificate's validity based on its batch number. +- The validity window size. Again, this is required to calculate a certificate's validity based on its batch number. #figure( @@ -377,15 +380,15 @@ The @ca parameters contain the following information: caption: [Proposed file structure on a @rp. The signature only exists on @rp:pl that are willing to perform the @pq signature check themselves. The public key of the @ca is part of the @ca parameters.] ) -For a server setup, it is likely not as important to aim for a homogeneous file structure. +For a server setup, aiming for a homogeneous file structure is likely not as important. Nevertheless, it is worth making explicit what data is required by an @ap to function in the @mtc architecture. @fig:mtc_server_file_tree provides an example file structure that could be used. As for the file structure of the @rp, we propose to create one directory per Issuer ID. Within that, the valid @mtc certificates are stored, named as `.mtc`. -Adhering to the recommended parameters, there are either one or two valid certificates at a time, because of the overlapping of old and new certificates. -For the @ap, the only relevant information from the @ca parameters are the start time, batch duration, and validity window size to be able to calculate if a certificate is expired. -The Issuer ID is included in the certificates, but it is likely handy to include it in the @ca parameters nevertheless. 
-We propose to keep the format for the @ca parameters the same for @rp and @ap, such that they can share the same parser logic. +Adhering to the recommended parameters, there are either one or two valid certificates at a time because old and new certificates overlap. +For the @ap, the only relevant information from the @ca parameters are the start time, batch duration, and validity window size, which can be used to calculate whether a certificate has expired. +The Issuer ID is included in the certificates, but it is likely handy to include it in the @ca parameters, nevertheless. +We propose keeping the format for the @ca parameters the same for @rp and @ap so that they can share the same parser logic. At the same time, storing some information on the @ap that is not strictly required, does not seem to entail significant downsides. @@ -397,12 +400,12 @@ At the same time, storing some information on the @ap that is not strictly requi // - The signature over the validity window has the advantage that a CA would need to keep a split view over the whole window instead of for a single batch. See https://github.com/davidben/merkle-tree-certs/issues/84. -== CPU Usage -The previous sections spend attention on the bytes that need to be transferred for a #gls("pq")-secure server authentication. +== CPU Usage +The previous sections focused on the bytes that need to be transferred for a #gls("pq") secure server authentication. This section focuses on the computation required for server authentication in both systems, the classical, X.509 based and the @mtc based. -A low computational effort is beneficial for client devices, even though most have sufficient resources for complex computations. +A low computational effort benefits client devices, even though most have sufficient resources for complex computations. Nevertheless, battery-powered devices may last longer and the available computing power can be used for different tasks. 
-For servers, which often handle numerous @tls connections, the computational efficiency is important as well, as they may need to be equipped with more powerful and therefore expensive hardware if @tls handshakes are significantly more laborious. +For servers, which often handle numerous @tls connections, computational efficiency is also important, as they may need to be equipped with more powerful and, therefore, expensive hardware if @tls handshakes are significantly more laborious. We fund our estimates on the SUPERCOP project. SUPERCOP is an acronym for #emph[System for Unified Performance Evaluation Related to Cryptographic Operations and Primitives]. @@ -410,10 +413,10 @@ SUPERCOP publishes a database with benchmarks for various cryptographic primitiv All performance metrics we use were measured on the same machine with an AMD Ryzen~7~7700 with eight CPU cores at 3.8~GHz. Unfortunately, there are no metrics for the final @pq signature algorithms available in the database yet. Therefore, we used the benchmarks of the corresponding, preliminary algorithm versions. -For example, we used the metrics from Dilithium with level two security parameters from the third round of the @nist post quantum competition instead of #gls("mldsa")-44. +For example, we used the metrics from Dilithium with level two security parameters from the third round of the @nist post-quantum competition instead of #gls("mldsa")-44. -It quickly becomes clear that a client verifying an #gls("mtc")-based server authentication requires fewer signature verifications compared to an X.509-based server authentication. -To verify an X.509 certificate chain, the client must typically verify two @sct:pl, maybe an @ocsp staple, one signature in the @ee certificate and one signature in the intermediate certificate. +It quickly becomes clear that a client verifying an #gls("mtc")-based server authentication requires fewer signature verifications than an X.509-based server authentication. 
+To verify an X.509 certificate chain, the client must typically verify two @sct:pl, maybe an @ocsp staple, one signature in the @ee certificate, and one signature in the intermediate certificate. Additionally, the client must verify the handshake signature. To verify an @mtc certificate, the client must traverse up the Merkle Tree up to the root node, but does not require any asymmetric cryptography, assuming the @ca signature was verified either ahead of time or by the Transparency Service. Just as for the X.509 system, the client must verify the handshake signature nevertheless. @@ -422,18 +425,18 @@ To estimate computational costs associated with the tree traversal, we assume a The 21 or 26 level correspond to 280 million or one billion active @ap:pl for a single @ca, as described in @sec:certificate_size. @tab:x509_cpu_cyles approximates the CPU cycles required for verifying an X.509 certificate chain with the same parameters as in @sec:certificate_size. -A first observation is that the verification of @rsa signatures is less computationally expensive than the verification of @ecdsa signatures. +A first observation is that verifying @rsa signatures is less computationally expensive than verifying @ecdsa signatures. Possibly more surprising is that the @pq secure @mldsa signature verification is less expensive than an @ecdsa signature verification. Therefore, a certificate chain using @mldsa for all signatures requires only 27~% of the computation of a fully #gls("ecdsa")-based certificate chain. -Moreover, both @pq scenarios are less computationally expensive for a client compared to the classical scenarios. +Moreover, both @pq scenarios are less computationally expensive for a client than the classical scenarios. Nevertheless, using the @mtc architecture additionally decreases the computational costs for a client. @tab:mtc_cpu_cyles displays the approximated CPU cycles required for validating an @mtc with the same parameters as in @sec:certificate_size. 
As mentioned, only a single signature verification for the handshake is required. -The second variable which determines the computational cost is the number of active @ap:pl for a @ca. +The second variable that determines the computational cost is the number of active @ap:pl for a @ca. Comparing an @ecdsa X.509 certificate chain with an @mtc that holds an @ecdsa key, reveals that the @mtc uses only 19~% of the computation the certificate chain requires. -Comparing the same @ecdsa certificate chain with an @mtc with 280 million active @ap:pl, shows that the @mtc case uses only 6.8~% of the computation the certificate chain requires. -Moreover, comparing the @pq use-cases with each other, we observe a reduction of 73~% to 85~% in the advantage of @mtc. +Comparing the same @ecdsa certificate chain with an @mtc with 280 million active @ap:pl shows that the @mtc case uses only 6.8~% of the computation the certificate chain requires. +Moreover, comparing the @pq use cases with each other, we observe a reduction of 73~% to 85~% in the advantage of @mtc. #figure( x509_cpu_cycles, diff --git a/5_contributions.typ b/5_contributions.typ index 73dcbd2..850c749 100644 --- a/5_contributions.typ +++ b/5_contributions.typ @@ -1,7 +1,7 @@ #import "imports.typ": * #import "figures.typ": * -= Development Insights += Development Insights #figure( implementation, @@ -9,7 +9,7 @@ ) // As part of this work, we implemented parts of the @mtc system and contributed to the standardization process. -The objective of this work was to establish an #gls("mtc")-based @tls connection between an @ap and @rp for the first time, and to contribute enhancements or address errors or ambiguities in the @mtc Internet-Draft along the way. +The objective of this work was to establish an #gls("mtc")-based @tls connection between an @ap and @rp for the first time and to contribute enhancements or address errors or ambiguities in the @mtc Internet-Draft along the way. 
@fig:implementation provides an overview of the implemented components and their interactions.
The Transparency Service and Monitor are grayed out as we bypass them for this proof-of-concept setup and instead copy over the validity window and signature directly from the @ca.
As the icons indicate, we based the @ap and @rp on the #emph[Rustls] project~@github_rustls.
@@ -19,14 +19,14 @@ The @ca is implemented in the programming language Go.
We chose to use Rustls for multiple reasons.
Firstly, writing a whole @tls implementation ourselves seems overcomplicated for this work and fails to demonstrate that the new @mtc system can be integrated well with existing software.
Therefore, we decided to adopt an existing implementation.
-Rustls is a comparably modern implementation of the @tls protocol and cleanly implemented.
-One reason is that it never supported @tls~1.1 or older, which helps with keeping the code base clean and organized.
-Nevertheless, Rustls is a serious project which gained adoption in big production deployments~@lets_encrypt_rustls @rustls_openssl_nginx.
+Rustls is a comparatively modern implementation of the @tls protocol and is cleanly implemented.
+One reason is that it never supported @tls~1.1 or older, which helps keep the code base clean and organized.
+Nevertheless, Rustls is a serious project that gained adoption in big production deployments~@lets_encrypt_rustls @rustls_openssl_nginx.
Furthermore, Rustls is written in the programming language Rust, which, in contrast to C used in other famous @tls implementations such as OpenSSL, BoringSSL, or wolfSSL, provides memory safety.
-Additionally, the strong type system of Rust allows catching possible mistakes in the implementation comparably easy already during compilation.
+Additionally, Rust's strong type system makes catching possible mistakes in the implementation comparatively easy already during compilation.
Moreover, we avoided using the same programming language as for the @ca implementation.
-This requires rewriting some common parts, such as parsing the binary certificate format and checking of the signature. -Additionally, it makes sure that neither of the implementations covertly behaves different from expected. +This requires rewriting some common parts, such as parsing the binary certificate format and checking the signature. +It also ensures that neither of the implementations covertly behaves differently from expected. // We started by adding the type definitions required for the certificate type and trust anchor negotiation mechanisms. @@ -35,13 +35,13 @@ Additionally, it makes sure that neither of the implementations covertly behaves // In the Rust ecosystem, libraries are referred to as #emph[crates]. The integration of @mtc into Rustls necessitated numerous modifications. -First, we added the negotiation mechanism for the certificate type, based on RFC~7250~@rfc_raw_public_keys. +First, we added the negotiation mechanism for the certificate type based on RFC~7250~@rfc_raw_public_keys. The negotiation mechanism relies on extensions exchanged during the `ClientHello` and `ServerHello` messages. This adoption entailed several changes to the Rustls code base, as it needs to keep state about which certificate type was negotiated. Previously, Rustls assumed X.509 certificates and related structures like stapled @ocsp responses at various places. -In addition to the negotiation of the certificate type, we implemented the negotiation mechanism for the #glspl("tai", long: true) as described in @sec:negotiation_tls. +In addition to negotiating the certificate type, we implemented the negotiation mechanism for the #glspl("tai", long: true) as described in @sec:negotiation_tls. Therefore, we extended the certificate selection logic to first match on the requested @tai:pl and fall back to the previously used certificate selection based on the @sni. 
-We simplified the @tai negotiation in that the client does not preselect the @tai:pl it requests in the `ClientHello` based on a @dns query, to simplify the implementation and testing. +We simplified the @tai negotiation so that the client does not preselect the @tai:pl it requests in the `ClientHello` based on a @dns query, simplifying the implementation and testing. // Instead, the client sends all the @tai:pl it supports, which is only a very limited set in our test setup. // In a real deployment, this is not practical due to the possibly large set of supported @tai:pl and fingerprinting possibilities. @@ -55,59 +55,59 @@ For each @tls handshake that negotiated to use @mtc as certificate type, our lib + checks if the recomputed tree head matches the stored, + and checks that the certificate falls in the latest validity window based on the stored @ca parameters and batch number in the certificate. If there are no errors, the certificate validates. -Note that there is no signature validation necessary during the certificate validation, as the @ca signature was checked when loading the tree heads into memory. -This does not mean that there is no signature check happening during the entire handshake. +Note that no signature validation is necessary during the certificate validation, as the @ca signature was checked when loading the tree heads into memory. +This does not mean there is no signature check during the entire handshake. If no optimization such as @kemtls is used, the `CertificateVerify` message still contains a signature over the messages exchanged up to this point. Along the way, we identified some issues in the specification and @ca implementation. // Along the way, the @ca implementation required a few adoptions. -First, we found a mismatch between the test vectors provided in the draft specification due to a 16-bit instead of 8-bit length encoding for @dns names. 
+First, we found a mismatch between the test vectors provided in the draft specification due to the use of a 16-bit instead of 8-bit length encoding for @dns names. The test vectors served as examples for assertions and abridged assertions for given inputs. We adopted the @ca implementation and standard accordingly~@fix_mtc_length_prefix_1 @fix_mtc_length_prefix_2 @fix_mtc_length_prefix_3. -While we worked on this thesis, the Internet-Draft switched to using #glspl("tai", long: true) for identifying the batches. +While we worked on this thesis, the Internet-Draft switched to using #glspl("tai", long: true) to identify the batches. Before, @mtc contained an Issuer ID as an opaque byte string and a batch number. During this switch, the authors of the proposed standard forgot to update the definitions for the hash nodes of the Merkle Tree; we fixed this inconsistency. Additionally, we removed the batch number from the hash input, as it is included in the newly added #gls("tai")-based `batch_id`. -Moreover, we introduced a more concise naming distinguishing `issuer_id` and `batch_id` to make clear where only the @oid part for the issuer is used and where the batch number is appended to the issuer ID~@fix_consitently_use_tai. +Moreover, we introduced a more concise naming convention distinguishing `issuer_id` and `batch_id` to make it clear where only the @oid part for the issuer is used and where the batch number is appended to the issuer ID~@fix_consitently_use_tai. Lastly, we also adopted the @ca implementation to the @tai:pl~@add_mtc_tai @fix_mtc_tai. -When implementing the parser for @tls `Certificate` message, we noticed that a consistent way of embedding the certificate in the @tls message -- independent of the type -- keeps the parsing logic free of external state. 
+When implementing the parser for the @tls `Certificate` message, we noticed that a consistent way of embedding the certificate in the @tls message -- independent of the type -- keeps the parsing logic free of external state. Up to that point, the bytes of the @mtc were embedded into the `Certificate` message without a length prefix. -Strictly seen, a length prefix is not necessary if the parser knows to interpret the certificate as @mtc as it contains all length information needed. -However, in practice the parsing happens without knowledge of the negotiated certificate type even though the application as a whole is already aware of the certificate type. +Strictly seen, a length prefix is unnecessary if the parser knows to interpret the certificate as @mtc as it contains all length information needed. +However, in practice, the parsing happens without knowledge of the negotiated certificate type even though the application as a whole is already aware of it. Interpreting the certificate bytes is postponed to a later stage and possibly passed on to an external library such as the Rustls WebPKI library or our @mtc verification library. -Therefore, it is advisable to allow the parser to treat the certificate as opaque bytes with a given length prefix. -The classical X.509 certificates as well as the `RawPublicKey` certificate type from RFC~7250~@rfc_raw_public_keys do already use a 24~bit length prefix. -We streamlined @mtc draft specification to embed the @mtc in a 24-bit length prefixed byte array as well~@add_array_embedding. +Therefore, allowing the parser to treat the certificate as opaque bytes with a given length prefix is advisable. +The classical X.509 certificates and the `RawPublicKey` certificate type from RFC~7250~@rfc_raw_public_keys already use a 24-bit length prefix. +We streamlined the @mtc draft specification to embed the @mtc in a 24-bit length prefixed byte array as well~@add_array_embedding. 
Moreover, we replaced a pre-standard version of Dilithium with @mldsa in the @ca implementation~@mtc_use_mldsa. This was required to verify the @ca signature in our @mtc verification library. -The @ca implementation used an implementation of the third round of the @nist post quantum signature competition, which has slight incompatibilities with the final specification. -As Rust did not have a library of Dilithium at the same round-three state available, the upgrade to the official @mldsa became necessary, for which libraries in Go and Rust exist. +The @ca implementation used an implementation of the third round of the @nist post-quantum signature competition, which has slight incompatibilities with the final specification. +As Rust did not have a library of Dilithium in the same round-three state available, the upgrade to the official @mldsa became necessary, for which libraries exist in Go and Rust. -Further, we opened a discussion on simplifying the certificate type negotiation, but the proposal turned out not be practical @supersede_certificate_type. +Further, we opened a discussion on simplifying the certificate type negotiation, but the proposal turned out not to be practical~@supersede_certificate_type. The idea was to combine the certificate type negotiation with the negotiation of the trust anchor. As the trust anchors negotiation mechanism works not only for @mtc but also for X.509 and possibly other certificate types, we proposed that the peer contains the selected trust anchor in the @tai extension of the `Certificate` message. -So far, the negotiation mechanism purely indicates that one of the proposed trust anchors was selected, but not which. +So far, the negotiation mechanism merely indicates that one of the proposed trust anchors was selected, but it does not specify which one. 
By changing this indication to include the selected @tai, the peer could deduce the certificate type and therefore a separate certificate type negotiation would be superfluous. -However, Benjamin identified some issues that might arise from the fact that not all certificates participate in the @tai negotiation mechanism. +However, D. Benjamin identified some issues that might arise from the fact that not all certificates participate in the @tai negotiation mechanism. Therefore, some niche cases are not properly covered. For example, if a server sends a fallback certificate that does not participate in the @tai negotiation, or of which the @tai is unknown to the client, the client does not know what certificate type to expect. As a result, the client cannot parse the certificate even if the client would accept it anyway, such as a widely accepted X.509 certificate. Consequently, we closed this discussion without additional modifications. -Lastly, we also suggested the file structure explained in @sec:file_structure to be added to the Internet-Draft @file_structure. +Lastly, we also suggested adding the file structure explained in @sec:file_structure to the Internet-Draft @file_structure. As explained earlier, we hope to achieve a more uniform @mtc ecosystem from that. -As of writing this, the discussion did not really start on that topic yet, so it is unclear if this proposal will get incorporated by the draft standard. +As of this writing, the discussion has not started on that topic yet, so it is unclear if this proposal will be incorporated into the standard~@file_structure. The development efforts resulted in a successful connection @tls handshake between an example client and server based on our modified Rustls version. -We can use the @ca implementation to create certificates, validity windows, and signatures which we copy manually to the directories as proposed in @sec:file_structure. 
+We can use the @ca implementation to create certificates, validity windows, and signatures, which we copy manually to the directories as proposed in @sec:file_structure. The server loads the @mtc certificates next to the fallback X.509 and serves them to the client if negotiated. -The client uses our @mtc verifier implementation to read the available batch tree heads from the disk, to validate the signatures, and to validate the @mtc certificates. -@sec:byte_analysis_handshake compares the exchanged handshake messages on a byte level with the text in the Internet-Drafts and illustrates that our implementation adheres to the draft specifications. -This was necessary as our implementation is the first and interoperability tests with other implementations are therefore not possible. -At the same time, we showed interoperability between the existing @ca implementation in Go with our @mtc verifier written in Rust. +The client uses our @mtc verifier implementation to read the available batch tree heads from the disk, validate the signatures, and validate the @mtc certificates. +@sec:byte_analysis_handshake compares the exchanged handshake messages on a byte level with the text in the Internet-Drafts, illustrating that our implementation adheres to the draft specifications. +This was necessary as our implementation is the first, and interoperability tests with other implementations are, therefore, impossible. +At the same time, we showed interoperability between the existing @ca implementation in Go and our @mtc verifier written in Rust. 
// - Develop certificate verifier in Rust #link("https://github.com/pohlm01/mtc-verifier", "pohlm01/mtc-verifier") diff --git a/6_conclusion.typ b/6_conclusion.typ index 9d1d042..3c8c25b 100644 --- a/6_conclusion.typ +++ b/6_conclusion.typ @@ -1,36 +1,55 @@ #import "imports.typ": * -= Conclusion and Outlook += Conclusion and Outlook Constant achievements in building quantum computers endanger many asymmetric cryptography systems used today. This includes the signatures used in X.509-based certificates, which are used for server identification in @tls connections. -Replacing all signatures in the X.509-based architecture with #gls("pq")-secure signature schemes results in a big expansion of the certificate sizes. -This results in slower connections and more data to be transferred for each @tls handshake. +Replacing all signatures in the X.509-based architecture with #gls("pq")-secure signature schemes results in a large expansion of the certificate sizes. +This ultimately results in slower connections and more data to be transferred for each @tls handshake. -#cite(, form: "author") propose #glspl("mtc", long: true) to supplement the current X.509 architecture which reduces the number of signatures to shrink the size of certificates. +#cite(, form: "author") propose #glspl("mtc", long: true) to supplement the current X.509 architecture, which reduces the number of signatures to shrink the size of certificates. As a trade, certificates cannot be used immediately and the @mtc architecture requires a regular update channel between the Transparency Service and the #gls("rp", long: true). -In this thesis, we analyzed theoretical improvements in terms of data transmission and computational effort when introducing @mtc and implemented a client as well as a server that use @mtc:pl to prove the server identity. 
-We showed that @mtc likely saves about 74~% to 80~% of the bytes related to the cryptographic server authentication compared to X.509 certificates when using #gls("pq")-secure signature schemes. -The actual improvement is even significant, as @mtc:pl use more efficient encoding and require less additional attributes in the certificate, such as not before / not after timestamps or @crl and @ocsp endpoints. +In this thesis, we analyzed theoretical improvements in data transmission when introducing @mtc. +// and implemented a client as well as a server that use @mtc:pl to prove the server identity. +We showed that @mtc saves about 74~% to 80~% of the bytes related to the cryptographic server authentication compared to X.509 certificates when using #gls("pq")-secure signature schemes. +The actual improvement is even more significant, as @mtc:pl use more efficient encoding and require fewer additional attributes in the certificate, such as not before / not after timestamps or @crl and @ocsp endpoints. In favor of small certificates, the @mtc architecture introduces an update mechanism between the Transparency Service and the @rp. We listed three update scenarios with either 150 or 15~@ca:pl and argued that the new update mechanism does not harm the @mtc architecture too much. -Firstly, we think that 15 @mtc @ca:pl is a realistic estimate based on that @mtc are not meant as a replacement but an optional optimization of the current Web@pki. -Therefore, many @ca:pl will refrain from implementing these significant changes into their operation as they mainly serve small use-cases which do not amortize the effort to adopt @mtc. -The second argument we made is that the Transparency Services are likely operated by the browser vendors, which a user must unavoidable trust anyway. -Therefore, the @ca signatures can be checked by the Transparency Service in most cases and therefore save a lot of update bandwidth. 
+Firstly, we think that 15 @mtc @ca:pl is a realistic estimate based on the fact that @mtc:pl are not meant as a replacement but an optional optimization of the current Web@pki. +Therefore, many @ca:pl will refrain from implementing these significant changes into their operation as many of them serve small use cases that do not amortize the effort to adopt @mtc. +The second argument we made is that the Transparency Services are likely operated by the browser vendors, which a user must unavoidably trust anyway. +Therefore, the Transparency Service can usually check the @ca signatures, ultimately saving a lot of update bandwidth. This results in about 12~kB update bandwidth per day and @rp. -Compared to about 900~kB to 1,300~kB per day for application updates in Chrome and Firefox, this is only a small addition. -Additionally, a single @tls handshake with @mtc instead of a @pq X.509 certificate chain amortizes the daily updates. -Other scenarios require a bigger update bandwidth, but we expect them to be relevant for only few instances (client signature checks), or far in the future if at all (150~@ca:pl). +This is only a small addition compared to the roughly 900~kB to 1,300~kB per day for application updates in Chrome and Firefox. +A single @tls handshake with @mtc instead of a @pq X.509 certificate chain amortizes the daily updates. +Other scenarios require a bigger update bandwidth, but we expect them to be relevant for only a few instances (clients that perform signature checks), or far in the future if at all (150~@ca:pl). In addition to the size analysis, we estimated the computational cost associated with X.509 and @mtc. -We pointed out that there is no difference for a server, but clients can save about 81~% to 93~% in computational cost per handshake when using classical signature algorithms and about 73~% to 85~% when using #gls("pq")-save signature algorithms. +We pointed out that there is no difference for a server. 
+Still, clients can save about 81~% to 93~% in computational cost per handshake when using classical signature algorithms and about 73~% to 85~% when using #gls("pq")-safe signature algorithms. +This is mainly because clients have to perform way fewer signature verifications, which are computationally expensive. +Instead, clients have to perform hash operations to rebuild the Merkle Tree. +Because hash operations are much more lightweight than signature verifications, the client saves computational resources, which in turn helps with a longer battery life or frees up resources for other tasks. + +To explore the practicality of the @mtc architecture, we adapted the @tls library #emph[Rustls] to support @mtc:pl. +This included the negotiation mechanisms for the certificate type and the specific trust anchor, i.e., the specific @mtc batch. +Additionally, we developed a library that validates @mtc:pl and integrated this into Rustls. +We successfully performed a handshake between the modified client and server and analyzed it on a byte level to verify conformance with the specification. + +Overall, our work showed that the @mtc architecture has the potential to mitigate or even overcompensate for the performance penalty associated with introducing @pq secure algorithms for server identification. +Nevertheless, there are still some open points that should be investigated further. +One point could be to investigate the memory usage associated with the use of the X.509 and @mtc architectures. +Another challenging task is to design an update mechanism that safely transfers the batch tree heads from the Transparency Service to the @rp. +To be practical, it must be reasonably small, i.e., the overhead to create a secure channel must not be too big. +At the same time, this update protocol must be secured against quantum computers to create an end-to-end secure architecture. +Lastly, it must not solely rely on @mtc:pl, as it is designed to bootstrap @mtc. 
+ +Finally, it is up to the big companies to run real-world experiments and use their telemetry collection mechanism to gather information that shows how the mostly theoretical numbers from this work translate to big deployments. +From what I perceived from the community, I expect this will happen in 2025. -To explore the practicality of the @mtc architecture, we adopted Rustls to support @mtc:pl. -This // - Problem statement (the bigger picture) // - Quantum computers endanger current server identity validation in TLS @@ -53,19 +72,20 @@ This // - PQ is better than ECDSA anyway (Better than RSA for the server) // - MTC is even better -- Common file structure -- Implemented - - Changes in Rustls - - Negotiation - - MTC verifier -- Various changes to the I-D +// - Implemented +// - Changes in Rustls +// - Negotiation +// - MTC verifier -- Figure out a good update mechanism - - Must be PQ save - - Must be reasonably small - - Must work without MTC -- Analyze memory usage -- Use MTC in a larger real-world experiment - - Collect telemetry data -- My personal expectation: There will be bigger tests within the Google ecosystem in 2025 \ No newline at end of file +// - Figure out a good update mechanism +// - Must be PQ save +// - Must be reasonably small +// - Must work without MTC +// - Analyze memory usage +// - Use MTC in a larger real-world experiment +// - Collect telemetry data +// - My personal expectation: There will be bigger tests within the Google ecosystem in 2025 + +// - Various changes to the I-D +// - Common file structure diff --git a/main.typ b/main.typ index 66b075b..2aee427 100644 --- a/main.typ +++ b/main.typ @@ -2,7 +2,7 @@ #import "imports.typ": * -#import "style/radboud_cover.typ": title_page +#import "style/radboud_cover.typ": * #import "style/ru_template.typ": report, appendix #import "style/todo.typ": outline-todos #import "A_abbreviations.typ": abbreviations @@ -36,13 +36,11 @@ #set document(title: [#title - #subtitle], author: author) -// 
#outline-todos() - #show: make-glossary -#show: doc => report(table_of_contents: true, doc) - #register-glossary(abbreviations) +#show: doc => report(doc, table_of_contents: true, abstract: [#include "0_abstract.typ"]) + // #word-count(total => [ #include "1_introduction.typ" // #total.words words in total]) diff --git a/style/ru_template.typ b/style/ru_template.typ index c8b5b4c..44dbb23 100644 --- a/style/ru_template.typ +++ b/style/ru_template.typ @@ -1,4 +1,4 @@ -#let report(doc, table_of_contents: true) = { +#let report(doc, table_of_contents: true, abstract: none) = { set text(font: "New Computer Modern") show link: set text(fill: rgb(0, 0, 180)) @@ -67,6 +67,15 @@ ) } + if abstract != none { + set page(margin: 6cm) + align(horizon, box[ + #align(center, strong[Abstract]) + #v(1em) + #abstract + ]) + } + if table_of_contents { set par(spacing: 0.0em) @@ -78,7 +87,8 @@ counter(page).update(0) set page( numbering: "1", - margin: (x: 8em, top:10em, bottom: 14em)) + margin: (x: 8em, top:10em, bottom: 14em) + ) doc }