Skip to content

Commit

Permalink
LibWeb: Implement TextEncoderStream
Browse files Browse the repository at this point in the history
Required by the server-side rendering mode of React Router, used by
https://chatgpt.com/

Note that the imported tests do not have the worker variants to prevent
freezing on macOS.
  • Loading branch information
Lubrsi authored and trflynn89 committed Feb 7, 2025
1 parent 24d5f24 commit cae0ee6
Show file tree
Hide file tree
Showing 36 changed files with 1,375 additions and 0 deletions.
1 change: 1 addition & 0 deletions Libraries/LibWeb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ set(SOURCES
Encoding/TextDecoder.cpp
Encoding/TextEncoder.cpp
Encoding/TextEncoderCommon.cpp
Encoding/TextEncoderStream.cpp
EntriesAPI/FileSystemEntry.cpp
EventTiming/PerformanceEventTiming.cpp
Fetch/Body.cpp
Expand Down
215 changes: 215 additions & 0 deletions Libraries/LibWeb/Encoding/TextEncoderStream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <AK/UnicodeUtils.h>
#include <LibJS/Runtime/ArrayBuffer.h>
#include <LibJS/Runtime/Realm.h>
#include <LibJS/Runtime/TypedArray.h>
#include <LibWeb/Bindings/ExceptionOrUtils.h>
#include <LibWeb/Bindings/Intrinsics.h>
#include <LibWeb/Bindings/TextEncoderStreamPrototype.h>
#include <LibWeb/Encoding/TextEncoderStream.h>
#include <LibWeb/Streams/AbstractOperations.h>
#include <LibWeb/Streams/TransformStream.h>
#include <LibWeb/WebIDL/Promise.h>

namespace Web::Encoding {

GC_DEFINE_ALLOCATOR(TextEncoderStream);

// https://encoding.spec.whatwg.org/#dom-textencoderstream
// Creates a TextEncoderStream together with the TransformStream backing it, and installs transform and flush
// algorithms on that TransformStream which call encode_and_enqueue_chunk() and encode_and_flush() respectively.
// Either algorithm returns a promise rejected with the thrown value if its encode step fails, and a promise resolved
// with undefined otherwise.
WebIDL::ExceptionOr<GC::Ref<TextEncoderStream>> TextEncoderStream::construct_impl(JS::Realm& realm)
{
    // 1. Set this’s encoder to an instance of the UTF-8 encoder.
    // NOTE: No-op, as AK::String is already in UTF-8 format.

    // NOTE: Steps 4 and 6 run ahead of steps 2, 3 and 5: the TextEncoderStream constructor takes its TransformStream
    //       as a GC::Ref, and both algorithms below capture the finished TextEncoderStream.
    // 4. Let transformStream be a new TransformStream.
    auto backing_stream = realm.create<Streams::TransformStream>(realm);

    // 6. Set this’s transform to transformStream.
    auto encoder_stream = realm.create<TextEncoderStream>(realm, backing_stream);

    // 2. Let transformAlgorithm be an algorithm which takes a chunk argument and runs the encode and enqueue a chunk
    //    algorithm with this and chunk.
    auto on_transform = GC::create_function(realm.heap(), [encoder_stream](JS::Value chunk) -> GC::Ref<WebIDL::Promise> {
        auto& stream_realm = encoder_stream->realm();
        auto& stream_vm = stream_realm.vm();

        auto outcome = encoder_stream->encode_and_enqueue_chunk(chunk);
        if (!outcome.is_error())
            return WebIDL::create_resolved_promise(stream_realm, JS::js_undefined());

        auto completion = Bindings::exception_to_throw_completion(stream_vm, outcome.exception());
        return WebIDL::create_rejected_promise(stream_realm, *completion.release_value());
    });

    // 3. Let flushAlgorithm be an algorithm which runs the encode and flush algorithm with this.
    auto on_flush = GC::create_function(realm.heap(), [encoder_stream]() -> GC::Ref<WebIDL::Promise> {
        auto& stream_realm = encoder_stream->realm();
        auto& stream_vm = stream_realm.vm();

        auto outcome = encoder_stream->encode_and_flush();
        if (!outcome.is_error())
            return WebIDL::create_resolved_promise(stream_realm, JS::js_undefined());

        auto completion = Bindings::exception_to_throw_completion(stream_vm, outcome.exception());
        return WebIDL::create_rejected_promise(stream_realm, *completion.release_value());
    });

    // 5. Set up transformStream with transformAlgorithm set to transformAlgorithm and flushAlgorithm set to flushAlgorithm.
    backing_stream->set_up(on_transform, on_flush);

    return encoder_stream;
}

// Forwards `realm` to the PlatformObject base and `transform` to the GenericTransformStreamMixin base. The constructor
// body is empty; construct_impl() installs the transform/flush algorithms on `transform` after this object exists.
TextEncoderStream::TextEncoderStream(JS::Realm& realm, GC::Ref<Streams::TransformStream> transform)
: Bindings::PlatformObject(realm)
, Streams::GenericTransformStreamMixin(transform)
{
}

// Out-of-line definition of the destructor declared in the header; it is compiler-generated (defaulted).
TextEncoderStream::~TextEncoderStream() = default;

// Runs the base class's initialize() for `realm`, then invokes WEB_SET_PROTOTYPE_FOR_INTERFACE for the
// TextEncoderStream interface (presumably installing the generated prototype object; see the macro's definition).
void TextEncoderStream::initialize(JS::Realm& realm)
{
Base::initialize(realm);
WEB_SET_PROTOTYPE_FOR_INTERFACE(TextEncoderStream);
}

// Passes `visitor` to both the base class and the GenericTransformStreamMixin. This class declares no GC-managed
// members of its own (m_leading_surrogate is a plain Optional<u32>), so nothing else is visited here.
void TextEncoderStream::visit_edges(JS::Cell::Visitor& visitor)
{
Base::visit_edges(visitor);
Streams::GenericTransformStreamMixin::visit_edges(visitor);
}

// https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
// Converts `chunk` to a string, turns each of its items into a scalar value (carrying a pending leading surrogate
// across calls via convert_code_unit_to_scalar_value()), and collects the UTF-8 encoding of those scalar values.
// If any bytes were produced, they are enqueued into the transform as one Uint8Array.
// Returns an error if the string conversion or the enqueue fails.
WebIDL::ExceptionOr<void> TextEncoderStream::encode_and_enqueue_chunk(JS::Value chunk)
{
    // Spec Note: This is equivalent to the "convert a string into a scalar value string" algorithm from the Infra
    //            Standard, but allows for surrogate pairs that are split between strings. [INFRA]

    auto& realm = this->realm();
    auto& vm = this->vm();

    // 1. Let input be the result of converting chunk to a DOMString.
    auto input = TRY(chunk.to_string(vm));

    // 2. Convert input to an I/O queue of code units.
    // Spec Note: DOMString, as well as an I/O queue of code units rather than scalar values, are used here so that a
    //            surrogate pair that is split between chunks can be reassembled into the appropriate scalar value.
    //            The behavior is otherwise identical to USVString. In particular, lone surrogates will be replaced
    //            with U+FFFD.
    auto input_items = input.code_points();

    // 3. Let output be the I/O queue of bytes « end-of-queue ».
    ByteBuffer encoded_bytes;

    // 4. While true:
    // NOTE: The end-of-queue test (step 4.2) doubles as the loop condition, so the iterator is never dereferenced at
    //       the end. The loop header has no increment: convert_code_unit_to_scalar_value() advances the iterator
    //       itself, except when it restores the item to the input.
    for (auto cursor = input_items.begin(); !cursor.done();) {
        // 4.1. Let item be the result of reading from input.
        auto item = *cursor;

        // 4.3. Let result be the result of executing the convert code unit to scalar value algorithm with encoder,
        //      item and input.
        auto scalar_value = convert_code_unit_to_scalar_value(item, cursor);

        // 4.4. If result is not continue, then process an item with result, encoder’s encoder, input, output, and
        //      "fatal".
        if (!scalar_value.has_value())
            continue;

        (void)AK::UnicodeUtils::code_point_to_utf8(scalar_value.value(), [&encoded_bytes](char utf8_byte) {
            encoded_bytes.append(static_cast<u8>(utf8_byte));
        });
    }

    // 4.2. If item is end-of-queue, then:
    // 4.2.1. Convert output into a byte sequence.
    // NOTE: No-op.

    // 4.2.2. If output is non-empty, then:
    if (encoded_bytes.is_empty()) {
        // 4.2.3. Return.
        return {};
    }

    // 4.2.2.1. Let chunk be a Uint8Array object wrapping an ArrayBuffer containing output.
    auto array_buffer = JS::ArrayBuffer::create(realm, move(encoded_bytes));
    auto array = JS::Uint8Array::create(realm, array_buffer->byte_length(), *array_buffer);

    // 4.2.2.2. Enqueue chunk into encoder’s transform.
    TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), array));

    // 4.2.3. Return.
    return {};
}

// https://encoding.spec.whatwg.org/#encode-and-flush
// If a leading surrogate is still pending, enqueues the UTF-8 encoding of U+FFFD into the transform as a final
// Uint8Array chunk. Does nothing when no leading surrogate is pending.
// Returns an error if the enqueue fails.
WebIDL::ExceptionOr<void> TextEncoderStream::encode_and_flush()
{
    // 1. If encoder’s leading surrogate is non-null, then:
    if (!m_leading_surrogate.has_value())
        return {};

    auto& realm = this->realm();

    // 1. Let chunk be a Uint8Array object wrapping an ArrayBuffer containing 0xEF 0xBF 0xBD.
    // Spec Note: This is U+FFFD (�) in UTF-8 bytes.
    constexpr static u8 replacement_character_utf8_bytes[3] = { 0xEF, 0xBF, 0xBD };
    auto bytes = MUST(ByteBuffer::copy(replacement_character_utf8_bytes, sizeof(replacement_character_utf8_bytes)));

    // NOTE: The buffer is moved into the ArrayBuffer, as encode_and_enqueue_chunk() does, rather than copied.
    auto array_buffer = JS::ArrayBuffer::create(realm, move(bytes));
    auto chunk = JS::Uint8Array::create(realm, array_buffer->byte_length(), *array_buffer);

    // 2. Enqueue chunk into encoder’s transform.
    TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), chunk));

    return {};
}

// https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value
// Runs the "convert code unit to scalar value" algorithm on `item`, the value `code_point_iterator` currently yields.
// Returns the scalar value to encode, or an empty Optional for the spec's "continue" result (item was a leading
// surrogate, which is now remembered in m_leading_surrogate).
// `code_point_iterator` is advanced past `item` on every path except "restore item to input", where it is left in
// place so that the caller reads the same item again.
Optional<u32> TextEncoderStream::convert_code_unit_to_scalar_value(u32 item, Utf8CodePointIterator& code_point_iterator)
{
    // NOTE: `item` is read from a code point iterator, so it may be larger than 0xFFFF. Surrogates are therefore
    //       classified by comparing the full 32-bit value: a check that only looks at 16 bits would mistake a
    //       supplementary code point whose low half lies in the surrogate range (e.g. U+1D800) for a surrogate.
    constexpr auto is_leading_surrogate = [](u32 value) { return value >= 0xD800 && value <= 0xDBFF; };
    constexpr auto is_trailing_surrogate = [](u32 value) { return value >= 0xDC00 && value <= 0xDFFF; };

    // Advances the iterator when this function returns, unless disarmed below.
    ArmedScopeGuard move_to_next_code_point_guard = [&] {
        ++code_point_iterator;
    };

    // 1. If encoder’s leading surrogate is non-null, then:
    if (m_leading_surrogate.has_value()) {
        // 1. Let leadingSurrogate be encoder’s leading surrogate.
        auto leading_surrogate = m_leading_surrogate.value();

        // 2. Set encoder’s leading surrogate to null.
        m_leading_surrogate.clear();

        // 3. If item is a trailing surrogate, then return a scalar value from surrogates given leadingSurrogate
        //    and item.
        if (is_trailing_surrogate(item)) {
            // https://encoding.spec.whatwg.org/#scalar-value-from-surrogates
            // To obtain a scalar value from surrogates, given a leading surrogate leading and a trailing surrogate
            // trailing, return 0x10000 + ((leading − 0xD800) << 10) + (trailing − 0xDC00).
            // NOTE: Both values are known to be within the 16-bit surrogate ranges here, so the casts are lossless.
            return Utf16View::decode_surrogate_pair(static_cast<u16>(leading_surrogate), static_cast<u16>(item));
        }

        // 4. Restore item to input.
        move_to_next_code_point_guard.disarm();

        // 5. Return U+FFFD.
        return 0xFFFD;
    }

    // 2. If item is a leading surrogate, then set encoder’s leading surrogate to item and return continue.
    if (is_leading_surrogate(item)) {
        m_leading_surrogate = item;
        return OptionalNone {};
    }

    // 3. If item is a trailing surrogate, then return U+FFFD.
    if (is_trailing_surrogate(item))
        return 0xFFFD;

    // 4. Return item.
    return item;
}

}
41 changes: 41 additions & 0 deletions Libraries/LibWeb/Encoding/TextEncoderStream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#pragma once

#include <LibWeb/Bindings/PlatformObject.h>
#include <LibWeb/Encoding/TextEncoderCommon.h>
#include <LibWeb/Streams/GenericTransformStream.h>

namespace Web::Encoding {

// https://encoding.spec.whatwg.org/#textencoderstream
// Platform object for the TextEncoderStream interface. Chunks written to it are converted to strings and encoded as
// UTF-8; the resulting bytes are enqueued as Uint8Array chunks into the TransformStream held by
// GenericTransformStreamMixin.
class TextEncoderStream final
: public Bindings::PlatformObject
, public Streams::GenericTransformStreamMixin
, public TextEncoderCommonMixin {
WEB_PLATFORM_OBJECT(TextEncoderStream, Bindings::PlatformObject);
GC_DECLARE_ALLOCATOR(TextEncoderStream);

public:
// Implements the `constructor()` operation: creates the stream and its backing TransformStream.
static WebIDL::ExceptionOr<GC::Ref<TextEncoderStream>> construct_impl(JS::Realm&);
virtual ~TextEncoderStream() override;

private:
// Private: instances are created through construct_impl().
TextEncoderStream(JS::Realm&, GC::Ref<Streams::TransformStream>);

virtual void initialize(JS::Realm&) override;
virtual void visit_edges(Cell::Visitor&) override;

// https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
WebIDL::ExceptionOr<void> encode_and_enqueue_chunk(JS::Value);
// https://encoding.spec.whatwg.org/#encode-and-flush
WebIDL::ExceptionOr<void> encode_and_flush();

// https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value
// Returns an empty Optional for the spec's "continue" result. Advances `code_point_iterator` past `item` unless the
// item is restored to the input.
Optional<u32> convert_code_unit_to_scalar_value(u32 item, Utf8CodePointIterator& code_point_iterator);

// https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate
// Leading surrogate seen at the end of earlier input that has not yet been paired; empty when none is pending.
Optional<u32> m_leading_surrogate;
};

}
11 changes: 11 additions & 0 deletions Libraries/LibWeb/Encoding/TextEncoderStream.idl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#import <Encoding/TextEncoder.idl>
#import <Streams/GenericTransformStream.idl>

// https://encoding.spec.whatwg.org/#textencoderstream
// The interface declares only a no-argument constructor of its own; all other members come from the two mixins
// included below (TextEncoderCommon and GenericTransformStream).
[Exposed=*]
interface TextEncoderStream {
constructor();
};

TextEncoderStream includes TextEncoderCommon;
TextEncoderStream includes GenericTransformStream;
1 change: 1 addition & 0 deletions Libraries/LibWeb/Forward.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ class XMLSerializer;
namespace Web::Encoding {
class TextDecoder;
class TextEncoder;
class TextEncoderStream;

struct TextDecodeOptions;
struct TextDecoderOptions;
Expand Down
1 change: 1 addition & 0 deletions Libraries/LibWeb/idl_files.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ libweb_js_bindings(DOMURL/DOMURL)
libweb_js_bindings(DOMURL/URLSearchParams ITERABLE)
libweb_js_bindings(Encoding/TextDecoder)
libweb_js_bindings(Encoding/TextEncoder)
libweb_js_bindings(Encoding/TextEncoderStream)
libweb_js_bindings(EntriesAPI/FileSystemEntry)
libweb_js_bindings(EventTiming/PerformanceEventTiming)
libweb_js_bindings(Fetch/Headers ITERABLE)
Expand Down
1 change: 1 addition & 0 deletions Tests/LibWeb/Text/expected/all-window-properties.txt
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ SyntaxError
Text
TextDecoder
TextEncoder
TextEncoderStream
TextEvent
TextMetrics
TextTrack
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Harness status: Error

Found 0 tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Harness status: Error

Found 0 tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Harness status: OK

Found 4 tests

2 Pass
2 Fail
Fail write() should not complete until read relieves backpressure for TextDecoderStream
Fail additional writes should wait for backpressure to be relieved for class TextDecoderStream
Pass write() should not complete until read relieves backpressure for TextEncoderStream
Pass additional writes should wait for backpressure to be relieved for class TextEncoderStream
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Harness status: Error

Found 6 tests

1 Pass
5 Fail
Pass a chunk that cannot be converted to a string should error the streams
Fail input of type undefined should be converted correctly to string
Fail input of type null should be converted correctly to string
Fail input of type numeric should be converted correctly to string
Fail input of type object should be converted correctly to string
Fail input of type array should be converted correctly to string
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Harness status: OK

Found 19 tests

19 Pass
Pass encoding one string of UTF-8 should give one complete chunk
Pass a character split between chunks should be correctly encoded
Pass a character following one split between chunks should be correctly encoded
Pass two consecutive astral characters each split down the middle should be correctly reassembled
Pass two consecutive astral characters each split down the middle with an invalid surrogate in the middle should be correctly encoded
Pass a stream ending in a leading surrogate should emit a replacement character as a final chunk
Pass an unmatched surrogate at the end of a chunk followed by an astral character in the next chunk should be replaced with the replacement character at the start of the next output chunk
Pass an unmatched surrogate at the end of a chunk followed by an ascii character in the next chunk should be replaced with the replacement character at the start of the next output chunk
Pass an unmatched surrogate at the end of a chunk followed by a plane 1 character split into two chunks should result in the encoded plane 1 character appearing in the last output chunk
Pass two leading chunks should result in two replacement characters
Pass a non-terminal unpaired leading surrogate should immediately be replaced
Pass a terminal unpaired trailing surrogate should immediately be replaced
Pass a leading surrogate chunk should be carried past empty chunks
Pass a leading surrogate chunk should error when it is clear it didn't form a pair
Pass an empty string should result in no output chunk
Pass a leading empty chunk should be ignored
Pass a trailing empty chunk should be ignored
Pass a plain ASCII chunk should be converted
Pass characters in the ISO-8859-1 range should be encoded correctly
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Harness status: OK

Found 4 tests

2 Pass
2 Fail
Fail TextDecoderStream: write in detached realm should succeed
Pass TextEncoderStream: write in detached realm should succeed
Pass TextEncoderStream: close in detached realm should succeed
Fail TextDecoderStream: close in detached realm should succeed
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Harness status: Error

Found 0 tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Harness status: Error

Found 0 tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Harness status: OK

Found 2 tests

1 Pass
1 Fail
Pass TextEncoderStream readable and writable properties must pass brand checks
Fail TextDecoderStream readable and writable properties must pass brand checks
Loading

0 comments on commit cae0ee6

Please sign in to comment.