forked from LadybirdBrowser/ladybird
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Required by the server-side rendering mode of React Router, which is used by https://chatgpt.com/. Note that the imported tests do not include the worker variants, to prevent freezing on macOS.
- Loading branch information
Showing
36 changed files
with
1,375 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
/* | ||
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org> | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause | ||
*/ | ||
|
||
#include <AK/UnicodeUtils.h> | ||
#include <LibJS/Runtime/ArrayBuffer.h> | ||
#include <LibJS/Runtime/Realm.h> | ||
#include <LibJS/Runtime/TypedArray.h> | ||
#include <LibWeb/Bindings/ExceptionOrUtils.h> | ||
#include <LibWeb/Bindings/Intrinsics.h> | ||
#include <LibWeb/Bindings/TextEncoderStreamPrototype.h> | ||
#include <LibWeb/Encoding/TextEncoderStream.h> | ||
#include <LibWeb/Streams/AbstractOperations.h> | ||
#include <LibWeb/Streams/TransformStream.h> | ||
#include <LibWeb/WebIDL/Promise.h> | ||
|
||
namespace Web::Encoding { | ||
|
||
GC_DEFINE_ALLOCATOR(TextEncoderStream); | ||
|
||
// https://encoding.spec.whatwg.org/#dom-textencoderstream
// Creates a TextEncoderStream together with the TransformStream it wraps, and wires the transform and flush
// algorithms of that TransformStream to this object's encode routines.
WebIDL::ExceptionOr<GC::Ref<TextEncoderStream>> TextEncoderStream::construct_impl(JS::Realm& realm)
{
    // 1. Set this’s encoder to an instance of the UTF-8 encoder.
    // NOTE: Nothing to do, since AK::String is already in UTF-8 format.

    // NOTE: Steps 4 and 6 are performed ahead of steps 2 and 3 so that the transform can be passed to the
    //       constructor (and stored as nonnull in the GenericTransformStream), and so that the algorithms
    //       below can capture the newly created object.
    // 4. Let transformStream be a new TransformStream.
    auto underlying_transform = realm.create<Streams::TransformStream>(realm);

    // 6. Set this’s transform to transformStream.
    auto encoder_stream = realm.create<TextEncoderStream>(realm, underlying_transform);

    // 2. Let transformAlgorithm be an algorithm which takes a chunk argument and runs the encode and enqueue a chunk
    //    algorithm with this and chunk.
    auto transform_algorithm = GC::create_function(realm.heap(), [encoder_stream](JS::Value chunk) -> GC::Ref<WebIDL::Promise> {
        auto& stream_realm = encoder_stream->realm();

        auto outcome = encoder_stream->encode_and_enqueue_chunk(chunk);
        if (!outcome.is_error())
            return WebIDL::create_resolved_promise(stream_realm, JS::js_undefined());

        // Report the failure as a rejected promise carrying the thrown value.
        auto completion = Bindings::exception_to_throw_completion(stream_realm.vm(), outcome.exception());
        return WebIDL::create_rejected_promise(stream_realm, *completion.release_value());
    });

    // 3. Let flushAlgorithm be an algorithm which runs the encode and flush algorithm with this.
    auto flush_algorithm = GC::create_function(realm.heap(), [encoder_stream]() -> GC::Ref<WebIDL::Promise> {
        auto& stream_realm = encoder_stream->realm();

        auto outcome = encoder_stream->encode_and_flush();
        if (!outcome.is_error())
            return WebIDL::create_resolved_promise(stream_realm, JS::js_undefined());

        // Report the failure as a rejected promise carrying the thrown value.
        auto completion = Bindings::exception_to_throw_completion(stream_realm.vm(), outcome.exception());
        return WebIDL::create_rejected_promise(stream_realm, *completion.release_value());
    });

    // 5. Set up transformStream with transformAlgorithm set to transformAlgorithm and flushAlgorithm set to flushAlgorithm.
    underlying_transform->set_up(transform_algorithm, flush_algorithm);

    return encoder_stream;
}
|
||
// Constructs the platform object in `realm` and passes `transform` on to the GenericTransformStreamMixin base.
// Only reachable through construct_impl(), which creates the TransformStream first.
TextEncoderStream::TextEncoderStream(JS::Realm& realm, GC::Ref<Streams::TransformStream> transform)
    : Bindings::PlatformObject(realm)
    , Streams::GenericTransformStreamMixin(transform)
{
}

// Defaulted: no teardown beyond that of the members and bases.
TextEncoderStream::~TextEncoderStream() = default;
|
||
// Runs the base class initialization for `realm`, then sets this object's prototype through
// WEB_SET_PROTOTYPE_FOR_INTERFACE for the TextEncoderStream interface.
void TextEncoderStream::initialize(JS::Realm& realm)
{
    Base::initialize(realm);
    WEB_SET_PROTOTYPE_FOR_INTERFACE(TextEncoderStream);
}
|
||
// Forwards the visitor to both bases: the platform object base first, then the transform stream mixin
// (which receives the transform in the constructor).
void TextEncoderStream::visit_edges(JS::Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);
    Streams::GenericTransformStreamMixin::visit_edges(visitor);
}
|
||
// https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
// Converts `chunk` to a string, encodes it as UTF-8 and, if any bytes were produced, enqueues them on the
// transform as a single Uint8Array. A failure of the string conversion or of the enqueue is returned to the caller.
WebIDL::ExceptionOr<void> TextEncoderStream::encode_and_enqueue_chunk(JS::Value chunk)
{
    // Spec Note: This is equivalent to the "convert a string into a scalar value string" algorithm from the Infra
    //            Standard, but allows for surrogate pairs that are split between strings. [INFRA]

    auto& vm = this->vm();
    auto& realm = this->realm();

    // 1. Let input be the result of converting chunk to a DOMString.
    auto input = TRY(chunk.to_string(vm));

    // 2. Convert input to an I/O queue of code units.
    // Spec Note: DOMString, as well as an I/O queue of code units rather than scalar values, are used here so that a
    //            surrogate pair that is split between chunks can be reassembled into the appropriate scalar value.
    //            The behavior is otherwise identical to USVString. In particular, lone surrogates will be replaced
    //            with U+FFFD.
    auto code_point_view = input.code_points();
    auto cursor = code_point_view.begin();

    // 3. Let output be the I/O queue of bytes « end-of-queue ».
    ByteBuffer encoded_bytes;

    // 4. While true:
    // NOTE: The end-of-queue test of step 4.2 serves as the loop condition, so the iterator is never dereferenced
    //       at the end. The end-of-queue handling itself follows the loop.
    while (!cursor.done()) {
        // 1. Let item be the result of reading from input.
        auto item = *cursor;

        // 3. Let result be the result of executing the convert code unit to scalar value algorithm with encoder, item and input.
        // NOTE: convert_code_unit_to_scalar_value() is what advances the iterator; this loop never increments it.
        auto scalar_value = convert_code_unit_to_scalar_value(item, cursor);

        // 4. If result is not continue, then process an item with result, encoder’s encoder, input, output, and "fatal".
        if (!scalar_value.has_value())
            continue;

        (void)AK::UnicodeUtils::code_point_to_utf8(scalar_value.value(), [&encoded_bytes](char utf8_byte) {
            encoded_bytes.append(static_cast<u8>(utf8_byte));
        });
    }

    // 2. If item is end-of-queue, then:
    //     1. Convert output into a byte sequence.
    //        Note: No-op.

    //     2. If output is non-empty, then:
    if (!encoded_bytes.is_empty()) {
        // 1. Let chunk be a Uint8Array object wrapping an ArrayBuffer containing output.
        auto array_buffer = JS::ArrayBuffer::create(realm, move(encoded_bytes));
        auto array = JS::Uint8Array::create(realm, array_buffer->byte_length(), *array_buffer);

        // 2. Enqueue chunk into encoder’s transform.
        TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), array));
    }

    //     3. Return.
    return {};
}
|
||
// https://encoding.spec.whatwg.org/#encode-and-flush
// Runs as the flush algorithm of the underlying TransformStream. If a leading surrogate is still pending, it
// can no longer be paired, so a single U+FFFD (as UTF-8) is enqueued in its place. A failure of the enqueue
// is returned to the caller.
WebIDL::ExceptionOr<void> TextEncoderStream::encode_and_flush()
{
    auto& realm = this->realm();

    // 1. If encoder’s leading surrogate is non-null, then:
    if (m_leading_surrogate.has_value()) {
        // 1. Let chunk be a Uint8Array object wrapping an ArrayBuffer containing 0xEF 0xBF 0xBD.
        // Spec Note: This is U+FFFD (�) in UTF-8 bytes.
        constexpr static u8 replacement_character_utf8_bytes[3] = { 0xEF, 0xBF, 0xBD };
        auto bytes = MUST(ByteBuffer::copy(replacement_character_utf8_bytes, sizeof(replacement_character_utf8_bytes)));

        // NOTE: `bytes` is not needed afterwards, so hand it over instead of copying it, in the same way
        //       encode_and_enqueue_chunk() hands over its output buffer.
        auto array_buffer = JS::ArrayBuffer::create(realm, move(bytes));
        auto chunk = JS::Uint8Array::create(realm, array_buffer->byte_length(), *array_buffer);

        // 2. Enqueue chunk into encoder’s transform.
        TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), chunk));
    }

    return {};
}
|
||
// https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value
// Maps one item read from the input to the scalar value that should be encoded, pairing surrogates across
// calls (and therefore across chunks) through m_leading_surrogate.
//
// Returns an empty Optional for the spec's "continue", i.e. when `item` was stored as a pending leading
// surrogate. On return the iterator has been advanced past `item`, except when `item` is "restored to input"
// (step 1.4); in that case the iterator is left in place so that the caller reads the same item again.
Optional<u32> TextEncoderStream::convert_code_unit_to_scalar_value(u32 item, Utf8CodePointIterator& code_point_iterator)
{
    // Consume `item` on every exit path unless the guard is explicitly disarmed below.
    ArmedScopeGuard move_to_next_code_point_guard = [&] {
        ++code_point_iterator;
    };

    // NOTE: `item` is a 32-bit value read from a code point iterator, while the Utf16View surrogate helpers
    //       deal in UTF-16 code units. Only values that fit in 16 bits can be surrogates, so check the range
    //       first and narrow explicitly rather than relying on an implicit conversion.
    // NOTE(review): If the helpers take u16, passing e.g. U+1D800 unguarded would truncate it to 0xD800 and
    //               misclassify it as a leading surrogate - confirm against Utf16View.h.
    bool const item_fits_in_code_unit = item <= 0xFFFF;
    auto const item_as_code_unit = static_cast<u16>(item);
    bool const item_is_leading_surrogate = item_fits_in_code_unit && Utf16View::is_high_surrogate(item_as_code_unit);
    bool const item_is_trailing_surrogate = item_fits_in_code_unit && Utf16View::is_low_surrogate(item_as_code_unit);

    // 1. If encoder’s leading surrogate is non-null, then:
    if (m_leading_surrogate.has_value()) {
        // 1. Let leadingSurrogate be encoder’s leading surrogate.
        auto leading_surrogate = m_leading_surrogate.value();

        // 2. Set encoder’s leading surrogate to null.
        m_leading_surrogate.clear();

        // 3. If item is a trailing surrogate, then return a scalar value from surrogates given leadingSurrogate
        //    and item.
        if (item_is_trailing_surrogate) {
            // https://encoding.spec.whatwg.org/#scalar-value-from-surrogates
            // To obtain a scalar value from surrogates, given a leading surrogate leading and a trailing surrogate
            // trailing, return 0x10000 + ((leading − 0xD800) << 10) + (trailing − 0xDC00).
            // NOTE: Both values are known to be surrogates here, so narrowing them to u16 is lossless.
            return Utf16View::decode_surrogate_pair(static_cast<u16>(leading_surrogate), item_as_code_unit);
        }

        // 4. Restore item to input.
        move_to_next_code_point_guard.disarm();

        // 5. Return U+FFFD.
        return 0xFFFD;
    }

    // 2. If item is a leading surrogate, then set encoder’s leading surrogate to item and return continue.
    if (item_is_leading_surrogate) {
        m_leading_surrogate = item;
        return OptionalNone {};
    }

    // 3. If item is a trailing surrogate, then return U+FFFD.
    if (item_is_trailing_surrogate)
        return 0xFFFD;

    // 4. Return item.
    return item;
}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org> | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <LibWeb/Bindings/PlatformObject.h> | ||
#include <LibWeb/Encoding/TextEncoderCommon.h> | ||
#include <LibWeb/Streams/GenericTransformStream.h> | ||
|
||
namespace Web::Encoding { | ||
|
||
class TextEncoderStream final | ||
: public Bindings::PlatformObject | ||
, public Streams::GenericTransformStreamMixin | ||
, public TextEncoderCommonMixin { | ||
WEB_PLATFORM_OBJECT(TextEncoderStream, Bindings::PlatformObject); | ||
GC_DECLARE_ALLOCATOR(TextEncoderStream); | ||
|
||
public: | ||
static WebIDL::ExceptionOr<GC::Ref<TextEncoderStream>> construct_impl(JS::Realm&); | ||
virtual ~TextEncoderStream() override; | ||
|
||
private: | ||
TextEncoderStream(JS::Realm&, GC::Ref<Streams::TransformStream>); | ||
|
||
virtual void initialize(JS::Realm&) override; | ||
virtual void visit_edges(Cell::Visitor&) override; | ||
|
||
WebIDL::ExceptionOr<void> encode_and_enqueue_chunk(JS::Value); | ||
WebIDL::ExceptionOr<void> encode_and_flush(); | ||
|
||
Optional<u32> convert_code_unit_to_scalar_value(u32 item, Utf8CodePointIterator& code_point_iterator); | ||
|
||
// https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate | ||
Optional<u32> m_leading_surrogate; | ||
}; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#import <Encoding/TextEncoder.idl> | ||
#import <Streams/GenericTransformStream.idl> | ||
|
||
// https://encoding.spec.whatwg.org/#textencoderstream
// The interface itself only declares a no-argument constructor; its remaining members come from the two
// mixins included below.
[Exposed=*]
interface TextEncoderStream {
    constructor();
};

TextEncoderStream includes TextEncoderCommon;
TextEncoderStream includes GenericTransformStream;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -381,6 +381,7 @@ SyntaxError | |
Text | ||
TextDecoder | ||
TextEncoder | ||
TextEncoderStream | ||
TextEvent | ||
TextMetrics | ||
TextTrack | ||
|
3 changes: 3 additions & 0 deletions
3
...Text/expected/wpt-import/encoding/streams/backpressure.any.shadowrealm-in-shadowrealm.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Harness status: Error | ||
|
||
Found 0 tests |
3 changes: 3 additions & 0 deletions
3
...bWeb/Text/expected/wpt-import/encoding/streams/backpressure.any.shadowrealm-in-window.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Harness status: Error | ||
|
||
Found 0 tests |
10 changes: 10 additions & 0 deletions
10
Tests/LibWeb/Text/expected/wpt-import/encoding/streams/backpressure.any.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Harness status: OK | ||
|
||
Found 4 tests | ||
|
||
2 Pass | ||
2 Fail | ||
Fail write() should not complete until read relieves backpressure for TextDecoderStream | ||
Fail additional writes should wait for backpressure to be relieved for class TextDecoderStream | ||
Pass write() should not complete until read relieves backpressure for TextEncoderStream | ||
Pass additional writes should wait for backpressure to be relieved for class TextEncoderStream |
12 changes: 12 additions & 0 deletions
12
Tests/LibWeb/Text/expected/wpt-import/encoding/streams/encode-bad-chunks.any.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
Harness status: Error | ||
|
||
Found 6 tests | ||
|
||
1 Pass | ||
5 Fail | ||
Pass a chunk that cannot be converted to a string should error the streams | ||
Fail input of type undefined should be converted correctly to string | ||
Fail input of type null should be converted correctly to string | ||
Fail input of type numeric should be converted correctly to string | ||
Fail input of type object should be converted correctly to string | ||
Fail input of type array should be converted correctly to string |
24 changes: 24 additions & 0 deletions
24
Tests/LibWeb/Text/expected/wpt-import/encoding/streams/encode-utf8.any.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Harness status: OK | ||
|
||
Found 19 tests | ||
|
||
19 Pass | ||
Pass encoding one string of UTF-8 should give one complete chunk | ||
Pass a character split between chunks should be correctly encoded | ||
Pass a character following one split between chunks should be correctly encoded | ||
Pass two consecutive astral characters each split down the middle should be correctly reassembled | ||
Pass two consecutive astral characters each split down the middle with an invalid surrogate in the middle should be correctly encoded | ||
Pass a stream ending in a leading surrogate should emit a replacement character as a final chunk | ||
Pass an unmatched surrogate at the end of a chunk followed by an astral character in the next chunk should be replaced with the replacement character at the start of the next output chunk | ||
Pass an unmatched surrogate at the end of a chunk followed by an ascii character in the next chunk should be replaced with the replacement character at the start of the next output chunk | ||
Pass an unmatched surrogate at the end of a chunk followed by a plane 1 character split into two chunks should result in the encoded plane 1 character appearing in the last output chunk | ||
Pass two leading chunks should result in two replacement characters | ||
Pass a non-terminal unpaired leading surrogate should immediately be replaced | ||
Pass a terminal unpaired trailing surrogate should immediately be replaced | ||
Pass a leading surrogate chunk should be carried past empty chunks | ||
Pass a leading surrogate chunk should error when it is clear it didn't form a pair | ||
Pass an empty string should result in no output chunk | ||
Pass a leading empty chunk should be ignored | ||
Pass a trailing empty chunk should be ignored | ||
Pass a plain ASCII chunk should be converted | ||
Pass characters in the ISO-8859-1 range should be encoded correctly |
10 changes: 10 additions & 0 deletions
10
Tests/LibWeb/Text/expected/wpt-import/encoding/streams/invalid-realm.window.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Harness status: OK | ||
|
||
Found 4 tests | ||
|
||
2 Pass | ||
2 Fail | ||
Fail TextDecoderStream: write in detached realm should succeed | ||
Pass TextEncoderStream: write in detached realm should succeed | ||
Pass TextEncoderStream: close in detached realm should succeed | ||
Fail TextDecoderStream: close in detached realm should succeed |
3 changes: 3 additions & 0 deletions
3
...t-import/encoding/streams/readable-writable-properties.any.shadowrealm-in-shadowrealm.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Harness status: Error | ||
|
||
Found 0 tests |
3 changes: 3 additions & 0 deletions
3
...ed/wpt-import/encoding/streams/readable-writable-properties.any.shadowrealm-in-window.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Harness status: Error | ||
|
||
Found 0 tests |
8 changes: 8 additions & 0 deletions
8
Tests/LibWeb/Text/expected/wpt-import/encoding/streams/readable-writable-properties.any.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
Harness status: OK | ||
|
||
Found 2 tests | ||
|
||
1 Pass | ||
1 Fail | ||
Pass TextEncoderStream readable and writable properties must pass brand checks | ||
Fail TextDecoderStream readable and writable properties must pass brand checks |
Oops, something went wrong.