Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions components/DataLiberation/CSS/class-cssprocessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1571,7 +1571,7 @@ private function decode_string_or_url( int $start, int $length ): string {
if ( $normal_len > 0 ) {
// Clamp to not exceed the end boundary.
$normal_len = min( $normal_len, $end - $at );
$decoded .= substr( $this->css, $at, $normal_len );
$decoded .= wp_scrub_utf8( substr( $this->css, $at, $normal_len ) );
$at += $normal_len;
}

Expand All @@ -1585,7 +1585,7 @@ private function decode_string_or_url( int $start, int $length ): string {
if ( '\\' === $char ) {
if ( $this->is_valid_escape( $at ) ) {
++$at;
$decoded .= $this->decode_escape_at( $at, $bytes_consumed );
$decoded .= wp_scrub_utf8( $this->decode_escape_at( $at, $bytes_consumed ) );
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or should we call this in decode_escape_at()?

$at += $bytes_consumed;
continue;
}
Expand Down
62 changes: 62 additions & 0 deletions components/DataLiberation/Tests/CSSProcessorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,68 @@ public function test_invalid_utf8_with_two_single_byte_invalid_sequences(): void
$this->assertSame( $expected, $actual_tokens );
}

/**
 * Invalid UTF-8 in a plain-text run must be scrubbed on the slow decoding path too.
 *
 * A backslash escape inside a token sends decode_string_or_url() down its slow
 * path. The plain ("normal") text segments decoded there must get the same
 * treatment as on the fast path: every invalid UTF-8 byte becomes U+FFFD.
 */
public function test_invalid_utf8_in_normal_segment_combined_with_escape(): void {
	// The leading "." is tokenized on its own as a delimiter.
	$delim_token = array(
		'type'  => CSSProcessor::TOKEN_DELIM,
		'raw'   => '.',
		'value' => '.',
	);

	// The ident carries a stray 0xF1 byte in its plain-text run, followed by
	// the hex escape \41 (U+0041, 'A'). The raw form keeps the source bytes
	// untouched, while the decoded value shows U+FFFD in place of 0xF1 and
	// 'A' in place of the escape.
	$ident_token = array(
		'type'  => CSSProcessor::TOKEN_IDENT,
		'raw'   => "test\xF1\\41name",
		'value' => "test\u{FFFD}Aname",
	);

	$tokens = $this->collect_tokens(
		CSSProcessor::create( ".test\xF1\\41name" ),
		array( 'type', 'raw', 'value' )
	);

	$this->assertSame( array( $delim_token, $ident_token ), $tokens );
}

/**
 * An invalid UTF-8 byte that is itself the escaped character must be scrubbed.
 *
 * When the byte directly after a backslash is invalid UTF-8,
 * decode_escape_at() hands that raw byte back, so the caller is responsible
 * for replacing it with U+FFFD.
 */
public function test_invalid_utf8_as_escaped_character(): void {
	// The leading "." is tokenized on its own as a delimiter.
	$delim_token = array(
		'type'  => CSSProcessor::TOKEN_DELIM,
		'raw'   => '.',
		'value' => '.',
	);

	// The ident is "a", then a backslash immediately followed by the lone
	// invalid byte 0xF1 (so 0xF1 is the escaped character), then "b".
	// The raw form keeps the backslash and the original byte; the decoded
	// value must contain U+FFFD where the escaped 0xF1 was.
	$ident_token = array(
		'type'  => CSSProcessor::TOKEN_IDENT,
		'raw'   => "a\\\xF1b",
		'value' => "a\u{FFFD}b",
	);

	$tokens = $this->collect_tokens(
		CSSProcessor::create( ".a\\\xF1b" ),
		array( 'type', 'raw', 'value' )
	);

	$this->assertSame( array( $delim_token, $ident_token ), $tokens );
}

/**
* Legacy test to ensure basic tokenization still works.
*/
Expand Down
Loading