WordPress · sirreal · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/components/DataLiberation/CSS/class-cssprocessor.php b/components/DataLiberation/CSS/class-cssprocessor.php
@@ -632,9 +632,10 @@ public function get_normalized_token(): ?string {
 			return null;
 		}
 
-		return $this->decode_string_or_url(
+		return $this->decode_range(
 			$this->token_starts_at,
-			$this->token_length
+			$this->token_length,
+			self::TOKEN_STRING === $this->token_type
 		);
 	}
 
@@ -680,29 +681,39 @@ public function get_token_value() {
 			switch ( $this->token_type ) {
 				case self::TOKEN_HASH:
 					// Hash value starts after the # character.
-					$this->token_value = $this->decode_string_or_url( $this->token_starts_at + 1, $this->token_length - 1 );
+					$this->token_value = $this->decode_range( $this->token_starts_at + 1, $this->token_length - 1 );
 					break;
 
 				case self::TOKEN_AT_KEYWORD:
 					// At-keyword value starts after the @ character.
-					$this->token_value = $this->decode_string_or_url( $this->token_starts_at + 1, $this->token_length - 1 );
+					$this->token_value = $this->decode_range( $this->token_starts_at + 1, $this->token_length - 1 );
 					break;
 
 				case self::TOKEN_FUNCTION:
 					// Function name is everything except the final (.
-					$this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length - 1 );
+					$this->token_value = $this->decode_range( $this->token_starts_at, $this->token_length - 1 );
 					break;
 
 				case self::TOKEN_IDENT:
 					// Identifier is the entire token.
-					$this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length );
+					$this->token_value = $this->decode_range( $this->token_starts_at, $this->token_length );
 					break;
 
 				case self::TOKEN_STRING:
+					if ( null !== $this->token_value_starts_at && null !== $this->token_value_length ) {
+						$this->token_value = $this->decode_range(
+							$this->token_value_starts_at,
+							$this->token_value_length,
+							true
+						);
+					} else {
+						$this->token_value = null;
+					}
+					break;
+
 				case self::TOKEN_URL:
-					// Decode and cache the string/URL value.
 					if ( null !== $this->token_value_starts_at && null !== $this->token_value_length ) {
-						$this->token_value = $this->decode_string_or_url(
+						$this->token_value = $this->decode_range(
 							$this->token_value_starts_at,
 							$this->token_value_length
 						);
@@ -713,7 +724,7 @@ public function get_token_value() {
 
 				case self::TOKEN_DELIM:
 					// Delim value is the single code point.
-					$this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length );
+					$this->token_value = $this->decode_range( $this->token_starts_at, $this->token_length );
 					break;
 
 				case self::TOKEN_NUMBER:
@@ -1183,7 +1194,7 @@ private function consume_numeric(): bool {
 			// Consume an ident sequence. Set the <dimension-token>'s unit to the returned value.
 			$unit_starts_at = $this->at;
 			$this->consume_ident_sequence();
-			$this->token_unit   = $this->decode_string_or_url( $unit_starts_at, $this->at - $unit_starts_at );
+			$this->token_unit   = $this->decode_range( $unit_starts_at, $this->at - $unit_starts_at );
 			$this->token_type   = self::TOKEN_DIMENSION;
 			$this->token_length = $this->at - $this->token_starts_at;
 			return true;
@@ -1218,7 +1229,7 @@ private function consume_ident_like(): bool {
 		// Consume an ident sequence, and let string be the result.
 		$ident_start = $this->at;
 		$decoded     = $this->consume_ident_sequence();
-		$string      = $decoded ?? $this->decode_string_or_url( $ident_start, $this->at - $ident_start );
+		$string      = $decoded ?? $this->decode_range( $ident_start, $this->at - $ident_start );
 
 		// If string's value is an ASCII case-insensitive match for "url",
 		// and the next input code point is U+0028 LEFT PARENTHESIS (().
@@ -1537,19 +1548,27 @@ private function consume_ident_start_codepoint( $at ): int {
 	}
 
 	/**
-	 * Decodes a string or URL value with escape sequences and normalization.
-	 *
-	 * Fast path: If the slice contains no special characters, returns the raw
-	 * substring with almost zero allocations.
-	 *
-	 * Slow path: Builds the decoded string by optionally processing escapes and
-	 * normalizing line endings and null bytes.
-	 *
-	 * @param int $start           Start byte offset.
-	 * @param int $length          Length of the substring to decode.
-	 * @return string Decoded/normalized string.
+	 * Decodes and normalizes ident-like or string CSS values from a byte range.
+	 *
+	 * For example:
+	 * ┌──────────────┬────────┐
+	 * │ Input        │ Output │
+	 * ├──────────────┼────────┤
+	 * │ 'xyz'        │ 'xyz'  │
+	 * │ '\x\y\z'     │ 'xyz'  │
+	 * │ 'x\79z'      │ 'xyz'  │
+	 * │ 'x\000079 z' │ 'xyz'  │
+	 * │ 'a\r\nb'     │ 'a\nb' │
+	 * │ 'a\0b'       │ 'a�b'  │
+	 * └──────────────┴────────┘
+	 *
+	 * @param int  $start           Start byte offset.
+	 * @param int  $length          Length of the substring to decode.
+	 * @param bool $string_escapes  Optional, default false. When true, apply additional escape
+	 *                              rules that apply only to string tokens (CSS §4.3.5).
+	 * @return string Decoded and normalized string.
 	 */
-	private function decode_string_or_url( int $start, int $length ): string {
+	private function decode_range( int $start, int $length, bool $string_escapes = false ): string {
 		// Fast path: check if any processing is needed.
 		$slice         = wp_scrub_utf8( substr( $this->css, $start, $length ) );
 		$special_chars = "\\\r\f\x00";
@@ -1579,8 +1598,39 @@ private function decode_string_or_url( int $start, int $length ): string {
 
 			$char = $this->css[ $at ];
 
-			// Handle escapes (if enabled).
+			// Handle escapes.
 			if ( '\\' === $char ) {
+				/*
+				 * String tokens have special escape rules per §4.3.5:
+				 * - 0x5C (backslash) at EOF: consume the backslash, produce no value.
+				 * - 0x5C (backslash) followed by 0x0A (LF), 0x0C (FF), or 0x0D (CR):
+				 *   consume both characters as a line continuation, produce no value.
+				 * - 0x5C (backslash) followed by 0x0D 0x0A (CRLF):
+				 *   consume all three characters as a line continuation, produce no value.
+				 * These must be checked before the general escape path.
+				 */
+				if ( $string_escapes ) {
+					if ( $at + 1 >= $end ) {
+						// 0x5C at EOF: consume the backslash and stop.
+						++$at;
+						continue;
+					}
+					$next = $this->css[ $at + 1 ];
+					if ( "\n" === $next || "\f" === $next ) {
+						// 0x5C followed by 0x0A (LF) or 0x0C (FF): line continuation.
+						$at += 2;
+						continue;
+					}
+					if ( "\r" === $next ) {
+						// 0x5C followed by 0x0D (CR): line continuation; 0x0D 0x0A counts as one newline.
+						$at += 2;
+						if ( $at < $end && "\n" === $this->css[ $at ] ) {
+							++$at;
+						}
+						continue;
+					}
+				}
+
 				if ( $this->is_valid_escape( $at ) ) {
 					++$at;
 					$decoded .= $this->decode_escape_at( $at, $bytes_consumed );

diff --git a/components/DataLiberation/Tests/CSSProcessorTest.php b/components/DataLiberation/Tests/CSSProcessorTest.php
@@ -5,6 +5,8 @@
 
 /**
  * Comprehensive CSS processor tests based on the CSS Syntax Level 3 specification.
+ *
+ * @group css
  */
 class CSSProcessorTest extends TestCase {
 
@@ -25,7 +27,7 @@ public function test_processor_matches_spec( string $css, array $expected_tokens
 	 * @see https://github.com/romainmenke/css-processor-tests/
 	 * @return array
 	 */
-	static public function corpus_provider(): array {
+	public static function corpus_provider(): array {
 		return json_decode(file_get_contents(__DIR__ . '/css-test-cases.json'), true);
 	}
 
@@ -35,7 +37,7 @@ static public function corpus_provider(): array {
 	 * @param CSSProcessor $processor The CSS processor.
 	 * @return array Array of tokens with type, raw, startIndex, endIndex, structured.
 	 */
-	static public function collect_tokens( CSSProcessor $processor, $keys = null ): array {
+	public static function collect_tokens( CSSProcessor $processor, $keys = null ): array {
 		$tokens = array();
 
 		while ( $processor->next_token() ) {
@@ -1542,6 +1544,144 @@ public function test_ident_start_codepoint_bounds_check(): void {
 		$this->assertSame( $expected_tokens, $actual_tokens );
 	}
 
+	/**
+	 * Tests that backslash-newline in a string token contributes nothing to the value.
+	 *
+	 * CSS spec §4.3.5 consume-string-token:
+	 * > U+005C REVERSE SOLIDUS (\)
+	 * > Otherwise, if the next input code point is a newline, consume it.
+	 *
+	 * The backslash and newline are both consumed and produce no value.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token
+	 * @see https://github.com/WordPress/php-toolkit/issues/222
+	 *
+	 * @dataProvider data_string_backslash_newline
+	 */
+	public function test_string_backslash_newline( string $css, string $expected_value ): void {
+		$processor = CSSProcessor::create( $css );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() );
+		$this->assertSame( $expected_value, $processor->get_token_value() );
+	}
+
+	public static function data_string_backslash_newline(): array {
+		return array(
+			'backslash-LF'   => array( "'str\\\ning'", 'string' ),
+			'backslash-FF'   => array( "'str\\\fing'", 'string' ),
+			'backslash-CR'   => array( "'str\\\ring'", 'string' ),
+			'backslash-CRLF' => array( "'str\\\r\ning'", 'string' ),
+		);
+	}
+
+	/**
+	 * Tests that backslash-EOF in a string token contributes nothing to the value.
+	 *
+	 * CSS spec §4.3.5 consume-string-token:
+	 * > U+005C REVERSE SOLIDUS (\)
+	 * > If the next input code point is EOF, do nothing.
+	 *
+	 * The trailing backslash is consumed and produces no value.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-string-token
+	 * @see https://github.com/WordPress/php-toolkit/issues/223
+	 */
+	public function test_string_backslash_eof(): void {
+		$processor = CSSProcessor::create( "'string\\" );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( CSSProcessor::TOKEN_STRING, $processor->get_token_type() );
+		$this->assertSame( 'string', $processor->get_token_value() );
+	}
+
+	/**
+	 * Tests that backslash-newline in an unquoted URL produces a bad-url token.
+	 *
+	 * In unquoted URLs, the backslash-newline check goes through is_valid_escape()
+	 * which returns false for newlines, triggering consume_remnants_of_bad_url().
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-url-token
+	 *
+	 * @dataProvider data_url_backslash_newline
+	 */
+	public function test_url_backslash_newline( string $css ): void {
+		$processor = CSSProcessor::create( $css );
+
+		$found_bad_url = false;
+		while ( $processor->next_token() ) {
+			if ( CSSProcessor::TOKEN_BAD_URL === $processor->get_token_type() ) {
+				$found_bad_url = true;
+				break;
+			}
+		}
+
+		$this->assertTrue( $found_bad_url, 'Expected a BAD_URL token but none was found.' );
+	}
+
+	public static function data_url_backslash_newline(): array {
+		return array(
+			'backslash-LF'   => array( "url(ab\\\ncd)" ),
+			'backslash-FF'   => array( "url(ab\\\fcd)" ),
+			'backslash-CR'   => array( "url(ab\\\rcd)" ),
+			'backslash-CRLF' => array( "url(ab\\\r\ncd)" ),
+		);
+	}
+
+	/**
+	 * Tests that backslash-EOF in an unquoted URL produces U+FFFD in the value.
+	 *
+	 * In unquoted URLs, is_valid_escape() returns true for backslash-EOF,
+	 * and consuming the escaped code point at EOF produces U+FFFD per spec.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-url-token
+	 */
+	public function test_url_backslash_eof(): void {
+		$processor = CSSProcessor::create( "url(string\\" );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( CSSProcessor::TOKEN_URL, $processor->get_token_type() );
+		$this->assertSame( "string\u{FFFD}", $processor->get_token_value() );
+	}
+
+	/**
+	 * Tests that backslash-newline stops an ident sequence.
+	 *
+	 * In idents, is_valid_escape() returns false for backslash-newline,
+	 * so the ident stops before the backslash.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-name
+	 *
+	 * @dataProvider data_ident_backslash_newline
+	 */
+	public function test_ident_backslash_newline( string $css ): void {
+		$processor = CSSProcessor::create( $css );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( CSSProcessor::TOKEN_IDENT, $processor->get_token_type() );
+		$this->assertSame( 'abc', $processor->get_token_value() );
+	}
+
+	public static function data_ident_backslash_newline(): array {
+		return array(
+			'backslash-LF'   => array( "abc\\\n" ),
+			'backslash-FF'   => array( "abc\\\f" ),
+			'backslash-CR'   => array( "abc\\\r" ),
+			'backslash-CRLF' => array( "abc\\\r\n" ),
+		);
+	}
+
+	/**
+	 * Tests that backslash-EOF in an ident produces U+FFFD in the value.
+	 *
+	 * In idents, is_valid_escape() returns true for backslash-EOF,
+	 * and consuming the escaped code point at EOF produces U+FFFD per spec.
+	 *
+	 * @see https://www.w3.org/TR/css-syntax-3/#consume-name
+	 */
+	public function test_ident_backslash_eof(): void {
+		$processor = CSSProcessor::create( "abc\\" );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( CSSProcessor::TOKEN_IDENT, $processor->get_token_type() );
+		$this->assertSame( "abc\u{FFFD}", $processor->get_token_value() );
+	}
+
 	/**
 	 * Tests that bad-string-token returns null for get_token_value().
 	 *

diff --git a/components/DataLiberation/Tests/CSSUrlProcessorTest.php b/components/DataLiberation/Tests/CSSUrlProcessorTest.php
@@ -3,6 +3,9 @@
 use PHPUnit\Framework\TestCase;
 use WordPress\DataLiberation\URL\CSSURLProcessor;
 
+/**
+ * @group css
+ */
 class CSSURLProcessorTest extends TestCase {
 
 	/**