-
Notifications
You must be signed in to change notification settings - Fork 16
CSS: Improve $string_escapes documentation clarity
#227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from all commits
4858eae
5409b37
b38e757
a832130
fdf2e08
e8fda7d
a73187b
6bbda4d
a24a2c2
96b91af
79fe74b
b0d082e
59fe777
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -632,9 +632,10 @@ public function get_normalized_token(): ?string { | |
| return null; | ||
| } | ||
|
|
||
| return $this->decode_string_or_url( | ||
| return $this->decode_range( | ||
| $this->token_starts_at, | ||
| $this->token_length | ||
| $this->token_length, | ||
| self::TOKEN_STRING === $this->token_type | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -680,29 +681,39 @@ public function get_token_value() { | |
| switch ( $this->token_type ) { | ||
| case self::TOKEN_HASH: | ||
| // Hash value starts after the # character. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| $this->token_value = $this->decode_range( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| break; | ||
|
|
||
| case self::TOKEN_AT_KEYWORD: | ||
| // At-keyword value starts after the @ character. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| $this->token_value = $this->decode_range( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| break; | ||
|
|
||
| case self::TOKEN_FUNCTION: | ||
| // Function name is everything except the final (. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length - 1 ); | ||
| $this->token_value = $this->decode_range( $this->token_starts_at, $this->token_length - 1 ); | ||
| break; | ||
|
|
||
| case self::TOKEN_IDENT: | ||
| // Identifier is the entire token. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length ); | ||
| $this->token_value = $this->decode_range( $this->token_starts_at, $this->token_length ); | ||
| break; | ||
|
|
||
| case self::TOKEN_STRING: | ||
| if ( null !== $this->token_value_starts_at && null !== $this->token_value_length ) { | ||
| $this->token_value = $this->decode_range( | ||
| $this->token_value_starts_at, | ||
| $this->token_value_length, | ||
| true | ||
| ); | ||
| } else { | ||
| $this->token_value = null; | ||
| } | ||
| break; | ||
|
|
||
| case self::TOKEN_URL: | ||
| // Decode and cache the URL value. | ||
| if ( null !== $this->token_value_starts_at && null !== $this->token_value_length ) { | ||
| $this->token_value = $this->decode_string_or_url( | ||
| $this->token_value = $this->decode_range( | ||
| $this->token_value_starts_at, | ||
| $this->token_value_length | ||
| ); | ||
|
|
@@ -713,7 +724,7 @@ public function get_token_value() { | |
|
|
||
| case self::TOKEN_DELIM: | ||
| // Delim value is the single code point. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length ); | ||
| $this->token_value = $this->decode_range( $this->token_starts_at, $this->token_length ); | ||
| break; | ||
|
|
||
| case self::TOKEN_NUMBER: | ||
|
|
@@ -1183,7 +1194,7 @@ private function consume_numeric(): bool { | |
| // Consume an ident sequence. Set the <dimension-token>'s unit to the returned value. | ||
| $unit_starts_at = $this->at; | ||
| $this->consume_ident_sequence(); | ||
| $this->token_unit = $this->decode_string_or_url( $unit_starts_at, $this->at - $unit_starts_at ); | ||
| $this->token_unit = $this->decode_range( $unit_starts_at, $this->at - $unit_starts_at ); | ||
| $this->token_type = self::TOKEN_DIMENSION; | ||
| $this->token_length = $this->at - $this->token_starts_at; | ||
| return true; | ||
|
|
@@ -1218,7 +1229,7 @@ private function consume_ident_like(): bool { | |
| // Consume an ident sequence, and let string be the result. | ||
| $ident_start = $this->at; | ||
| $decoded = $this->consume_ident_sequence(); | ||
| $string = $decoded ?? $this->decode_string_or_url( $ident_start, $this->at - $ident_start ); | ||
| $string = $decoded ?? $this->decode_range( $ident_start, $this->at - $ident_start ); | ||
|
|
||
| // If string's value is an ASCII case-insensitive match for "url", | ||
| // and the next input code point is U+0028 LEFT PARENTHESIS ((). | ||
|
|
@@ -1537,19 +1548,27 @@ private function consume_ident_start_codepoint( $at ): int { | |
| } | ||
|
|
||
| /** | ||
| * Decodes a string or URL value with escape sequences and normalization. | ||
| * | ||
| * Fast path: If the slice contains no special characters, returns the raw | ||
| * substring with almost zero allocations. | ||
| * | ||
| * Slow path: Builds the decoded string by optionally processing escapes and | ||
| * normalizing line endings and null bytes. | ||
| * | ||
| * @param int $start Start byte offset. | ||
| * @param int $length Length of the substring to decode. | ||
| * @return string Decoded/normalized string. | ||
| * Decodes and normalizes ident-like or string CSS values from a byte range. | ||
| * | ||
| * For example: | ||
| * ┌──────────────┬────────┐ | ||
| * │ Input │ Output │ | ||
| * ├──────────────┼────────┤ | ||
| * │ 'xyz' │ 'xyz' │ | ||
| * │ '\x\y\z' │ 'xyz' │ | ||
| * │ 'x\79z' │ 'xyz' │ | ||
| * │ 'x\000079 z' │ 'xyz' │ | ||
| * │ 'a\r\nb' │ 'a\nb' │ | ||
| * │ 'a\0b' │ 'a�b' │ | ||
| * └──────────────┴────────┘ | ||
| * | ||
| * @param int $start Start byte offset. | ||
| * @param int $length Length of the substring to decode. | ||
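| * @param bool $string_escapes Optional, default false. When true, a backslash at the end of the | ||
| * range or directly before a newline (LF, FF, CR, or CRLF) is consumed | ||
| * without producing output, as required for string tokens (CSS §4.3.5). | ||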
| * @return string Decoded and normalized string. | ||
| */ | ||
| private function decode_string_or_url( int $start, int $length ): string { | ||
| private function decode_range( int $start, int $length, bool $string_escapes = false ): string { | ||
| // Fast path: check if any processing is needed. | ||
| $slice = wp_scrub_utf8( substr( $this->css, $start, $length ) ); | ||
| $special_chars = "\\\r\f\x00"; | ||
|
|
@@ -1579,8 +1598,39 @@ private function decode_string_or_url( int $start, int $length ): string { | |
|
|
||
| $char = $this->css[ $at ]; | ||
|
|
||
| // Handle escapes (if enabled). | ||
| // Handle escapes. | ||
| if ( '\\' === $char ) { | ||
| /* | ||
| * String tokens have special escape rules per §4.3.5: | ||
| * - 0x5C (backslash) at EOF: consume the backslash, produce no value. | ||
| * - 0x5C (backslash) followed by 0x0A (LF), 0x0C (FF), or 0x0D (CR): | ||
| * consume both characters as a line continuation, produce no value. | ||
| * - 0x5C (backslash) followed by 0x0D 0x0A (CRLF): | ||
| * consume all three characters as a line continuation, produce no value. | ||
| * These must be checked before the general escape path. | ||
| */ | ||
| if ( $string_escapes ) { | ||
| if ( $at + 1 >= $end ) { | ||
| // 0x5C at EOF: consume the backslash and stop. | ||
| ++$at; | ||
| continue; | ||
| } | ||
| $next = $this->css[ $at + 1 ]; | ||
| if ( "\n" === $next || "\f" === $next ) { | ||
| // 0x5C followed by 0x0A (LF) or 0x0C (FF): line continuation. | ||
| $at += 2; | ||
| continue; | ||
| } | ||
| if ( "\r" === $next ) { | ||
| // 0x5C followed by 0x0D (CR): line continuation; 0x0D 0x0A counts as one newline. | ||
| $at += 2; | ||
| if ( $at < $end && "\n" === $this->css[ $at ] ) { | ||
| ++$at; | ||
| } | ||
| continue; | ||
| } | ||
| } | ||
|
|
||
| if ( $this->is_valid_escape( $at ) ) { | ||
| ++$at; | ||
| $decoded .= $this->decode_escape_at( $at, $bytes_consumed ); | ||
|
Comment on lines
1634
to
1636
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice rename @sirreal!