-
Notifications
You must be signed in to change notification settings - Fork 16
CSS: Improve $string_escapes documentation clarity
#227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from 7 commits
4858eae
5409b37
b38e757
a832130
fdf2e08
e8fda7d
a73187b
6bbda4d
a24a2c2
96b91af
79fe74b
b0d082e
59fe777
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -632,9 +632,10 @@ public function get_normalized_token(): ?string { | |
| return null; | ||
| } | ||
|
|
||
| return $this->decode_string_or_url( | ||
| return $this->decode_escapes( | ||
| $this->token_starts_at, | ||
| $this->token_length | ||
| $this->token_length, | ||
| self::TOKEN_STRING === $this->token_type || self::TOKEN_BAD_STRING === $this->token_type | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -680,42 +681,53 @@ public function get_token_value() { | |
| switch ( $this->token_type ) { | ||
| case self::TOKEN_HASH: | ||
| // Hash value starts after the # character. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| $this->token_value = $this->decode_escapes( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| break; | ||
|
|
||
| case self::TOKEN_AT_KEYWORD: | ||
| // At-keyword value starts after the @ character. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| $this->token_value = $this->decode_escapes( $this->token_starts_at + 1, $this->token_length - 1 ); | ||
| break; | ||
|
|
||
| case self::TOKEN_FUNCTION: | ||
| // Function name is everything except the final (. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length - 1 ); | ||
| $this->token_value = $this->decode_escapes( $this->token_starts_at, $this->token_length - 1 ); | ||
| break; | ||
|
|
||
| case self::TOKEN_IDENT: | ||
| // Identifier is the entire token. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length ); | ||
| $this->token_value = $this->decode_escapes( $this->token_starts_at, $this->token_length ); | ||
| break; | ||
|
|
||
| case self::TOKEN_STRING: | ||
| case self::TOKEN_BAD_STRING: | ||
| // Decode and cache the string value. | ||
| if ( null !== $this->token_value_starts_at && null !== $this->token_value_length ) { | ||
| $this->token_value = $this->decode_escapes( | ||
| $this->token_value_starts_at, | ||
| $this->token_value_length, | ||
| true | ||
| ); | ||
| } else { | ||
| $this->token_value = null; | ||
| } | ||
| break; | ||
|
|
||
| case self::TOKEN_URL: | ||
| // Decode and cache the string/URL value. | ||
| // Decode and cache the URL value. | ||
| if ( null !== $this->token_value_starts_at && null !== $this->token_value_length ) { | ||
| $this->token_value = $this->decode_string_or_url( | ||
| $this->token_value = $this->decode_escapes( | ||
| $this->token_value_starts_at, | ||
| $this->token_value_length | ||
| ); | ||
| $this->token_value = $this->token_value; | ||
| } else { | ||
| $this->token_value = null; | ||
| } | ||
| break; | ||
|
|
||
| case self::TOKEN_DELIM: | ||
| // Delim value is the single code point. | ||
| $this->token_value = $this->decode_string_or_url( $this->token_starts_at, $this->token_length ); | ||
| $this->token_value = $this->decode_escapes( $this->token_starts_at, $this->token_length ); | ||
| break; | ||
|
|
||
| case self::TOKEN_NUMBER: | ||
|
|
@@ -1185,7 +1197,7 @@ private function consume_numeric(): bool { | |
| // Consume an ident sequence. Set the <dimension-token>'s unit to the returned value. | ||
| $unit_starts_at = $this->at; | ||
| $this->consume_ident_sequence(); | ||
| $this->token_unit = $this->decode_string_or_url( $unit_starts_at, $this->at - $unit_starts_at ); | ||
| $this->token_unit = $this->decode_escapes( $unit_starts_at, $this->at - $unit_starts_at ); | ||
| $this->token_type = self::TOKEN_DIMENSION; | ||
| $this->token_length = $this->at - $this->token_starts_at; | ||
| return true; | ||
|
|
@@ -1220,7 +1232,7 @@ private function consume_ident_like(): bool { | |
| // Consume an ident sequence, and let string be the result. | ||
| $ident_start = $this->at; | ||
| $decoded = $this->consume_ident_sequence(); | ||
| $string = $decoded ?? $this->decode_string_or_url( $ident_start, $this->at - $ident_start ); | ||
| $string = $decoded ?? $this->decode_escapes( $ident_start, $this->at - $ident_start ); | ||
|
|
||
| // If string's value is an ASCII case-insensitive match for "url", | ||
| // and the next input code point is U+0028 LEFT PARENTHESIS ((). | ||
|
|
@@ -1547,11 +1559,15 @@ private function consume_ident_start_codepoint( $at ): int { | |
| * Slow path: Builds the decoded string by optionally processing escapes and | ||
| * normalizing line endings and null bytes. | ||
sirreal marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| * | ||
| * @param int $start Start byte offset. | ||
| * @param int $length Length of the substring to decode. | ||
| * @return string Decoded/normalized string. | ||
| * @param int $start Start byte offset. | ||
| * @param int $length Length of the substring to decode. | ||
| * @param bool $string_escapes Optional, default false. When true, apply the escape rules | ||
| * that are specific to CSS string tokens: | ||
| * - A backslash followed by a newline (LF, FF, CR, or CRLF) is a line continuation; both are dropped. | ||
| * - A backslash at the very end of the input (EOF) is dropped. | ||
|
||
| * @return string Decoded and normalized string. | ||
| */ | ||
| private function decode_string_or_url( int $start, int $length ): string { | ||
| private function decode_escapes( int $start, int $length, bool $string_escapes = false ): string { | ||
| // Fast path: check if any processing is needed. | ||
| $slice = wp_scrub_utf8( substr( $this->css, $start, $length ) ); | ||
| $special_chars = "\\\r\f\x00"; | ||
|
|
@@ -1581,8 +1597,35 @@ private function decode_string_or_url( int $start, int $length ): string { | |
|
|
||
| $char = $this->css[ $at ]; | ||
|
|
||
| // Handle escapes (if enabled). | ||
| // Handle escapes. | ||
| if ( '\\' === $char ) { | ||
| /* | ||
| * String tokens have special escape rules per §4.3.5: | ||
| * - \-EOF: do nothing (consume the backslash, produce no value). | ||
| * - \-newline: consume both (line continuation, produce no value). | ||
|
||
| * These must be checked before the general escape path. | ||
| */ | ||
| if ( $string_escapes ) { | ||
| if ( $at + 1 >= $end ) { | ||
| // \-EOF: consume the backslash and stop. | ||
| ++$at; | ||
| continue; | ||
| } | ||
| $next = $this->css[ $at + 1 ]; | ||
| if ( "\n" === $next || "\f" === $next ) { | ||
| $at += 2; | ||
| continue; | ||
| } | ||
| if ( "\r" === $next ) { | ||
| $at += 2; | ||
| // \r\n counts as one newline. | ||
| if ( $at < $end && "\n" === $this->css[ $at ] ) { | ||
| ++$at; | ||
| } | ||
| continue; | ||
| } | ||
| } | ||
|
|
||
| if ( $this->is_valid_escape( $at ) ) { | ||
| ++$at; | ||
| $decoded .= $this->decode_escape_at( $at, $bytes_consumed ); | ||
|
Comment on lines 1634 to 1636
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.