-
-
Notifications
You must be signed in to change notification settings - Fork 92
Expand file tree
/
Copy pathUrlEncoderTrait.php
More file actions
102 lines (90 loc) · 3.13 KB
/
UrlEncoderTrait.php
File metadata and controls
102 lines (90 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<?php
namespace samdark\sitemap;
/**
* Provides URL encoding functionality for sitemap classes.
* Percent-encodes non-ASCII characters in URL components per RFC 3986
* while preserving existing percent-encoded sequences to avoid double-encoding.
*/
trait UrlEncoderTrait
{
    /**
     * Rebuilds a URL from its parsed components, percent-encoding every
     * non-ASCII byte in the user info, path, query and fragment, and converting
     * the host to its ASCII (Punycode) form when the intl extension is available.
     *
     * ASCII characters are never touched, so existing %HH sequences and
     * delimiters such as "&", "=" and "/" are preserved and nothing is
     * double-encoded.
     *
     * The "//" authority marker is emitted only when the URL actually has one:
     * "mailto:user@example.com" stays as is, and a protocol-relative
     * "//example.com/path" keeps its leading slashes.
     *
     * @param string $url the URL to encode
     * @return string the encoded URL, or the input unchanged if parse_url() cannot parse it
     */
    protected function encodeUrl($url)
    {
        $parsed = parse_url($url);
        if ($parsed === false) {
            // Seriously malformed URL: return it untouched rather than guess at its structure.
            return $url;
        }

        $hasHost = isset($parsed['host']);
        $encoded = '';

        // Scheme (http, https, etc.) and the "//" authority marker.
        if (isset($parsed['scheme'])) {
            $encoded .= $parsed['scheme'] . ':';
            // "//" belongs to the authority, not to the scheme. Emit it when a host
            // is present, or when the input had an empty authority ("file:///path").
            if ($hasHost || strpos($url, $parsed['scheme'] . '://') === 0) {
                $encoded .= '//';
            }
        } elseif ($hasHost) {
            // Protocol-relative URL ("//example.com/path").
            $encoded .= '//';
        }

        // User info (credentials)
        if (isset($parsed['user'])) {
            $encoded .= $this->encodeNonAscii($parsed['user']);
            if (isset($parsed['pass'])) {
                $encoded .= ':' . $this->encodeNonAscii($parsed['pass']);
            }
            $encoded .= '@';
        }

        // Host (domain) — converted to Punycode when intl is available
        if ($hasHost) {
            if (function_exists('idn_to_ascii') && defined('INTL_IDNA_VARIANT_UTS46')) {
                $host = idn_to_ascii($parsed['host'], IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
                // idn_to_ascii() returns false on failure; fall back to the host as given.
                $encoded .= $host !== false ? $host : $parsed['host'];
            } else {
                $encoded .= $parsed['host'];
            }
        }

        // Port
        if (isset($parsed['port'])) {
            $encoded .= ':' . $parsed['port'];
        }

        // Path — encode only non-ASCII bytes; existing %HH sequences are ASCII and are preserved
        if (isset($parsed['path'])) {
            $encoded .= $this->encodeNonAscii($parsed['path']);
        }

        // Query string — "&" and "=" are ASCII and therefore left alone, so the
        // whole query can be encoded in one pass without splitting it into pairs.
        if (isset($parsed['query'])) {
            $encoded .= '?' . $this->encodeNonAscii($parsed['query']);
        }

        // Fragment
        if (isset($parsed['fragment'])) {
            $encoded .= '#' . $this->encodeNonAscii($parsed['fragment']);
        }

        return $encoded;
    }

    /**
     * Percent-encodes sequences of non-ASCII bytes in a string while leaving
     * all ASCII characters (including existing %HH sequences) untouched.
     *
     * The pattern deliberately has no "u" modifier: it works on raw bytes, so
     * each byte >= 0x80 becomes its own %HH triplet regardless of whether the
     * input is valid UTF-8.
     *
     * @param string $value the string to encode
     * @return string the encoded string; the input unchanged if the regex engine reports an error
     */
    private function encodeNonAscii($value)
    {
        $result = preg_replace_callback(
            '/[^\x00-\x7F]+/',
            function ($matches) {
                return rawurlencode($matches[0]);
            },
            $value
        );

        // preg_replace_callback() returns null on a PCRE error; keep the original
        // component instead of silently dropping it from the URL.
        return $result === null ? $value : $result;
    }
}