Filename | /Users/ether/.perlbrew/libs/36.0@std/lib/perl5/URI/Escape.pm |
Statements | Executed 274 statements in 748µs |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 18µs | 19µs | BEGIN@3 | URI::Escape::
1 | 1 | 1 | 7µs | 20µs | BEGIN@147 | URI::Escape::
1 | 1 | 1 | 5µs | 18µs | BEGIN@191 | URI::Escape::
1 | 1 | 1 | 3µs | 22µs | BEGIN@4 | URI::Escape::
2 | 1 | 1 | 3µs | 3µs | CORE:qr (opcode) | URI::Escape::
1 | 1 | 1 | 2µs | 2µs | BEGIN@153 | URI::Escape::
0 | 0 | 0 | 0s | 0s | _fail_hi | URI::Escape::
0 | 0 | 0 | 0s | 0s | escape_char | URI::Escape::
0 | 0 | 0 | 0s | 0s | uri_escape | URI::Escape::
0 | 0 | 0 | 0s | 0s | uri_escape_utf8 | URI::Escape::
0 | 0 | 0 | 0s | 0s | uri_unescape | URI::Escape::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package URI::Escape; | ||||
2 | |||||
3 | 2 | 18µs | 2 | 20µs | # spent 19µs (18+1) within URI::Escape::BEGIN@3 which was called:
# once (18µs+1µs) by OpenAPI::Modern::BEGIN@25 at line 3 # spent 19µs making 1 call to URI::Escape::BEGIN@3
# spent 1µs making 1 call to strict::import |
4 | 2 | 64µs | 2 | 41µs | # spent 22µs (3+19) within URI::Escape::BEGIN@4 which was called:
# once (3µs+19µs) by OpenAPI::Modern::BEGIN@25 at line 4 # spent 22µs making 1 call to URI::Escape::BEGIN@4
# spent 19µs making 1 call to warnings::import |
5 | |||||
6 | =head1 NAME | ||||
7 | |||||
8 | URI::Escape - Percent-encode and percent-decode unsafe characters | ||||
9 | |||||
10 | =head1 SYNOPSIS | ||||
11 | |||||
12 | use URI::Escape; | ||||
13 | $safe = uri_escape("10% is enough\n"); | ||||
14 | $verysafe = uri_escape("foo", "\0-\377"); | ||||
15 | $str = uri_unescape($safe); | ||||
16 | |||||
17 | =head1 DESCRIPTION | ||||
18 | |||||
19 | This module provides functions to percent-encode and percent-decode URI strings as | ||||
20 | defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping". | ||||
21 | This is the terminology used by this module, which predates the formalization of the | ||||
22 | terms by the RFC by several years. | ||||
23 | |||||
24 | A URI consists of a restricted set of characters. The restricted set | ||||
25 | of characters consists of digits, letters, and a few graphic symbols | ||||
26 | chosen from those common to most of the character encodings and input | ||||
27 | facilities available to Internet users. They are made up of the | ||||
28 | "unreserved" and "reserved" character sets as defined in RFC 3986. | ||||
29 | |||||
30 | unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | ||||
31 | reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@" | ||||
32 | "!" / "$" / "&" / "'" / "(" / ")" | ||||
33 | / "*" / "+" / "," / ";" / "=" | ||||
34 | |||||
35 | In addition, any byte (octet) can be represented in a URI by an escape | ||||
36 | sequence: a triplet consisting of the character "%" followed by two | ||||
37 | hexadecimal digits. A byte can also be represented directly by a | ||||
38 | character, using the US-ASCII character for that octet. | ||||
39 | |||||
40 | Some of the characters are I<reserved> for use as delimiters or as | ||||
41 | part of certain URI components. These must be escaped if they are to | ||||
42 | be treated as ordinary data. Read RFC 3986 for further details. | ||||
43 | |||||
44 | The functions provided (and exported by default) from this module are: | ||||
45 | |||||
46 | =over 4 | ||||
47 | |||||
48 | =item uri_escape( $string ) | ||||
49 | |||||
50 | =item uri_escape( $string, $unsafe ) | ||||
51 | |||||
52 | Replaces each unsafe character in the $string with the corresponding | ||||
53 | escape sequence and returns the result. The $string argument should | ||||
54 | be a string of bytes. The uri_escape() function will croak if given a | ||||
55 | characters with code above 255. Use uri_escape_utf8() if you know you | ||||
56 | have such chars or/and want chars in the 128 .. 255 range treated as | ||||
57 | UTF-8. | ||||
58 | |||||
59 | The uri_escape() function takes an optional second argument that | ||||
60 | overrides the set of characters that are to be escaped. The set is | ||||
61 | specified as a string that can be used in a regular expression | ||||
62 | character class (between [ ]). E.g.: | ||||
63 | |||||
64 | "\x00-\x1f\x7f-\xff" # all control and hi-bit characters | ||||
65 | "a-z" # all lower case characters | ||||
66 | "^A-Za-z" # everything not a letter | ||||
67 | |||||
68 | The default set of characters to be escaped is all those which are | ||||
69 | I<not> part of the C<unreserved> character class shown above as well | ||||
70 | as the reserved characters. I.e. the default is: | ||||
71 | |||||
72 | "^A-Za-z0-9\-\._~" | ||||
73 | |||||
74 | The second argument can also be specified as a regular expression object: | ||||
75 | |||||
76 | qr/[^A-Za-z]/ | ||||
77 | |||||
78 | Any strings matched by this regular expression will have all of their | ||||
79 | characters escaped. | ||||
80 | |||||
81 | =item uri_escape_utf8( $string ) | ||||
82 | |||||
83 | =item uri_escape_utf8( $string, $unsafe ) | ||||
84 | |||||
85 | Works like uri_escape(), but will encode chars as UTF-8 before | ||||
86 | escaping them. This makes this function able to deal with characters | ||||
87 | with code above 255 in $string. Note that chars in the 128 .. 255 | ||||
88 | range will be escaped differently by this function compared to what | ||||
89 | uri_escape() would. For chars in the 0 .. 127 range there is no | ||||
90 | difference. | ||||
91 | |||||
92 | Equivalent to: | ||||
93 | |||||
94 | utf8::encode($string); | ||||
95 | my $uri = uri_escape($string); | ||||
96 | |||||
97 | Note: JavaScript has a function called escape() that produces the | ||||
98 | sequence "%uXXXX" for chars in the 256 .. 65535 range. This function | ||||
99 | has really nothing to do with URI escaping but some folks got confused | ||||
100 | since it "does the right thing" in the 0 .. 255 range. Because of | ||||
101 | this you sometimes see "URIs" with these kind of escapes. The | ||||
102 | JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). | ||||
103 | |||||
104 | =item uri_unescape($string,...) | ||||
105 | |||||
106 | Returns a string with each %XX sequence replaced with the actual byte | ||||
107 | (octet). | ||||
108 | |||||
109 | This does the same as: | ||||
110 | |||||
111 | $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
112 | |||||
113 | but does not modify the string in-place as this RE would. Using the | ||||
114 | uri_unescape() function instead of the RE might make the code look | ||||
115 | cleaner and is a few characters less to type. | ||||
116 | |||||
117 | In a simple benchmark test I did, | ||||
118 | calling the function (instead of the inline RE above) if a few chars | ||||
119 | were unescaped was something like 40% slower, and something like 700% slower if none were. If | ||||
120 | you are going to unescape a lot of times it might be a good idea to | ||||
121 | inline the RE. | ||||
122 | |||||
123 | If the uri_unescape() function is passed multiple strings, then each | ||||
124 | one is returned unescaped. | ||||
125 | |||||
126 | =back | ||||
127 | |||||
128 | The module can also export the C<%escapes> hash, which contains the | ||||
129 | mapping from all 256 bytes to the corresponding escape codes. Lookup | ||||
130 | in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))> | ||||
131 | each time. | ||||
132 | |||||
133 | =head1 SEE ALSO | ||||
134 | |||||
135 | L<URI> | ||||
136 | |||||
137 | |||||
138 | =head1 COPYRIGHT | ||||
139 | |||||
140 | Copyright 1995-2004 Gisle Aas. | ||||
141 | |||||
142 | This program is free software; you can redistribute it and/or modify | ||||
143 | it under the same terms as Perl itself. | ||||
144 | |||||
145 | =cut | ||||
146 | |||||
147 | 3 | 47µs | 3 | 33µs | # spent 20µs (7+13) within URI::Escape::BEGIN@147 which was called:
# once (7µs+13µs) by OpenAPI::Modern::BEGIN@25 at line 147 # spent 20µs making 1 call to URI::Escape::BEGIN@147
# spent 7µs making 1 call to UNIVERSAL::VERSION
# spent 6µs making 1 call to Exporter::import |
148 | our %escapes; | ||||
149 | 1 | 1µs | our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); | ||
150 | 1 | 0s | our @EXPORT_OK = qw(%escapes); | ||
151 | 1 | 0s | our $VERSION = '5.17'; | ||
152 | |||||
153 | 2 | 205µs | 1 | 2µs | # spent 2µs within URI::Escape::BEGIN@153 which was called:
# once (2µs+0s) by OpenAPI::Modern::BEGIN@25 at line 153 # spent 2µs making 1 call to URI::Escape::BEGIN@153 |
154 | |||||
155 | # Build a char->hex map | ||||
156 | 1 | 1µs | for (0..255) { | ||
157 | 256 | 152µs | $escapes{chr($_)} = sprintf("%%%02X", $_); | ||
158 | } | ||||
159 | |||||
160 | 1 | 1µs | my %subst; # compiled patterns | ||
161 | |||||
162 | 1 | 8µs | 2 | 3µs | my %Unsafe = ( # spent 3µs making 2 calls to URI::Escape::CORE:qr, avg 2µs/call |
163 | RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/, | ||||
164 | RFC3986 => qr/[^A-Za-z0-9\-\._~]/, | ||||
165 | ); | ||||
166 | |||||
167 | sub uri_escape { | ||||
168 | my($text, $patn) = @_; | ||||
169 | return undef unless defined $text; | ||||
170 | my $re; | ||||
171 | if (defined $patn){ | ||||
172 | if (ref $patn eq 'Regexp') { | ||||
173 | $text =~ s{($patn)}{ | ||||
174 | join('', map +($escapes{$_} || _fail_hi($_)), split //, "$1") | ||||
175 | }ge; | ||||
176 | return $text; | ||||
177 | } | ||||
178 | $re = $subst{$patn}; | ||||
179 | if (!defined $re) { | ||||
180 | $re = $patn; | ||||
181 | # we need to escape the [] characters, except for those used in | ||||
182 | # posix classes. if they are prefixed by a backslash, allow them | ||||
183 | # through unmodified. | ||||
184 | $re =~ s{(\[:\w+:\])|(\\)?([\[\]]|\\\z)}{ | ||||
185 | defined $1 ? $1 : defined $2 ? "$2$3" : "\\$3" | ||||
186 | }ge; | ||||
187 | eval { | ||||
188 | # disable the warnings here, since they will trigger later | ||||
189 | # when used, and we only want them to appear once per call, | ||||
190 | # but every time the same pattern is used. | ||||
191 | 2 | 246µs | 2 | 31µs | # spent 18µs (5+13) within URI::Escape::BEGIN@191 which was called:
# once (5µs+13µs) by OpenAPI::Modern::BEGIN@25 at line 191 # spent 18µs making 1 call to URI::Escape::BEGIN@191
# spent 13µs making 1 call to warnings::unimport |
192 | $re = $subst{$patn} = qr{[$re]}; | ||||
193 | 1; | ||||
194 | } or Carp::croak("uri_escape: $@"); | ||||
195 | } | ||||
196 | } | ||||
197 | else { | ||||
198 | $re = $Unsafe{RFC3986}; | ||||
199 | } | ||||
200 | $text =~ s/($re)/$escapes{$1} || _fail_hi($1)/ge; | ||||
201 | $text; | ||||
202 | } | ||||
203 | |||||
204 | sub _fail_hi { | ||||
205 | my $chr = shift; | ||||
206 | Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr)); | ||||
207 | } | ||||
208 | |||||
209 | sub uri_escape_utf8 { | ||||
210 | my $text = shift; | ||||
211 | return undef unless defined $text; | ||||
212 | utf8::encode($text); | ||||
213 | return uri_escape($text, @_); | ||||
214 | } | ||||
215 | |||||
216 | sub uri_unescape { | ||||
217 | # Note from RFC1630: "Sequences which start with a percent sign | ||||
218 | # but are not followed by two hexadecimal characters are reserved | ||||
219 | # for future extension" | ||||
220 | my $str = shift; | ||||
221 | if (@_ && wantarray) { | ||||
222 | # not executed for the common case of a single argument | ||||
223 | my @str = ($str, @_); # need to copy | ||||
224 | for (@str) { | ||||
225 | s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
226 | } | ||||
227 | return @str; | ||||
228 | } | ||||
229 | $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; | ||||
230 | $str; | ||||
231 | } | ||||
232 | |||||
233 | # XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format. | ||||
234 | sub escape_char { | ||||
235 | # Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1). | ||||
236 | # The following forces a fetch to occur beforehand. | ||||
237 | my $dummy = substr($_[0], 0, 0); | ||||
238 | |||||
239 | if (utf8::is_utf8($_[0])) { | ||||
240 | my $s = shift; | ||||
241 | utf8::encode($s); | ||||
242 | unshift(@_, $s); | ||||
243 | } | ||||
244 | |||||
245 | return join '', @URI::Escape::escapes{split //, $_[0]}; | ||||
246 | } | ||||
247 | |||||
248 | 1 | 5µs | 1; | ||
# spent 3µs within URI::Escape::CORE:qr which was called 2 times, avg 2µs/call:
# 2 times (3µs+0s) by OpenAPI::Modern::BEGIN@25 at line 162, avg 2µs/call |