Profile of URI/Escape.pm

Filename	/Users/ether/.perlbrew/libs/36.0@std/lib/perl5/URI/Escape.pm
Statements	Executed 274 statements in 748µs

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	18µs	19µs	URI::Escape::::BEGIN@3URI::Escape::BEGIN@3
1	1	1	7µs	20µs	URI::Escape::::BEGIN@147URI::Escape::BEGIN@147
1	1	1	5µs	18µs	URI::Escape::::BEGIN@191URI::Escape::BEGIN@191
1	1	1	3µs	22µs	URI::Escape::::BEGIN@4URI::Escape::BEGIN@4
2	1	1	3µs	3µs	URI::Escape::::CORE:qrURI::Escape::CORE:qr (opcode)
1	1	1	2µs	2µs	URI::Escape::::BEGIN@153URI::Escape::BEGIN@153
0	0	0	0s	0s	URI::Escape::::_fail_hiURI::Escape::_fail_hi
0	0	0	0s	0s	URI::Escape::::escape_charURI::Escape::escape_char
0	0	0	0s	0s	URI::Escape::::uri_escapeURI::Escape::uri_escape
0	0	0	0s	0s	URI::Escape::::uri_escape_utf8URI::Escape::uri_escape_utf8
0	0	0	0s	0s	URI::Escape::::uri_unescapeURI::Escape::uri_unescape

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					package URI::Escape;
2
3	2	18µs	2	20µs	# spent 19µs (18+1) within URI::Escape::BEGIN@3 which was called: # once (18µs+1µs) by OpenAPI::Modern::BEGIN@25 at line 3 use strict; # spent 19µs making 1 call to URI::Escape::BEGIN@3 # spent 1µs making 1 call to strict::import
4	2	64µs	2	41µs	# spent 22µs (3+19) within URI::Escape::BEGIN@4 which was called: # once (3µs+19µs) by OpenAPI::Modern::BEGIN@25 at line 4 use warnings; # spent 22µs making 1 call to URI::Escape::BEGIN@4 # spent 19µs making 1 call to warnings::import
5
6					=head1 NAME
7
8					URI::Escape - Percent-encode and percent-decode unsafe characters
9
10					=head1 SYNOPSIS
11
12					use URI::Escape;
13					$safe = uri_escape("10% is enough\n");
14					$verysafe = uri_escape("foo", "\0-\377");
15					$str = uri_unescape($safe);
16
17					=head1 DESCRIPTION
18
19					This module provides functions to percent-encode and percent-decode URI strings as
20					defined by RFC 3986. Percent-encoding URI's is informally called "URI escaping".
21					This is the terminology used by this module, which predates the formalization of the
22					terms by the RFC by several years.
23
24					A URI consists of a restricted set of characters. The restricted set
25					of characters consists of digits, letters, and a few graphic symbols
26					chosen from those common to most of the character encodings and input
27					facilities available to Internet users. They are made up of the
28					"unreserved" and "reserved" character sets as defined in RFC 3986.
29
30					unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
31					reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@"
32					"!" / "$" / "&" / "'" / "(" / ")"
33					/ "*" / "+" / "," / ";" / "="
34
35					In addition, any byte (octet) can be represented in a URI by an escape
36					sequence: a triplet consisting of the character "%" followed by two
37					hexadecimal digits. A byte can also be represented directly by a
38					character, using the US-ASCII character for that octet.
39
40					Some of the characters are I<reserved> for use as delimiters or as
41					part of certain URI components. These must be escaped if they are to
42					be treated as ordinary data. Read RFC 3986 for further details.
43
44					The functions provided (and exported by default) from this module are:
45
46					=over 4
47
48					=item uri_escape( $string )
49
50					=item uri_escape( $string, $unsafe )
51
52					Replaces each unsafe character in the $string with the corresponding
53					escape sequence and returns the result. The $string argument should
54					be a string of bytes. The uri_escape() function will croak if given a
55					characters with code above 255. Use uri_escape_utf8() if you know you
56					have such chars or/and want chars in the 128 .. 255 range treated as
57					UTF-8.
58
59					The uri_escape() function takes an optional second argument that
60					overrides the set of characters that are to be escaped. The set is
61					specified as a string that can be used in a regular expression
62					character class (between [ ]). E.g.:
63
64					"\x00-\x1f\x7f-\xff" # all control and hi-bit characters
65					"a-z" # all lower case characters
66					"^A-Za-z" # everything not a letter
67
68					The default set of characters to be escaped is all those which are
69					I<not> part of the C<unreserved> character class shown above as well
70					as the reserved characters. I.e. the default is:
71
72					"^A-Za-z0-9\-\._~"
73
74					The second argument can also be specified as a regular expression object:
75
76					qr/[^A-Za-z]/
77
78					Any strings matched by this regular expression will have all of their
79					characters escaped.
80
81					=item uri_escape_utf8( $string )
82
83					=item uri_escape_utf8( $string, $unsafe )
84
85					Works like uri_escape(), but will encode chars as UTF-8 before
86					escaping them. This makes this function able to deal with characters
87					with code above 255 in $string. Note that chars in the 128 .. 255
88					range will be escaped differently by this function compared to what
89					uri_escape() would. For chars in the 0 .. 127 range there is no
90					difference.
91
92					Equivalent to:
93
94					utf8::encode($string);
95					my $uri = uri_escape($string);
96
97					Note: JavaScript has a function called escape() that produces the
98					sequence "%uXXXX" for chars in the 256 .. 65535 range. This function
99					has really nothing to do with URI escaping but some folks got confused
100					since it "does the right thing" in the 0 .. 255 range. Because of
101					this you sometimes see "URIs" with these kind of escapes. The
102					JavaScript encodeURIComponent() function is similar to uri_escape_utf8().
103
104					=item uri_unescape($string,...)
105
106					Returns a string with each %XX sequence replaced with the actual byte
107					(octet).
108
109					This does the same as:
110
111					$string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
112
113					but does not modify the string in-place as this RE would. Using the
114					uri_unescape() function instead of the RE might make the code look
115					cleaner and is a few characters less to type.
116
117					In a simple benchmark test I did,
118					calling the function (instead of the inline RE above) if a few chars
119					were unescaped was something like 40% slower, and something like 700% slower if none were. If
120					you are going to unescape a lot of times it might be a good idea to
121					inline the RE.
122
123					If the uri_unescape() function is passed multiple strings, then each
124					one is returned unescaped.
125
126					=back
127
128					The module can also export the C<%escapes> hash, which contains the
129					mapping from all 256 bytes to the corresponding escape codes. Lookup
130					in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))>
131					each time.
132
133					=head1 SEE ALSO
134
135					L<URI>
136
137
138					=head1 COPYRIGHT
139
140					Copyright 1995-2004 Gisle Aas.
141
142					This program is free software; you can redistribute it and/or modify
143					it under the same terms as Perl itself.
144
145					=cut
146
147	3	47µs	3	33µs	# spent 20µs (7+13) within URI::Escape::BEGIN@147 which was called: # once (7µs+13µs) by OpenAPI::Modern::BEGIN@25 at line 147 use Exporter 5.57 'import'; # spent 20µs making 1 call to URI::Escape::BEGIN@147 # spent 7µs making 1 call to UNIVERSAL::VERSION # spent 6µs making 1 call to Exporter::import
148					our %escapes;
149	1	1µs			our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8);
150	1	0s			our @EXPORT_OK = qw(%escapes);
151	1	0s			our $VERSION = '5.17';
152
153	2	205µs	1	2µs	# spent 2µs within URI::Escape::BEGIN@153 which was called: # once (2µs+0s) by OpenAPI::Modern::BEGIN@25 at line 153 use Carp (); # spent 2µs making 1 call to URI::Escape::BEGIN@153
154
155					# Build a char->hex map
156	1	1µs			for (0..255) {
157	256	152µs			$escapes{chr($_)} = sprintf("%%%02X", $_);
158					}
159
160	1	1µs			my %subst; # compiled patterns
161
162	1	8µs	2	3µs	my %Unsafe = ( # spent 3µs making 2 calls to URI::Escape::CORE:qr, avg 2µs/call
163					RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/,
164					RFC3986 => qr/[^A-Za-z0-9\-\._~]/,
165					);
166
167					sub uri_escape {
168					my($text, $patn) = @_;
169					return undef unless defined $text;
170					my $re;
171					if (defined $patn){
172					if (ref $patn eq 'Regexp') {
173					$text =~ s{($patn)}{
174					join('', map +($escapes{$_} \|\| _fail_hi($_)), split //, "$1")
175					}ge;
176					return $text;
177					}
178					$re = $subst{$patn};
179					if (!defined $re) {
180					$re = $patn;
181					# we need to escape the [] characters, except for those used in
182					# posix classes. if they are prefixed by a backslash, allow them
183					# through unmodified.
184					$re =~ s{(\[:\w+:\])\|(\\)?([\[\]]\|\\\z)}{
185					defined $1 ? $1 : defined $2 ? "$2$3" : "\\$3"
186					}ge;
187					eval {
188					# disable the warnings here, since they will trigger later
189					# when used, and we only want them to appear once per call,
190					# but every time the same pattern is used.
191	2	246µs	2	31µs	# spent 18µs (5+13) within URI::Escape::BEGIN@191 which was called: # once (5µs+13µs) by OpenAPI::Modern::BEGIN@25 at line 191 no warnings 'regexp'; # spent 18µs making 1 call to URI::Escape::BEGIN@191 # spent 13µs making 1 call to warnings::unimport
192					$re = $subst{$patn} = qr{[$re]};
193					1;
194					} or Carp::croak("uri_escape: $@");
195					}
196					}
197					else {
198					$re = $Unsafe{RFC3986};
199					}
200					$text =~ s/($re)/$escapes{$1} \|\| _fail_hi($1)/ge;
201					$text;
202					}
203
204					sub _fail_hi {
205					my $chr = shift;
206					Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr));
207					}
208
209					sub uri_escape_utf8 {
210					my $text = shift;
211					return undef unless defined $text;
212					utf8::encode($text);
213					return uri_escape($text, @_);
214					}
215
216					sub uri_unescape {
217					# Note from RFC1630: "Sequences which start with a percent sign
218					# but are not followed by two hexadecimal characters are reserved
219					# for future extension"
220					my $str = shift;
221					if (@_ && wantarray) {
222					# not executed for the common case of a single argument
223					my @str = ($str, @_); # need to copy
224					for (@str) {
225					s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
226					}
227					return @str;
228					}
229					$str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
230					$str;
231					}
232
233					# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format.
234					sub escape_char {
235					# Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1).
236					# The following forces a fetch to occur beforehand.
237					my $dummy = substr($_[0], 0, 0);
238
239					if (utf8::is_utf8($_[0])) {
240					my $s = shift;
241					utf8::encode($s);
242					unshift(@_, $s);
243					}
244
245					return join '', @URI::Escape::escapes{split //, $_[0]};
246					}
247
248	1	5µs			1;

					# spent 3µs within URI::Escape::CORE:qr which was called 2 times, avg 2µs/call: # 2 times (3µs+0s) by OpenAPI::Modern::BEGIN@25 at line 162, avg 2µs/call sub URI::Escape::CORE:qr; # opcode