2 // System.Web.HttpUtility
5 // Patrik Torstensson (Patrik.Torstensson@labs2.com)
6 // Wictor Wilén (decode/encode functions) (wictor@ibizkit.se)
7 // Tim Coleman (tim@timcoleman.com)
8 // Gonzalo Paniagua Javier (gonzalo@ximian.com)
10 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System.Collections;
34 using System.Collections.Specialized;
35 using System.Globalization;
39 namespace System.Compat.Web
41 public sealed class HttpUtility
// Lookup table from HTML entity name (e.g. "frac14", stored without '&' and ';')
// to the character it stands for. Assigned by InitEntities().
45 private static Hashtable entities;
// Private lock object. NOTE(review): presumably guards the lazy creation of
// `entities` inside the Entities getter - the getter body is not visible here, confirm.
46 private static readonly object lock_ = new object();
// Accessor for the entity table; HtmlDecode goes through this property
// (Entities.ContainsKey / Entities[...]) rather than touching the field directly.
50 private static Hashtable Entities
// Assigns a fresh Hashtable to the static `entities` field. Keys are entity names
// as they appear between '&' and ';'; values are the corresponding characters.
68 private static void InitEntities()
70 // Build the hash table of HTML entity references. This list comes
71 // from the HTML 4.01 W3C recommendation.
72 entities = new Hashtable
// --- Latin-1 supplement: fractions and punctuation ---
102 {"frac14", '\u00BC'},
103 {"frac12", '\u00BD'},
104 {"frac34", '\u00BE'},
105 {"iquest", '\u00BF'},
// --- Latin-1 supplement: accented capital letters ---
106 {"Agrave", '\u00C0'},
107 {"Aacute", '\u00C1'},
109 {"Atilde", '\u00C3'},
113 {"Ccedil", '\u00C7'},
114 {"Egrave", '\u00C8'},
115 {"Eacute", '\u00C9'},
118 {"Igrave", '\u00CC'},
119 {"Iacute", '\u00CD'},
123 {"Ntilde", '\u00D1'},
124 {"Ograve", '\u00D2'},
125 {"Oacute", '\u00D3'},
127 {"Otilde", '\u00D5'},
130 {"Oslash", '\u00D8'},
131 {"Ugrave", '\u00D9'},
132 {"Uacute", '\u00DA'},
135 {"Yacute", '\u00DD'},
// --- Latin-1 supplement: accented small letters (names differ from the capitals only by case) ---
138 {"agrave", '\u00E0'},
139 {"aacute", '\u00E1'},
141 {"atilde", '\u00E3'},
145 {"ccedil", '\u00E7'},
146 {"egrave", '\u00E8'},
147 {"eacute", '\u00E9'},
150 {"igrave", '\u00EC'},
151 {"iacute", '\u00ED'},
155 {"ntilde", '\u00F1'},
156 {"ograve", '\u00F2'},
157 {"oacute", '\u00F3'},
159 {"otilde", '\u00F5'},
161 {"divide", '\u00F7'},
162 {"oslash", '\u00F8'},
163 {"ugrave", '\u00F9'},
164 {"uacute", '\u00FA'},
167 {"yacute", '\u00FD'},
// --- Greek letters (capital, then small) ---
175 {"Epsilon", '\u0395'},
181 {"Lambda", '\u039B'},
185 {"Omicron", '\u039F'},
190 {"Upsilon", '\u03A5'},
199 {"epsilon", '\u03B5'},
205 {"lambda", '\u03BB'},
209 {"omicron", '\u03BF'},
212 {"sigmaf", '\u03C2'},
215 {"upsilon", '\u03C5'},
220 {"thetasym", '\u03D1'},
// --- Punctuation, letter-like symbols, mathematical operators, brackets, card suits ---
224 {"hellip", '\u2026'},
229 {"weierp", '\u2118'},
233 {"alefsym", '\u2135'},
245 {"forall", '\u2200'},
256 {"lowast", '\u2217'},
266 {"there4", '\u2234'},
280 {"otimes", '\u2297'},
285 {"lfloor", '\u230A'},
286 {"rfloor", '\u230B'},
290 {"spades", '\u2660'},
292 {"hearts", '\u2665'},
// --- Latin Extended-A letters, spacing and general punctuation ---
300 {"Scaron", '\u0160'},
301 {"scaron", '\u0161'},
307 {"thinsp", '\u2009'},
320 {"dagger", '\u2020'},
321 {"Dagger", '\u2021'},
322 {"permil", '\u2030'},
323 {"lsaquo", '\u2039'},
324 {"rsaquo", '\u203A'},
329 #endregion // Constructors
// Lowercase hexadecimal digits indexed by nibble value (0-15); the URL encoders use
// this table to write the two (or four) hex digits of a %XX / %uXXXX escape.
333 private static readonly char[] hexChars = "0123456789abcdef".ToCharArray();
/// <summary>
/// Attribute-encodes <paramref name="s"/> and writes the result to <paramref name="output"/>.
/// </summary>
/// <param name="s">The text to encode; handed unchanged to the single-argument overload.</param>
/// <param name="output">The writer that receives the encoded text.</param>
public static void HtmlAttributeEncode(string s, TextWriter output)
{
    string encoded = HtmlAttributeEncode(s);
    output.Write(encoded);
}
/// <summary>
/// Minimally encodes a string so it can be placed inside a double-quoted HTML attribute value.
/// </summary>
/// <remarks>
/// Only the ampersand, the double quote and the less-than sign are replaced (by
/// <c>&amp;amp;</c>, <c>&amp;quot;</c> and <c>&amp;lt;</c>); every other character is copied as-is.
/// </remarks>
/// <param name="s">The text to encode. May be null.</param>
/// <returns>
/// null when <paramref name="s"/> is null, the same string instance when nothing needs
/// encoding, otherwise a new encoded string.
/// </returns>
public static string HtmlAttributeEncode(string s)
{
    if (s == null)
    {
        return null;
    }

    // Cheap pre-scan so that strings without special characters are returned untouched.
    var needEncode = false;
    for (var i = 0; i < s.Length; i++)
    {
        if (s[i] == '&' || s[i] == '"' || s[i] == '<')
        {
            needEncode = true;
            break;
        }
    }

    if (!needEncode)
    {
        return s;
    }

    var output = new StringBuilder();
    var len = s.Length;
    for (var i = 0; i < len; i++)
    {
        switch (s[i])
        {
            case '&':
                output.Append("&amp;");
                break;
            case '"':
                output.Append("&quot;");
                break;
            case '<':
                output.Append("&lt;");
                break;
            default:
                output.Append(s[i]);
                break;
        }
    }

    return output.ToString();
}
/// <summary>
/// Decodes a URL-encoded string, interpreting escaped bytes as UTF-8.
/// </summary>
/// <param name="str">The URL-encoded text.</param>
/// <returns>Whatever the (string, Encoding) overload returns for UTF-8.</returns>
public static string UrlDecode(string str)
{
    Encoding utf8 = Encoding.UTF8;
    return UrlDecode(str, utf8);
}
/// <summary>
/// Decodes the bytes written to <paramref name="b"/> so far into characters using <paramref name="e"/>.
/// </summary>
/// <param name="b">The stream whose content is decoded.</param>
/// <param name="e">The encoding used to interpret the bytes.</param>
/// <returns>The decoded characters.</returns>
private static char[] GetChars(MemoryStream b, Encoding e)
{
    // Only the first Length bytes of the buffer returned by GetBuffer are decoded.
    byte[] buffer = b.GetBuffer();
    int used = (int) b.Length;
    return e.GetChars(buffer, 0, used);
}
/// <summary>
/// Decodes a URL-encoded string: <c>+</c> becomes a space, <c>%XX</c> escapes are collected
/// as bytes and decoded with <paramref name="e"/>, and <c>%uXXXX</c> escapes become the
/// UTF-16 code unit they name.
/// </summary>
/// <param name="s">The URL-encoded text. May be null.</param>
/// <param name="e">Encoding for <c>%XX</c> byte sequences; UTF-8 is used when null.</param>
/// <returns>
/// null when <paramref name="s"/> is null, the same instance when it contains neither
/// <c>%</c> nor <c>+</c>, otherwise the decoded text. Malformed escapes are copied literally.
/// </returns>
public static string UrlDecode(string s, Encoding e)
{
    if (s == null)
    {
        return null;
    }

    if (s.IndexOf('%') == -1 && s.IndexOf('+') == -1)
    {
        return s;
    }

    if (e == null)
    {
        e = Encoding.UTF8;
    }

    var output = new StringBuilder();
    var len = s.Length;
    // Consecutive %XX escapes are buffered here so multi-byte sequences decode as one unit.
    var bytes = new MemoryStream();

    for (var i = 0; i < len; i++)
    {
        if (s[i] == '%' && i + 2 < len && s[i + 1] != '%')
        {
            var isUnicodeEscape = s[i + 1] == 'u' && i + 5 < len;
            if (!isUnicodeEscape)
            {
                var octet = GetChar(s, i + 1, 2);
                if (octet != -1)
                {
                    bytes.WriteByte((byte) octet);
                    i += 2;
                    continue;
                }
            }

            // Everything from here on appends characters directly, so the buffered bytes
            // must be emitted first; otherwise a literal '%' would jump ahead of them.
            if (bytes.Length > 0)
            {
                output.Append(GetChars(bytes, e));
                bytes.SetLength(0);
            }

            var codeUnit = isUnicodeEscape ? GetChar(s, i + 2, 4) : -1;
            if (codeUnit != -1)
            {
                output.Append((char) codeUnit);
                i += 5;
            }
            else
            {
                // Not a valid escape: keep the '%' and let the following characters be copied normally.
                output.Append('%');
            }

            continue;
        }

        if (bytes.Length > 0)
        {
            output.Append(GetChars(bytes, e));
            bytes.SetLength(0);
        }

        if (s[i] == '+')
        {
            output.Append(' ');
        }
        else
        {
            output.Append(s[i]);
        }
    }

    if (bytes.Length > 0)
    {
        output.Append(GetChars(bytes, e));
    }

    return output.ToString();
}
/// <summary>
/// Decodes a whole URL-encoded byte array into a string.
/// </summary>
/// <param name="bytes">The URL-encoded bytes. May be null.</param>
/// <param name="e">The encoding passed on to the ranged overload.</param>
/// <returns>null when <paramref name="bytes"/> is null, otherwise the decoded text.</returns>
public static string UrlDecode(byte[] bytes, Encoding e)
{
    if (bytes == null)
    {
        return null;
    }

    return UrlDecode(bytes, 0, bytes.Length, e);
}
/// <summary>
/// Converts one ASCII hexadecimal digit to its numeric value.
/// </summary>
/// <param name="b">The byte holding the character code of the digit.</param>
/// <returns>0-15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for any other byte.</returns>
private static int GetInt(byte b)
{
    var decimalDigit = b - '0';
    if (decimalDigit >= 0 && decimalDigit <= 9)
    {
        return decimalDigit;
    }

    // Setting bit 0x20 folds 'A'-'F' onto 'a'-'f'; no other byte lands in that range.
    var letterIndex = (b | 0x20) - 'a';
    if (letterIndex >= 0 && letterIndex <= 5)
    {
        return letterIndex + 10;
    }

    return -1;
}
/// <summary>
/// Parses <paramref name="length"/> hexadecimal digits from <paramref name="bytes"/>,
/// starting at <paramref name="offset"/>, into a number.
/// </summary>
/// <param name="bytes">Buffer holding ASCII hex digits.</param>
/// <param name="offset">Index of the first digit.</param>
/// <param name="length">Number of digits to read.</param>
/// <returns>The parsed value, or -1 as soon as a non-hex byte is met.</returns>
private static int GetChar(byte[] bytes, int offset, int length)
{
    var result = 0;
    var stop = offset + length;
    var position = offset;
    while (position < stop)
    {
        var digit = GetInt(bytes[position]);
        if (digit == -1)
        {
            return -1;
        }

        // Each hex digit contributes four bits.
        result = (result << 4) + digit;
        position++;
    }

    return result;
}
/// <summary>
/// Parses <paramref name="length"/> hexadecimal digits from <paramref name="str"/>,
/// starting at <paramref name="offset"/>, into a number.
/// </summary>
/// <param name="str">Text holding the hex digits.</param>
/// <param name="offset">Index of the first digit.</param>
/// <param name="length">Number of digits to read.</param>
/// <returns>The parsed value, or -1 when a non-ASCII or non-hex character is met.</returns>
private static int GetChar(string str, int offset, int length)
{
    var result = 0;
    var stop = offset + length;
    var position = offset;
    while (position < stop)
    {
        var ch = str[position];

        // Characters above 127 are rejected before the narrowing cast to byte,
        // which could otherwise alias them onto a valid hex digit.
        var digit = ch > 127 ? -1 : GetInt((byte) ch);
        if (digit == -1)
        {
            return -1;
        }

        result = (result << 4) + digit;
        position++;
    }

    return result;
}
/// <summary>
/// Decodes a range of URL-encoded bytes into a string: <c>+</c> becomes a space,
/// <c>%XX</c> escapes are collected as bytes and decoded with <paramref name="e"/>, and
/// <c>%uXXXX</c> escapes become the UTF-16 code unit they name.
/// </summary>
/// <param name="bytes">The URL-encoded bytes. May be null.</param>
/// <param name="offset">Index of the first byte to decode.</param>
/// <param name="count">Number of bytes to decode.</param>
/// <param name="e">Encoding used for <c>%XX</c> byte sequences.</param>
/// <returns>
/// null when <paramref name="bytes"/> is null, an empty string when <paramref name="count"/>
/// is 0, otherwise the decoded text. Malformed escapes are copied literally.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="offset"/> or <paramref name="count"/> does not describe a range inside <paramref name="bytes"/>.
/// </exception>
public static string UrlDecode(byte[] bytes, int offset, int count, Encoding e)
{
    if (bytes == null)
    {
        return null;
    }

    if (count == 0)
    {
        return String.Empty;
    }

    if (offset < 0 || offset > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("offset");
    }

    // Compared by subtraction so a very large count cannot overflow offset + count.
    if (count < 0 || count > bytes.Length - offset)
    {
        throw new ArgumentOutOfRangeException("count");
    }

    var output = new StringBuilder();
    // Consecutive %XX escapes are buffered here so multi-byte sequences decode as one unit.
    var acc = new MemoryStream();

    var end = count + offset;
    for (var i = offset; i < end; i++)
    {
        // The look-ahead bound is the end of the range, not the count: with a non-zero
        // offset, comparing against count would skip valid escapes.
        if (bytes[i] == '%' && i + 2 < end && bytes[i + 1] != '%')
        {
            int xchar;
            if (bytes[i + 1] == (byte) 'u' && i + 5 < end)
            {
                if (acc.Length > 0)
                {
                    output.Append(GetChars(acc, e));
                    acc.SetLength(0);
                }

                xchar = GetChar(bytes, i + 2, 4);
                if (xchar != -1)
                {
                    output.Append((char) xchar);
                    i += 5;
                    continue;
                }
            }
            else if ((xchar = GetChar(bytes, i + 1, 2)) != -1)
            {
                acc.WriteByte((byte) xchar);
                i += 2;
                continue;
            }
        }

        // Reached for ordinary bytes and for malformed escapes: emit buffered bytes first
        // so the output order matches the input order.
        if (acc.Length > 0)
        {
            output.Append(GetChars(acc, e));
            acc.SetLength(0);
        }

        if (bytes[i] == '+')
        {
            output.Append(' ');
        }
        else
        {
            output.Append((char) bytes[i]);
        }
    }

    if (acc.Length > 0)
    {
        output.Append(GetChars(acc, e));
    }

    return output.ToString();
}
/// <summary>
/// URL-decodes a whole byte array into a new byte array.
/// </summary>
/// <param name="bytes">The URL-encoded bytes. May be null.</param>
/// <returns>null when <paramref name="bytes"/> is null, otherwise the decoded bytes.</returns>
public static byte[] UrlDecodeToBytes(byte[] bytes)
{
    if (bytes == null)
    {
        return null;
    }

    return UrlDecodeToBytes(bytes, 0, bytes.Length);
}
/// <summary>
/// URL-decodes a string into bytes, converting the text to bytes with UTF-8 first.
/// </summary>
/// <param name="str">The URL-encoded text.</param>
/// <returns>Whatever the (string, Encoding) overload returns for UTF-8.</returns>
public static byte[] UrlDecodeToBytes(string str)
{
    Encoding utf8 = Encoding.UTF8;
    return UrlDecodeToBytes(str, utf8);
}
/// <summary>
/// URL-decodes a string into bytes, converting the text to bytes with <paramref name="e"/> first.
/// </summary>
/// <param name="str">The URL-encoded text. May be null.</param>
/// <param name="e">Encoding used to turn <paramref name="str"/> into bytes.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the decoded bytes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="e"/> is null and <paramref name="str"/> is not.</exception>
public static byte[] UrlDecodeToBytes(string str, Encoding e)
{
    // The null-string check deliberately comes first: a null string never throws.
    if (str == null)
    {
        return null;
    }

    if (e == null)
    {
        throw new ArgumentNullException("e");
    }

    byte[] raw = e.GetBytes(str);
    return UrlDecodeToBytes(raw);
}
/// <summary>
/// URL-decodes a range of bytes: <c>+</c> becomes a space and each valid <c>%XX</c>
/// escape becomes the byte it names. Everything else is copied unchanged.
/// </summary>
/// <param name="bytes">The URL-encoded bytes. May be null.</param>
/// <param name="offset">Index of the first byte to decode.</param>
/// <param name="count">Number of bytes to decode.</param>
/// <returns>
/// null when <paramref name="bytes"/> is null, an empty array when <paramref name="count"/>
/// is 0, otherwise the decoded bytes.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="offset"/> or <paramref name="count"/> does not describe a range inside <paramref name="bytes"/>.
/// </exception>
public static byte[] UrlDecodeToBytes(byte[] bytes, int offset, int count)
{
    if (bytes == null)
    {
        return null;
    }

    if (count == 0)
    {
        return new byte[0];
    }

    var len = bytes.Length;
    if (offset < 0 || offset >= len)
    {
        throw new ArgumentOutOfRangeException("offset");
    }

    if (count < 0 || offset > len - count)
    {
        throw new ArgumentOutOfRangeException("count");
    }

    var result = new MemoryStream();
    var end = offset + count;
    for (var pos = offset; pos < end; pos++)
    {
        var value = bytes[pos];
        if (value == (byte) '+')
        {
            value = (byte) ' ';
        }
        else if (value == (byte) '%' && pos + 2 < end)
        {
            // Two hex digits must follow inside the range; an invalid pair leaves the '%' as-is.
            var decoded = GetChar(bytes, pos + 1, 2);
            if (decoded != -1)
            {
                value = (byte) decoded;
                pos += 2;
            }
        }

        result.WriteByte(value);
    }

    return result.ToArray();
}
/// <summary>
/// URL-encodes a string using UTF-8 for characters that need escaping.
/// </summary>
/// <param name="str">The text to encode.</param>
/// <returns>Whatever the (string, Encoding) overload returns for UTF-8.</returns>
public static string UrlEncode(string str)
{
    Encoding utf8 = Encoding.UTF8;
    return UrlEncode(str, utf8);
}
/// <summary>
/// URL-encodes a string: the text is converted to bytes with <paramref name="Enc"/> and
/// every byte that is not an ASCII letter, digit or one of the characters accepted by
/// <c>NotEncoded</c> is escaped by <c>UrlEncodeToBytes</c>.
/// </summary>
/// <param name="s">The text to encode. May be null.</param>
/// <param name="Enc">Encoding used to turn the text into bytes before escaping.</param>
/// <returns>
/// null when <paramref name="s"/> is null, an empty string for empty input, the same
/// instance when nothing needs escaping, otherwise the complete encoded text.
/// </returns>
public static string UrlEncode(string s, Encoding Enc)
{
    if (s == null)
    {
        return null;
    }

    if (s == "")
    {
        return "";
    }

    // Pre-scan: strings made only of safe characters are returned untouched.
    var needEncode = false;
    var len = s.Length;
    for (var i = 0; i < len; i++)
    {
        var c = s[i];
        if ((c < '0') || (c < 'A' && c > '9') || (c > 'Z' && c < 'a') || (c > 'z'))
        {
            if (NotEncoded(c))
            {
                continue;
            }

            needEncode = true;
            break;
        }
    }

    if (!needEncode)
    {
        return s;
    }

    // avoided GetByteCount call
    var bytes = new byte[Enc.GetMaxByteCount(s.Length)];
    var realLen = Enc.GetBytes(s, 0, s.Length, bytes, 0);

    // The escaped output is longer than the input whenever a byte becomes %XX, so the
    // string must be built from the encoded array's own length, not from realLen.
    var encoded = UrlEncodeToBytes(bytes, 0, realLen);
    return Encoding.ASCII.GetString(encoded, 0, encoded.Length);
}
/// <summary>
/// URL-encodes a whole byte array and returns the result as an ASCII string.
/// </summary>
/// <param name="bytes">The bytes to encode. May be null.</param>
/// <returns>
/// null when <paramref name="bytes"/> is null, an empty string for an empty array,
/// otherwise the complete encoded text.
/// </returns>
public static string UrlEncode(byte[] bytes)
{
    if (bytes == null)
    {
        return null;
    }

    if (bytes.Length == 0)
    {
        return "";
    }

    // Use the encoded array's own length: escaping makes the output longer than the
    // input, so reusing bytes.Length here would cut the result short.
    var encoded = UrlEncodeToBytes(bytes, 0, bytes.Length);
    return Encoding.ASCII.GetString(encoded, 0, encoded.Length);
}
/// <summary>
/// URL-encodes a range of a byte array and returns the result as an ASCII string.
/// </summary>
/// <param name="bytes">The bytes to encode. May be null.</param>
/// <param name="offset">Index of the first byte to encode.</param>
/// <param name="count">Number of bytes to encode.</param>
/// <returns>
/// null when <paramref name="bytes"/> is null, an empty string for an empty array,
/// otherwise the complete encoded text for the requested range.
/// </returns>
public static string UrlEncode(byte[] bytes, int offset, int count)
{
    if (bytes == null)
    {
        return null;
    }

    if (bytes.Length == 0)
    {
        return "";
    }

    // offset/count select the INPUT range only. The encoded array starts at index 0 and
    // has its own length, so it is converted in full.
    var encoded = UrlEncodeToBytes(bytes, offset, count);
    return Encoding.ASCII.GetString(encoded, 0, encoded.Length);
}
/// <summary>
/// URL-encodes a string into bytes, converting the text with UTF-8 first.
/// </summary>
/// <param name="str">The text to encode.</param>
/// <returns>Whatever the (string, Encoding) overload returns for UTF-8.</returns>
public static byte[] UrlEncodeToBytes(string str)
{
    Encoding utf8 = Encoding.UTF8;
    return UrlEncodeToBytes(str, utf8);
}
/// <summary>
/// URL-encodes a string into bytes, converting the text with <paramref name="e"/> first.
/// </summary>
/// <param name="str">The text to encode. May be null.</param>
/// <param name="e">Encoding used to turn the text into bytes before escaping.</param>
/// <returns>
/// null when <paramref name="str"/> is null, an empty array for empty input, otherwise the encoded bytes.
/// </returns>
public static byte[] UrlEncodeToBytes(string str, Encoding e)
{
    if (str == null)
    {
        return null;
    }

    if (str.Length == 0)
    {
        return new byte[0];
    }

    byte[] raw = e.GetBytes(str);
    int rawLength = raw.Length;
    return UrlEncodeToBytes(raw, 0, rawLength);
}
/// <summary>
/// URL-encodes a whole byte array into a new byte array.
/// </summary>
/// <param name="bytes">The bytes to encode. May be null.</param>
/// <returns>
/// null when <paramref name="bytes"/> is null, an empty array for empty input, otherwise the encoded bytes.
/// </returns>
public static byte[] UrlEncodeToBytes(byte[] bytes)
{
    if (bytes == null)
    {
        return null;
    }

    int total = bytes.Length;
    if (total == 0)
    {
        return new byte[0];
    }

    return UrlEncodeToBytes(bytes, 0, total);
}
/// <summary>
/// Tells whether <paramref name="c"/> is one of the punctuation characters the URL
/// encoder writes through unescaped: <c>! ' ( ) * - . _</c>.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns>true for the eight characters listed above, false otherwise.</returns>
private static bool NotEncoded(char c)
{
    switch (c)
    {
        case '!':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '-':
        case '.':
        case '_':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Writes the URL-encoded form of one character to <paramref name="result"/>.
/// </summary>
/// <remarks>
/// Characters above 255 are written as <c>%uXXXX</c>; a space becomes <c>+</c>; ASCII letters,
/// digits and the characters accepted by <c>NotEncoded</c> are written as-is; everything else
/// becomes <c>%XX</c>, or <c>%u00XX</c> when <paramref name="isUnicode"/> is true and the
/// character is above 127.
/// </remarks>
/// <param name="c">The character (or byte widened to char) to encode.</param>
/// <param name="result">The stream receiving the ASCII output bytes.</param>
/// <param name="isUnicode">Selects the <c>%u00XX</c> form for characters 128-255.</param>
private static void UrlEncodeChar(char c, Stream result, bool isUnicode)
{
    int code = c;

    if (code > 255)
    {
        // Always %uXXXX here, regardless of isUnicode (the original code carried a FIXME
        // asking whether the non-unicode case should be treated as an error instead).
        result.WriteByte((byte) '%');
        result.WriteByte((byte) 'u');
        for (var shift = 12; shift >= 0; shift -= 4)
        {
            result.WriteByte((byte) hexChars[(code >> shift) & 0x0F]);
        }

        return;
    }

    if (c == ' ')
    {
        result.WriteByte((byte) '+');
        return;
    }

    var isAlphaNumeric = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    if (isAlphaNumeric || (c > ' ' && NotEncoded(c)))
    {
        result.WriteByte((byte) c);
        return;
    }

    result.WriteByte((byte) '%');
    if (isUnicode && code > 127)
    {
        result.WriteByte((byte) 'u');
        result.WriteByte((byte) '0');
        result.WriteByte((byte) '0');
    }

    result.WriteByte((byte) hexChars[code >> 4]);
    result.WriteByte((byte) hexChars[code & 0x0F]);
}
/// <summary>
/// URL-encodes a range of a byte array, one byte at a time, into a new byte array.
/// </summary>
/// <param name="bytes">The bytes to encode. May be null.</param>
/// <param name="offset">Index of the first byte to encode.</param>
/// <param name="count">Number of bytes to encode.</param>
/// <returns>
/// null when <paramref name="bytes"/> is null, an empty array when it is empty,
/// otherwise the encoded bytes.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="offset"/> or <paramref name="count"/> does not describe a range inside <paramref name="bytes"/>.
/// </exception>
public static byte[] UrlEncodeToBytes(byte[] bytes, int offset, int count)
{
    if (bytes == null)
    {
        return null;
    }

    var len = bytes.Length;
    if (len == 0)
    {
        return new byte[0];
    }

    if (offset < 0 || offset >= len)
    {
        throw new ArgumentOutOfRangeException("offset");
    }

    if (count < 0 || count > len - offset)
    {
        throw new ArgumentOutOfRangeException("count");
    }

    // count is only the initial capacity; the stream grows as escapes are written.
    var result = new MemoryStream(count);
    var position = offset;
    var end = offset + count;
    while (position < end)
    {
        UrlEncodeChar((char) bytes[position], result, false);
        position++;
    }

    return result.ToArray();
}
/// <summary>
/// URL-encodes a string using the <c>%uXXXX</c> notation for non-ASCII characters and
/// returns the result as text.
/// </summary>
/// <param name="str">The text to encode. May be null.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the encoded text.</returns>
public static string UrlEncodeUnicode(string str)
{
    if (str == null)
    {
        return null;
    }

    byte[] encoded = UrlEncodeUnicodeToBytes(str);
    int encodedLength = encoded.Length;
    return Encoding.ASCII.GetString(encoded, 0, encodedLength);
}
/// <summary>
/// URL-encodes a string character by character, using the <c>%uXXXX</c> notation for
/// non-ASCII characters, and returns the ASCII bytes of the result.
/// </summary>
/// <param name="str">The text to encode. May be null.</param>
/// <returns>
/// null when <paramref name="str"/> is null, an empty array for empty input, otherwise the encoded bytes.
/// </returns>
public static byte[] UrlEncodeUnicodeToBytes(string str)
{
    if (str == null)
    {
        return null;
    }

    var length = str.Length;
    if (length == 0)
    {
        return new byte[0];
    }

    // The input length is only the initial capacity; escapes make the output longer.
    var result = new MemoryStream(length);
    for (var i = 0; i < length; i++)
    {
        UrlEncodeChar(str[i], result, true);
    }

    return result.ToArray();
}
/// <summary>
/// Decodes an HTML-encoded string and returns the decoded string.
/// </summary>
/// <remarks>
/// Named references are looked up in the <c>Entities</c> table and decimal numeric
/// references up to 65535 become the character they name. Text that does not form a
/// complete, recognised reference is copied to the result.
/// </remarks>
/// <param name="s">The HTML string to decode. </param>
/// <returns>The decoded text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string HtmlDecode(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    if (s.IndexOf('&') == -1)
    {
        return s;
    }

    // `entity` holds the raw text of the reference being read; for numeric references
    // the digits are accumulated in `number` instead of being appended to it.
    var entity = new StringBuilder();
    var output = new StringBuilder();
    var len = s.Length;
    // 0 -> nothing,
    // 1 -> right after '&'
    // 2 -> between '&' and ';' but no '#'
    // 3 -> '#' found after '&' and getting numbers
    var state = 0;
    var number = 0;
    var have_trailing_digits = false;

    for (var i = 0; i < len; i++)
    {
        var c = s[i];
        if (state == 0)
        {
            if (c == '&')
            {
                entity.Append(c);
                state = 1;
            }
            else
            {
                output.Append(c);
            }

            continue;
        }

        if (c == '&')
        {
            // A new '&' abandons the reference in progress: emit what was read as plain text.
            state = 1;
            if (have_trailing_digits)
            {
                entity.Append(number.ToString(CultureInfo.InvariantCulture));
                have_trailing_digits = false;
            }

            output.Append(entity.ToString());
            entity.Length = 0;
            entity.Append('&');
            continue;
        }

        if (state == 1)
        {
            if (c == ';')
            {
                state = 0;
                output.Append(entity.ToString());
                output.Append(c);
                entity.Length = 0;
            }
            else
            {
                number = 0;
                state = c != '#' ? 2 : 3;
                entity.Append(c);
            }
        }
        else if (state == 2)
        {
            entity.Append(c);
            if (c == ';')
            {
                var key = entity.ToString();
                if (key.Length > 1)
                {
                    // Strip the leading '&' and trailing ';' to get the entity name. The
                    // Hashtable indexer yields null for unknown names, so one lookup is enough.
                    var known = Entities[key.Substring(1, key.Length - 2)];
                    if (known != null)
                    {
                        key = known.ToString();
                    }
                }

                output.Append(key);
                state = 0;
                entity.Length = 0;
            }
        }
        else if (state == 3)
        {
            if (c == ';')
            {
                if (number > 65535)
                {
                    // Does not fit a single char: leave the reference encoded.
                    output.Append("&#");
                    output.Append(number.ToString(CultureInfo.InvariantCulture));
                    output.Append(";");
                }
                else
                {
                    output.Append((char) number);
                }

                state = 0;
                entity.Length = 0;
                have_trailing_digits = false;
            }
            else if (Char.IsDigit(c))
            {
                number = number*10 + (c - '0');
                have_trailing_digits = true;
            }
            else
            {
                state = 2;
                if (have_trailing_digits)
                {
                    entity.Append(number.ToString(CultureInfo.InvariantCulture));
                    have_trailing_digits = false;
                }

                entity.Append(c);
            }
        }
    }

    // Input ended inside a reference: emit the unfinished text. Pending digits live in
    // `number`, not in `entity`, so they are appended as well instead of being dropped.
    if (entity.Length > 0)
    {
        output.Append(entity.ToString());
    }

    if (have_trailing_digits)
    {
        output.Append(number.ToString(CultureInfo.InvariantCulture));
    }

    return output.ToString();
}
/// <summary>
/// Decodes an HTML-encoded string and sends the resulting output to a TextWriter output stream.
/// </summary>
/// <param name="s">The HTML string to decode. Nothing is written when it is null.</param>
/// <param name="output">The TextWriter output stream that receives the decoded string.</param>
public static void HtmlDecode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    string decoded = HtmlDecode(s);
    output.Write(decoded);
}
/// <summary>
/// HTML-encodes a string and returns the encoded string.
/// </summary>
/// <remarks>
/// The ampersand, greater-than sign, less-than sign and double quote become
/// <c>&amp;amp;</c>, <c>&amp;gt;</c>, <c>&amp;lt;</c> and <c>&amp;quot;</c>. Characters above 159
/// are written as decimal numeric references.
/// </remarks>
/// <param name="s">The text string to encode. </param>
/// <returns>
/// The HTML-encoded text; null when <paramref name="s"/> is null, and the same instance
/// when nothing needs encoding.
/// </returns>
public static string HtmlEncode(string s)
{
    if (s == null)
    {
        return null;
    }

    // Cheap pre-scan so that strings without special characters are returned untouched.
    var needEncode = false;
    for (var i = 0; i < s.Length; i++)
    {
        var c = s[i];
        if (c == '&' || c == '"' || c == '<' || c == '>' || c > 159)
        {
            needEncode = true;
            break;
        }
    }

    if (!needEncode)
    {
        return s;
    }

    var output = new StringBuilder();

    var len = s.Length;
    for (var i = 0; i < len; i++)
    {
        switch (s[i])
        {
            case '&':
                output.Append("&amp;");
                break;
            case '>':
                output.Append("&gt;");
                break;
            case '<':
                output.Append("&lt;");
                break;
            case '"':
                output.Append("&quot;");
                break;
            default:
                // MS starts encoding with &# from 160 and stops at 255.
                // We don't do that. One reason is the 65308/65310 unicode
                // characters that look like '<' and '>'.
                // NOTE(review): the bounded test is kept only as the TARGET_JVM variant, as
                // the comment above and the unbounded pre-scan indicate - confirm against the build.
#if TARGET_JVM
                if (s[i] > 159 && s[i] < 256)
#else
                if (s[i] > 159)
#endif
                {
                    output.Append("&#");
                    output.Append(((int) s[i]).ToString(CultureInfo.InvariantCulture));
                    output.Append(";");
                }
                else
                {
                    output.Append(s[i]);
                }

                break;
        }
    }

    return output.ToString();
}
/// <summary>
/// HTML-encodes a string and sends the resulting output to a TextWriter output stream.
/// </summary>
/// <param name="s">The string to encode. Nothing is written when it is null.</param>
/// <param name="output">The TextWriter output stream that receives the encoded string.</param>
public static void HtmlEncode(string s, TextWriter output)
{
    if (s == null)
    {
        return;
    }

    string encoded = HtmlEncode(s);
    output.Write(encoded);
}
/// <summary>
/// Encodes the characters of a URL path that fall outside the printable ASCII range
/// 33-126 as <c>%XX</c> escapes of their UTF-8 bytes; other characters are copied unchanged.
/// </summary>
/// <param name="s">The text to encode. May be null or empty.</param>
/// <returns>
/// <paramref name="s"/> itself when it is null or empty, otherwise the encoded text.
/// </returns>
public static string UrlPathEncode(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    var result = new MemoryStream();
    var length = s.Length;
    for (var i = 0; i < length; i++)
    {
        var c = s[i];

        // A character outside the BMP is stored as two UTF-16 code units. They must be
        // converted to UTF-8 together; encoding each half on its own yields invalid output.
        if (char.IsHighSurrogate(c) && i + 1 < length && char.IsLowSurrogate(s[i + 1]))
        {
            var utf8 = Encoding.UTF8.GetBytes(s.Substring(i, 2));
            for (var b = 0; b < utf8.Length; b++)
            {
                result.WriteByte((byte) '%');
                result.WriteByte((byte) hexChars[utf8[b] >> 4]);
                result.WriteByte((byte) hexChars[utf8[b] & 0x0F]);
            }

            i++;
            continue;
        }

        UrlPathEncodeChar(c, result);
    }

    var bytes = result.ToArray();
    return Encoding.ASCII.GetString(bytes, 0, bytes.Length);
}
/// <summary>
/// Writes one character of a URL path to <paramref name="result"/>: characters outside
/// the range 33-126 are written as <c>%XX</c> escapes of their UTF-8 bytes, everything
/// else as a single byte.
/// </summary>
/// <param name="c">The character to encode.</param>
/// <param name="result">The stream receiving the ASCII output bytes.</param>
private static void UrlPathEncodeChar(char c, Stream result)
{
    if (c < 33 || c > 126)
    {
        // This also covers the space (32), which comes out as %20; the separate space
        // branch that used to follow could never be reached and has been removed.
        var bIn = Encoding.UTF8.GetBytes(c.ToString());
        for (var i = 0; i < bIn.Length; i++)
        {
            result.WriteByte((byte) '%');
            var idx = bIn[i] >> 4;
            result.WriteByte((byte) hexChars[idx]);
            idx = bIn[i] & 0x0F;
            result.WriteByte((byte) hexChars[idx]);
        }
    }
    else
    {
        result.WriteByte((byte) c);
    }
}
/// <summary>
/// Parses a query string into name/value pairs, decoding escapes as UTF-8.
/// </summary>
/// <param name="query">The query string, with or without a leading '?'.</param>
/// <returns>Whatever the (string, Encoding) overload returns for UTF-8.</returns>
public static NameValueCollection ParseQueryString(string query)
{
    Encoding utf8 = Encoding.UTF8;
    return ParseQueryString(query, utf8);
}
/// <summary>
/// Parses a query string into name/value pairs, decoding escapes with <paramref name="encoding"/>.
/// </summary>
/// <param name="query">The query string; a single leading '?' is ignored.</param>
/// <param name="encoding">Encoding used to decode names and values.</param>
/// <returns>
/// A new collection with the parsed pairs; empty when <paramref name="query"/> is empty or just "?".
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="query"/> or <paramref name="encoding"/> is null.
/// </exception>
public static NameValueCollection ParseQueryString(string query, Encoding encoding)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    var result = new NameValueCollection();

    var nothingToParse = query.Length == 0 || (query.Length == 1 && query[0] == '?');
    if (!nothingToParse)
    {
        var withoutMarker = query[0] == '?' ? query.Substring(1) : query;
        ParseQueryString(withoutMarker, encoding, result);
    }

    return result;
}
// Splits `query` into name/value pairs and adds each pair to `result`.
// Names and values are URL-decoded with `encoding` before being added.
1233 internal static void ParseQueryString(string query, Encoding encoding, NameValueCollection result)
1235 if (query.Length == 0)
// The whole query is HTML-decoded first, so separators are searched for in the decoded text.
1240 var decoded = HtmlDecode(query);
1241 var decodedLength = decoded.Length;
// One iteration per pair; namePos is the index where the current pair starts.
1244 while (namePos <= decodedLength)
// -1 means "not found yet" for both positions.
1246 int valuePos = -1, valueEnd = -1;
// Scan the current pair for its first '=' and for the '&' that ends it.
1247 for (var q = namePos; q < decodedLength; q++)
// Only the first '=' of a pair is considered; later ones stay part of the value.
1249 if (valuePos == -1 && decoded[q] == '=')
1253 else if (decoded[q] == '&')
// A '?' at the start of the pair is tested for here.
// NOTE(review): the guarding condition is not visible in this view - confirm when it applies.
1263 if (decoded[namePos] == '?')
// The name is the text before the '=' (valuePos - 1 is the position of the '=' itself).
1277 name = UrlDecode(decoded.Substring(namePos, valuePos - namePos - 1), encoding);
// The value runs to the end of the string here (presumably when no '&' was found - confirm).
1282 valueEnd = decoded.Length;
// Otherwise the next pair starts right after the '&'.
1286 namePos = valueEnd + 1;
1288 var value = UrlDecode(decoded.Substring(valuePos, valueEnd - valuePos), encoding);
1290 result.Add(name, value);
1298 #endregion // Methods