2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package gr.ebs.gss.server.webdav;
20 import java.util.Hashtable;
23 * MIME2Java is a convenience class which handles conversions between MIME charset names
24 * and Java encoding names.
25 * <p>The supported XML encodings are the intersection of XML-supported code sets and those
26 * supported in JDK 1.1.
27 * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
28 * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
29 * <p>Java encoding names are used on <var>encoding</var> parameters to
30 * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>.
32 * <TABLE BORDER="0" WIDTH="100%">
35 * <P ALIGN="CENTER"><B>Common Name</B>
38 * <P ALIGN="CENTER"><B>Use this name in XML files</B>
41 * <P ALIGN="CENTER"><B>Name Type</B>
44 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
48 * <TD WIDTH="33%">8 bit Unicode</TD>
50 * <P ALIGN="CENTER">UTF-8
53 * <P ALIGN="CENTER">IANA
56 * <P ALIGN="CENTER">UTF8
60 * <TD WIDTH="33%">ISO Latin 1</TD>
62 * <P ALIGN="CENTER">ISO-8859-1
65 * <P ALIGN="CENTER">MIME
68 * <P ALIGN="CENTER">ISO-8859-1
72 * <TD WIDTH="33%">ISO Latin 2</TD>
74 * <P ALIGN="CENTER">ISO-8859-2
77 * <P ALIGN="CENTER">MIME
80 * <P ALIGN="CENTER">ISO-8859-2
84 * <TD WIDTH="33%">ISO Latin 3</TD>
86 * <P ALIGN="CENTER">ISO-8859-3
89 * <P ALIGN="CENTER">MIME
92 * <P ALIGN="CENTER">ISO-8859-3
96 * <TD WIDTH="33%">ISO Latin 4</TD>
98 * <P ALIGN="CENTER">ISO-8859-4
101 * <P ALIGN="CENTER">MIME
104 * <P ALIGN="CENTER">ISO-8859-4
108 * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
110 * <P ALIGN="CENTER">ISO-8859-5
113 * <P ALIGN="CENTER">MIME
116 * <P ALIGN="CENTER">ISO-8859-5
120 * <TD WIDTH="33%">ISO Latin Arabic</TD>
122 * <P ALIGN="CENTER">ISO-8859-6
125 * <P ALIGN="CENTER">MIME
128 * <P ALIGN="CENTER">ISO-8859-6
132 * <TD WIDTH="33%">ISO Latin Greek</TD>
134 * <P ALIGN="CENTER">ISO-8859-7
137 * <P ALIGN="CENTER">MIME
140 * <P ALIGN="CENTER">ISO-8859-7
144 * <TD WIDTH="33%">ISO Latin Hebrew</TD>
146 * <P ALIGN="CENTER">ISO-8859-8
149 * <P ALIGN="CENTER">MIME
152 * <P ALIGN="CENTER">ISO-8859-8
156 * <TD WIDTH="33%">ISO Latin 5</TD>
158 * <P ALIGN="CENTER">ISO-8859-9
161 * <P ALIGN="CENTER">MIME
164 * <P ALIGN="CENTER">ISO-8859-9
168 * <TD WIDTH="33%">EBCDIC: US</TD>
170 * <P ALIGN="CENTER">ebcdic-cp-us
173 * <P ALIGN="CENTER">IANA
176 * <P ALIGN="CENTER">cp037
180 * <TD WIDTH="33%">EBCDIC: Canada</TD>
182 * <P ALIGN="CENTER">ebcdic-cp-ca
185 * <P ALIGN="CENTER">IANA
188 * <P ALIGN="CENTER">cp037
192 * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
194 * <P ALIGN="CENTER">ebcdic-cp-nl
197 * <P ALIGN="CENTER">IANA
200 * <P ALIGN="CENTER">cp037
204 * <TD WIDTH="33%">EBCDIC: Denmark</TD>
206 * <P ALIGN="CENTER">ebcdic-cp-dk
209 * <P ALIGN="CENTER">IANA
212 * <P ALIGN="CENTER">cp277
216 * <TD WIDTH="33%">EBCDIC: Norway</TD>
218 * <P ALIGN="CENTER">ebcdic-cp-no
221 * <P ALIGN="CENTER">IANA
224 * <P ALIGN="CENTER">cp277
228 * <TD WIDTH="33%">EBCDIC: Finland</TD>
230 * <P ALIGN="CENTER">ebcdic-cp-fi
233 * <P ALIGN="CENTER">IANA
236 * <P ALIGN="CENTER">cp278
240 * <TD WIDTH="33%">EBCDIC: Sweden</TD>
242 * <P ALIGN="CENTER">ebcdic-cp-se
245 * <P ALIGN="CENTER">IANA
248 * <P ALIGN="CENTER">cp278
252 * <TD WIDTH="33%">EBCDIC: Italy</TD>
254 * <P ALIGN="CENTER">ebcdic-cp-it
257 * <P ALIGN="CENTER">IANA
260 * <P ALIGN="CENTER">cp280
264 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
266 * <P ALIGN="CENTER">ebcdic-cp-es
269 * <P ALIGN="CENTER">IANA
272 * <P ALIGN="CENTER">cp284
276 * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
278 * <P ALIGN="CENTER">ebcdic-cp-gb
281 * <P ALIGN="CENTER">IANA
284 * <P ALIGN="CENTER">cp285
288 * <TD WIDTH="33%">EBCDIC: France</TD>
290 * <P ALIGN="CENTER">ebcdic-cp-fr
293 * <P ALIGN="CENTER">IANA
296 * <P ALIGN="CENTER">cp297
300 * <TD WIDTH="33%">EBCDIC: Arabic</TD>
302 * <P ALIGN="CENTER">ebcdic-cp-ar1
305 * <P ALIGN="CENTER">IANA
308 * <P ALIGN="CENTER">cp420
312 * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
314 * <P ALIGN="CENTER">ebcdic-cp-he
317 * <P ALIGN="CENTER">IANA
320 * <P ALIGN="CENTER">cp424
324 * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
326 * <P ALIGN="CENTER">ebcdic-cp-ch
329 * <P ALIGN="CENTER">IANA
332 * <P ALIGN="CENTER">cp500
336 * <TD WIDTH="33%">EBCDIC: Roece</TD>
338 * <P ALIGN="CENTER">ebcdic-cp-roece
341 * <P ALIGN="CENTER">IANA
344 * <P ALIGN="CENTER">cp870
348 * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
350 * <P ALIGN="CENTER">ebcdic-cp-yu
353 * <P ALIGN="CENTER">IANA
356 * <P ALIGN="CENTER">cp870
360 * <TD WIDTH="33%">EBCDIC: Iceland</TD>
362 * <P ALIGN="CENTER">ebcdic-cp-is
365 * <P ALIGN="CENTER">IANA
368 * <P ALIGN="CENTER">cp871
372 * <TD WIDTH="33%">EBCDIC: Urdu</TD>
374 * <P ALIGN="CENTER">ebcdic-cp-ar2
377 * <P ALIGN="CENTER">IANA
380 * <P ALIGN="CENTER">cp918
384 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
386 * <P ALIGN="CENTER">gb2312
389 * <P ALIGN="CENTER">MIME
392 * <P ALIGN="CENTER">GB2312
396 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
398 * <P ALIGN="CENTER">euc-jp
401 * <P ALIGN="CENTER">MIME
404 * <P ALIGN="CENTER">eucjis
408 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
410 * <P ALIGN="CENTER">iso-2022-jp
413 * <P ALIGN="CENTER">MIME
416 * <P ALIGN="CENTER">JIS
420 * <TD WIDTH="33%">Japanese: Shift JIS</TD>
422 * <P ALIGN="CENTER">Shift_JIS
425 * <P ALIGN="CENTER">MIME
428 * <P ALIGN="CENTER">SJIS
432 * <TD WIDTH="33%">Chinese: Big5</TD>
434 * <P ALIGN="CENTER">Big5
437 * <P ALIGN="CENTER">MIME
440 * <P ALIGN="CENTER">Big5
444 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
446 * <P ALIGN="CENTER">euc-kr
449 * <P ALIGN="CENTER">MIME
452 * <P ALIGN="CENTER">iso2022kr
456 * <TD WIDTH="33%">Cyrillic</TD>
458 * <P ALIGN="CENTER">koi8-r
461 * <P ALIGN="CENTER">MIME
464 * <P ALIGN="CENTER">koi8-r
469 * @version $Revision: 467222 $ $Date: 2006-10-24 05:17:11 +0200 (mar., 24 oct. 2006) $
470 * @author TAMURA Kent <kent@trl.ibm.co.jp>
public class MIME2Java {

    /**
     * Lookup table from upper-cased MIME charset name to Java encoding name.
     * Filled once by the static initializer and only read afterwards.
     */
    private static final Hashtable<String, String> s_enchash;

    /**
     * Lookup table from upper-cased Java encoding name to the preferred
     * MIME charset name. Filled once by the static initializer and only
     * read afterwards.
     */
    private static final Hashtable<String, String> s_revhash;

    static {
        s_enchash = new Hashtable<String, String>();
        // <preferred MIME name>, <Java encoding name>
        // Keys MUST be upper case: lookups upper-case the caller's argument.
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII", "8859_1"); // ?
        s_enchash.put("ISO-8859-1", "8859_1");
        s_enchash.put("ISO-8859-2", "8859_2");
        s_enchash.put("ISO-8859-3", "8859_3");
        s_enchash.put("ISO-8859-4", "8859_4");
        s_enchash.put("ISO-8859-5", "8859_5");
        s_enchash.put("ISO-8859-6", "8859_6");
        s_enchash.put("ISO-8859-7", "8859_7");
        s_enchash.put("ISO-8859-8", "8859_8");
        s_enchash.put("ISO-8859-9", "8859_9");
        s_enchash.put("ISO-2022-JP", "JIS");
        s_enchash.put("SHIFT_JIS", "SJIS");
        s_enchash.put("EUC-JP", "EUCJIS");
        s_enchash.put("GB2312", "GB2312");
        s_enchash.put("BIG5", "Big5");
        s_enchash.put("EUC-KR", "KSC5601");
        s_enchash.put("ISO-2022-KR", "ISO2022KR");
        s_enchash.put("KOI8-R", "KOI8_R");

        s_enchash.put("EBCDIC-CP-US", "CP037");
        s_enchash.put("EBCDIC-CP-CA", "CP037");
        s_enchash.put("EBCDIC-CP-NL", "CP037");
        s_enchash.put("EBCDIC-CP-DK", "CP277");
        s_enchash.put("EBCDIC-CP-NO", "CP277");
        s_enchash.put("EBCDIC-CP-FI", "CP278");
        s_enchash.put("EBCDIC-CP-SE", "CP278");
        s_enchash.put("EBCDIC-CP-IT", "CP280");
        s_enchash.put("EBCDIC-CP-ES", "CP284");
        s_enchash.put("EBCDIC-CP-GB", "CP285");
        s_enchash.put("EBCDIC-CP-FR", "CP297");
        s_enchash.put("EBCDIC-CP-AR1", "CP420");
        s_enchash.put("EBCDIC-CP-HE", "CP424");
        s_enchash.put("EBCDIC-CP-CH", "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU", "CP870");
        s_enchash.put("EBCDIC-CP-IS", "CP871");
        s_enchash.put("EBCDIC-CP-AR2", "CP918");

        // j:CNS11643 -> EUC-TW?
        // ISO-2022-CN? ISO-2022-CN-EXT?

        s_revhash = new Hashtable<String, String>();
        // <Java encoding name>, <preferred MIME name>
        // Keys MUST be upper case: lookups upper-case the caller's argument.
        s_revhash.put("UTF8", "UTF-8");
        // "8859_1" deliberately reverses to ISO-8859-1 rather than US-ASCII.
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // Several MIME names share one Java code page, so a reverse lookup
        // can only name one of them. The choices below are kept as they are
        // for backward compatibility with existing callers.
        s_revhash.put("CP037", "EBCDIC-CP-NL");  // also EBCDIC-CP-US, EBCDIC-CP-CA
        s_revhash.put("CP277", "EBCDIC-CP-NO");  // also EBCDIC-CP-DK
        s_revhash.put("CP278", "EBCDIC-CP-SE");  // also EBCDIC-CP-FI
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-YU");  // also EBCDIC-CP-ROECE
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    }

    /**
     * Not instantiable: this class only exposes static conversion methods.
     */
    private MIME2Java() {
    }

    /**
     * Looks up a name in one of the conversion tables, ignoring case.
     *
     * @param table conversion table whose keys are upper case
     * @param name  name to look up; may be <var>null</var>
     * @return the mapped name, or <var>null</var> if <var>name</var> is
     *         <var>null</var> or has no entry in <var>table</var>
     */
    private static String lookup(Hashtable<String, String> table, String name) {
        if (name == null) {
            return null;
        }
        // Upper-case with a fixed locale: with the default locale a Turkish
        // or Azeri JVM turns 'i' into a dotted capital I (U+0130), so valid
        // names such as "iso-8859-1" would never match the table keys.
        return table.get(name.toUpperCase(java.util.Locale.ENGLISH));
    }

    /**
     * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
     * @param mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
     *                        ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
     *                        ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
     *                        EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
     *                        EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
     *                        EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
     *                        EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
     *                        EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
     *                        EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
     * @return Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
     *         is <var>null</var> or unknown.
     */
    public static String convert(String mimeCharsetName) {
        return lookup(s_enchash, mimeCharsetName);
    }

    /**
     * Convert a Java encoding name to MIME charset name.
     * Where several MIME charset names map to the same Java encoding, only
     * one of them is returned (for example "CP037" yields "EBCDIC-CP-NL").
     * @param encoding Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
     *                 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
     *                 GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
     *                 CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
     *                 and CP918</code>.
     * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
     *         <var>null</var> or unknown.
     */
    public static String reverse(String encoding) {
        return lookup(s_revhash, encoding);
    }
}