Entity Decoder
/**
 * Copyright (C) 2003  Manfred Andres
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
import java.util.StringTokenizer;

/**
 * Static helpers that translate between three text representations:
 * percent-escaped two-digit hex codes ("%F6"), plain characters, and
 * HTML character references ("&amp;ouml;").
 *
 * <p>The tables {@link #entities}, {@link #charsHtml} and {@link #chars}
 * are parallel: index {@code i} of each describes the same character.
 * All non-ASCII values are written as Unicode escapes or as HTML
 * references, so this file does not depend on the compiler's source
 * encoding.
 */
public class EntityDecoder {

    /**
     * HTML reference for each character code 128..255; index is
     * {@code code - 128}. The first 32 slots follow the Windows-1252
     * layout; slots without an assigned character are empty strings,
     * so {@link #charToHtml(String)} drops those characters.
     */
    public final static String htmlEntities[] = {
        /* 0x80 */ "&euro;",   "",         "&sbquo;",  "&fnof;",   "&bdquo;",  "&hellip;", "&dagger;", "&Dagger;",
        /* 0x88 */ "&circ;",   "&permil;", "&Scaron;", "&lsaquo;", "&OElig;",  "",         "&#381;",   "",
        /* 0x90 */ "",         "&lsquo;",  "&rsquo;",  "&ldquo;",  "&rdquo;",  "&bull;",   "&ndash;",  "&mdash;",
        /* 0x98 */ "&tilde;",  "&trade;",  "&scaron;", "&rsaquo;", "&oelig;",  "",         "&#382;",   "&Yuml;",
        /* 0xA0 */ "&nbsp;",   "&iexcl;",  "&cent;",   "&pound;",  "&curren;", "&yen;",    "&brvbar;", "&sect;",
        /* 0xA8 */ "&uml;",    "&copy;",   "&ordf;",   "&laquo;",  "&not;",    "&shy;",    "&reg;",    "&macr;",
        /* 0xB0 */ "&deg;",    "&plusmn;", "&sup2;",   "&sup3;",   "&acute;",  "&micro;",  "&para;",   "&middot;",
        /* 0xB8 */ "&cedil;",  "&sup1;",   "&ordm;",   "&raquo;",  "&frac14;", "&frac12;", "&frac34;", "&iquest;",
        /* 0xC0 */ "&Agrave;", "&Aacute;", "&Acirc;",  "&Atilde;", "&Auml;",   "&Aring;",  "&AElig;",  "&Ccedil;",
        /* 0xC8 */ "&Egrave;", "&Eacute;", "&Ecirc;",  "&Euml;",   "&Igrave;", "&Iacute;", "&Icirc;",  "&Iuml;",
        /* 0xD0 */ "&ETH;",    "&Ntilde;", "&Ograve;", "&Oacute;", "&Ocirc;",  "&Otilde;", "&Ouml;",   "&times;",
        /* 0xD8 */ "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;",  "&Uuml;",   "&Yacute;", "&THORN;",  "&szlig;",
        /* 0xE0 */ "&agrave;", "&aacute;", "&acirc;",  "&atilde;", "&auml;",   "&aring;",  "&aelig;",  "&ccedil;",
        /* 0xE8 */ "&egrave;", "&eacute;", "&ecirc;",  "&euml;",   "&igrave;", "&iacute;", "&icirc;",  "&iuml;",
        /* 0xF0 */ "&eth;",    "&ntilde;", "&ograve;", "&oacute;", "&ocirc;",  "&otilde;", "&ouml;",   "&divide;",
        /* 0xF8 */ "&oslash;", "&ugrave;", "&uacute;", "&ucirc;",  "&uuml;",   "&yacute;", "&thorn;",  "&yuml;"
    };

    /**
     * Two-digit upper-case hex codes recognised after a '%'. Matching is
     * first-hit and case-sensitive.
     */
    public final static String entities[] = {
        "F6", "E4", "FC", "D6", "C4", "DC", "DF",
        "3F", "5C", "2C", "3A", "3B", "23", "2B",
        "7E", "21", "22", "A7", "24", "25", "26",
        "28", "29", "3D", "3C", "3E", "7B", "5B", "5D", "7D", "2F",
        "E2", "EA", "EE", "F4", "FB",
        "C2", "CA", "CE", "D4", "DB",
        "E1", "E9", "ED", "F3", "FA",
        "C1", "C9", "CD", "D3", "DA",
        "E0", "E8", "EC", "F2", "F9",
        // Grave capitals: these five used to repeat the acute codes above,
        // which made them unreachable and left %C0 etc. undecoded.
        "C0", "C8", "CC", "D2", "D9",
        "B0", "B3", "B2", "80",
        "7C", "5E", "60", "B4", "27",
        "20", "40", "98", "2A"
    };

    /** HTML representation for each code in {@link #entities} (same index). */
    public final static String charsHtml[] = {
        "&ouml;", "&auml;", "&uuml;", "&Ouml;", "&Auml;", "&Uuml;", "&szlig;",
        "?", "\\", ",", ":", ";", "#", "+",
        "&tilde;", "!", "\"", "&sect;", "$", "%", "&amp;",
        "(", ")", "=", "&lt;", "&gt;", "{", "[", "]", "}", "/",
        "&acirc;", "&ecirc;", "&icirc;", "&ocirc;", "&ucirc;",
        "&Acirc;", "&Ecirc;", "&Icirc;", "&Ocirc;", "&Ucirc;",
        "&aacute;", "&eacute;", "&iacute;", "&oacute;", "&uacute;",
        "&Aacute;", "&Eacute;", "&Iacute;", "&Oacute;", "&Uacute;",
        "&agrave;", "&egrave;", "&igrave;", "&ograve;", "&ugrave;",
        "&Agrave;", "&Egrave;", "&Igrave;", "&Ograve;", "&Ugrave;",
        "&deg;", "&sup3;", "&sup2;", "&euro;",
        "|", "&circ;", "`", "&acute;", "'",
        " ", "@", "~", "*"
    };

    /** Plain character for each code in {@link #entities} (same index). */
    public final static String chars[] = {
        "\u00F6", "\u00E4", "\u00FC", "\u00D6", "\u00C4", "\u00DC", "\u00DF",
        "?", "\\", ",", ":", ";", "#", "+",
        "~", "!", "\"", "\u00A7", "$", "%", "&",
        "(", ")", "=", "<", ">", "{", "[", "]", "}", "/",
        "\u00E2", "\u00EA", "\u00EE", "\u00F4", "\u00FB",
        "\u00C2", "\u00CA", "\u00CE", "\u00D4", "\u00DB",
        "\u00E1", "\u00E9", "\u00ED", "\u00F3", "\u00FA",
        "\u00C1", "\u00C9", "\u00CD", "\u00D3", "\u00DA",
        "\u00E0", "\u00E8", "\u00EC", "\u00F2", "\u00F9",
        "\u00C0", "\u00C8", "\u00CC", "\u00D2", "\u00D9",
        "\u00B0", "\u00B3", "\u00B2", "\u20AC",
        "|", "^", "`", "\u00B4", "'",
        " ", "@", "~", "*"
    };

    /**
     * Decodes percent-escaped text to plain characters.
     *
     * @param raw percent-escaped input, may be null
     * @return decoded text, or null when {@code raw} is null
     */
    public static String entityToChar (String raw) {
        return (entityTo (raw, chars));
    }

    /**
     * Decodes percent-escaped text and then HTML-encodes the result via
     * {@link #charToHtml(String)}.
     *
     * @param raw percent-escaped input, may be null
     * @return HTML-encoded text, or null when {@code raw} is null
     */
    public static String entityToHtml (String raw) {
        return (charToHtml (entityTo (raw, chars)));
    }

    /**
     * Replaces every string of {@link #charsHtml} found in the input by the
     * matching entry of {@link #chars}.
     *
     * @param raw HTML text, may be null
     * @return text with the known references replaced, or null for null input
     */
    public static String htmlToChar (String raw) {
        return convert (raw, charsHtml, chars);
    }

    /**
     * HTML-encodes a string: '&lt;' and '&gt;' become references, other
     * characters below 128 are copied, 128..255 are looked up in
     * {@link #htmlEntities}, and everything above is written as a decimal
     * numeric reference. The ampersand is copied unchanged.
     *
     * @param raw text to encode, may be null
     * @return encoded text, or null when {@code raw} is null
     */
    public static String charToHtml (String raw) {
        if (raw == null)
            return null;
        char[] input = raw.toCharArray();
        StringBuffer encoded = new StringBuffer();
        for (int i = 0; i < input.length; i++) {
            char c = input[i];
            if (c == '<')
                encoded.append("&lt;");
            else if (c == '>')
                encoded.append("&gt;");
            else if (c < 128)
                encoded.append(c);
            else if (c < 256)
                encoded.append(htmlEntities[c - 128]);
            else {
                encoded.append("&#");
                encoded.append((int) c);
                encoded.append(";");
            }
        }
        return encoded.toString();
    }

    /**
     * Decodes percent-escaped text using the given translation table.
     * Every '+' is turned into a space first. After a '%', the next two
     * characters are looked up in {@link #entities} and replaced by the
     * entry of {@code tc} at the same index. When the lookup fails the
     * '%' itself is dropped and the following text is copied as is.
     * If {@code tc} is the {@link #charsHtml} table, literal '&lt;' and
     * '&gt;' in the input are written as HTML references as well.
     *
     * @param raw percent-escaped input, may be null
     * @param tc  translation table, parallel to {@link #entities}
     * @return decoded text, or null when {@code raw} is null
     */
    public static String entityTo (String raw, String[] tc) {
        if (raw == null)
            return null;
        // Angle brackets are only escaped when producing HTML; for any other
        // table they pass through untouched.
        boolean html = tc == charsHtml;
        String delimiters = html ? "%<>" : "%";
        StringBuffer sb = new StringBuffer ();
        boolean entity = false;
        raw = raw.replace ('+', ' ');
        for (StringTokenizer st = new StringTokenizer (raw, delimiters, true); st.hasMoreTokens (); ) {
            String token = st.nextToken ();
            if (entity) {
                // This token directly follows a '%'.
                entity = false;
                int idx = indexOfEntity (token);
                if (idx >= 0) {
                    sb.append (tc[idx]);
                    sb.append (token.substring (2));
                } else {
                    appendPlain (sb, token, html);
                }
            } else if (token.equals ("%")) {
                entity = true;
            } else {
                appendPlain (sb, token, html);
            }
        }
        return (sb.toString ());
    }

    /**
     * Applies a list of replacements one after another: every occurrence of
     * {@code from[i]} in the current text is replaced by {@code to[i]}
     * before the next pair is processed. Null or empty search strings are
     * skipped, because an empty search string matches at every position and
     * would never let the scan advance.
     *
     * @param raw  input text, may be null
     * @param from search strings
     * @param to   replacements, parallel to {@code from}
     * @return the converted text, or null when {@code raw} is null
     */
    public static String convert (String raw, String[] from, String[] to) {
        if (raw == null)
            return null;
        String result = raw;
        for (int i = 0; i < from.length; i++) {
            String search = from[i];
            if (search == null || search.length () == 0)
                continue;
            int idx = result.indexOf (search);
            if (idx < 0)
                continue;
            StringBuffer sb = new StringBuffer ();
            int start = 0;
            while (idx > -1) {
                sb.append (result.substring (start, idx));
                sb.append (to[i]);
                start = idx + search.length ();
                idx = result.indexOf (search, start);
            }
            sb.append (result.substring (start));
            result = sb.toString ();
        }
        return result;
    }

    /** Returns the index of the first entry of entities that prefixes token, or -1. */
    private static int indexOfEntity (String token) {
        for (int i = 0; i < entities.length; i++) {
            if (token.startsWith (entities[i]))
                return i;
        }
        return -1;
    }

    /** Appends token unchanged, except that a lone angle bracket is escaped in HTML mode. */
    private static void appendPlain (StringBuffer sb, String token, boolean html) {
        if (html && token.equals ("<"))
            sb.append ("&lt;");
        else if (html && token.equals (">"))
            sb.append ("&gt;");
        else
            sb.append (token);
    }
}