Returns true if the argument, a UCS-4 character code, is valid in XML documents.
/*
 * $Id: XmlChars.java,v 1.1 2004/08/19 05:30:22 aslom Exp $
 *
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 2000 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Crimson" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
 * http://www.sun.com.  For more information on the Apache Software
 * Foundation, please see <http://www.apache.org/>.
 */

/**
 * Methods in this class are used to determine whether characters may
 * appear in certain roles in XML documents.  Such methods are used
 * both to parse and to create such documents.
 *
 * <P> The class exposes only static methods and cannot be instantiated.
 *
 * @version 1.8
 * @author David Brownell
 */
public final class XmlChars {

    // Static-only utility class: the private constructor blocks instantiation.
    private XmlChars() {
    }

    /**
     * Returns true if the argument, a UCS-4 character code, is valid in
     * XML documents.  Unicode characters fit into the low sixteen
     * bits of a UCS-4 character, and pairs of Unicode <em>surrogate
     * characters</em> can be combined to encode UCS-4 characters in
     * documents containing only Unicode.  (The <code>char</code> datatype
     * in the Java Programming Language represents Unicode characters,
     * including unpaired surrogates.)
     *
     * <P> In XML, UCS-4 characters can also be encoded by the use of
     * <em>character references</em> such as <b>&#x12345678;</b>, which
     * happens to refer to a character that is disallowed in XML documents.
     * UCS-4 characters allowed in XML documents can be expressed with
     * one or two Unicode characters.
     *
     * @param ucs4char The 32-bit UCS-4 character being tested.
     * @return <code>true</code> if <code>ucs4char</code> is tab, line feed,
     *         carriage return, or lies in one of the ranges
     *         0x0020-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF;
     *         <code>false</code> for every other value, including
     *         negative ones.
     */
    public static boolean isChar(int ucs4char) {
        // [2] Char ::= #x0009 | #x000A | #x000D
        //            | [#x0020-#xD7FF]
        //            ... surrogates excluded!
        //            | [#xE000-#xFFFD]
        //            | [#x10000-#x10ffff]
        //
        // The largest range is tested first, as in the original ordering;
        // the result does not depend on the order of the clauses.
        return (ucs4char >= 0x0020 && ucs4char <= 0xD7FF)
                || ucs4char == 0x000A
                || ucs4char == 0x0009
                || ucs4char == 0x000D
                || (ucs4char >= 0xE000 && ucs4char <= 0xFFFD)
                || (ucs4char >= 0x10000 && ucs4char <= 0x10ffff);
    }
}