Count the number of bytes included in the given char[].
import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 *
 */

/**
 * Various string manipulation methods that are more efficient than chaining
 * string operations: all is done in the same buffer without creating a bunch of
 * string objects.
 *
 * @author <a href="mailto:dev@labs.apache.org">Dungeon Project</a>
 */
public class Main
{
    /** Bits that must all be clear for a char to fit in one UTF-8 byte (value below 0x80). */
    private static final int CHAR_ONE_BYTE_MASK = 0xFFFFFF80;

    /** Bits that must all be clear for a char to fit in two UTF-8 bytes (value below 0x800). */
    private static final int CHAR_TWO_BYTES_MASK = 0xFFFFF800;

    /** Number of UTF-8 bytes used by a code point encoded in UTF-16 as a surrogate pair. */
    private static final int SURROGATE_PAIR_NB_BYTES = 4;


    /**
     * Count the number of bytes needed to encode the given char[] in UTF-8.
     * <p>
     * A well-formed surrogate pair (a high surrogate immediately followed by a
     * low surrogate) represents a single supplementary code point and counts
     * for 4 bytes. An unpaired surrogate is counted like any other char in the
     * 0x0800-0xFFFF range, i.e. 3 bytes.
     *
     * @param chars
     *            The char array to measure, may be <code>null</code>
     * @return The number of bytes needed to hold the chars, 0 if the array is
     *         <code>null</code> or empty
     */
    public static final int countBytes( char[] chars )
    {
        if ( chars == null )
        {
            return 0;
        }

        int nbBytes = 0;
        int currentPos = 0;

        while ( currentPos < chars.length )
        {
            char current = chars[currentPos];

            // A single char never needs more than 3 bytes, so a 4-byte
            // sequence can only be detected by looking at the surrogate pair.
            boolean isPair = Character.isHighSurrogate( current )
                && ( currentPos + 1 < chars.length )
                && Character.isLowSurrogate( chars[currentPos + 1] );

            if ( isPair )
            {
                // Both UTF-16 chars are consumed by one 4-byte sequence
                nbBytes += SURROGATE_PAIR_NB_BYTES;
                currentPos += 2;
            }
            else
            {
                nbBytes += countNbBytesPerChar( current );
                currentPos++;
            }
        }

        return nbBytes;
    }


    /**
     * Return the number of bytes needed to hold a single UTF-16 char.
     * <p>
     * A char is 16 bits wide, so the result is always 1, 2 or 3.
     *
     * @param car
     *            The character to be measured
     * @return 1 for chars below 0x80, 2 for chars below 0x800, 3 otherwise
     */
    public static final int countNbBytesPerChar( char car )
    {
        if ( ( car & CHAR_ONE_BYTE_MASK ) == 0 )
        {
            return 1;
        }

        if ( ( car & CHAR_TWO_BYTES_MASK ) == 0 )
        {
            return 2;
        }

        // Every remaining 16-bit value (0x0800-0xFFFF) needs three bytes
        return 3;
    }
}