// Useful for string set lookups and command completion.

     

//package com.ryanm.util.text;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

/**
 * A set of strings stored as a radix tree, i.e. a prefix tree in which
 * every node holds a whole fragment of text rather than a single
 * character. Useful for string set lookups and command completion
 * stuff.
 * 
 * @author ryanm
 */
public class RadixTree
{
  /** Root of the tree. Its fragment is always the empty string. */
  private final Node root = new Node( "" );

  private final boolean caseSensitive;

  /**
   * @param caseSensitive
   *           <code>true</code> if case matters. Note that a
   *           case-insensitive {@link RadixTree} will convert all
   *           strings passed to it for insertion or query to lower
   *           case.
   */
  public RadixTree( boolean caseSensitive )
  {
    this.caseSensitive = caseSensitive;
    // the empty string is only a member once it has been added explicitly
    root.isString = false;
  }

  /**
   * Adds string to the set. Adding a string that is already present
   * has no effect. The empty string is a legal member.
   * 
   * @param string
   *           the string to add
   */
  public void add( CharSequence string )
  {
    root.addString( normalise( string ) );
  }

  /**
   * Removes a string from the set. Removing a string that is not in
   * the set has no effect. Nodes that no longer lead to any member are
   * pruned, and single-child chains are collapsed again.
   * 
   * @param string
   *           the string to remove
   */
  public void remove( CharSequence string )
  {
    root.removeString( normalise( string ) );
  }

  /**
   * Tests if the string is contained in the set
   * 
   * @param string
   *           the string to look up
   * @return <code>true</code> if the entire string has been added to
   *         the tree (and not removed since). A mere prefix of a
   *         member does not count.
   */
  public boolean contains( CharSequence string )
  {
    String query = normalise( string );

    return root.longestMemberPrefix( query ) == query.length();
  }

  /**
   * Finds the longest prefix of the string that is in the set
   * 
   * @param string
   *           the string to examine
   * @return The longest prefix of the (case-normalised) string that is
   *         a member of the set, or the empty string if there is none
   */
  public String findPredecessor( CharSequence string )
  {
    String query = normalise( string );
    int length = root.longestMemberPrefix( query );

    return length < 0 ? "" : query.substring( 0, length );
  }

  /**
   * Finds possible completions that fit in the set
   * 
   * @param string
   *           the text typed so far
   * @param depth
   *           How deeply to search the tree, the maximum number of
   *           decisions that need to be made to type any one
   *           completion
   * @return A list of possible completions. Each entry is only the
   *         text that has to be appended to the input; an empty entry
   *         means the input itself is a member. The list is empty if
   *         no member starts with the input.
   */
  public List<String> findSuccessors( CharSequence string, int depth )
  {
    List<String> completions = new ArrayList<String>();

    root.findSuccessors( normalise( string ), depth, completions );

    return completions;
  }

  @Override
  public String toString()
  {
    StringBuilder buff = new StringBuilder();

    root.buildString( buff, -1 );

    return buff.toString();
  }

  /**
   * Copies the input into an immutable {@link String} (so that tree
   * fragments never alias a mutable sequence owned by the caller) and
   * lower-cases it for case-insensitive trees. The root locale is used
   * so that the result does not depend on the platform default.
   */
  private String normalise( CharSequence string )
  {
    String s = string.toString();

    return caseSensitive ? s : s.toLowerCase( java.util.Locale.ROOT );
  }

  /**
   * One node of the tree. Static because it never needs the enclosing
   * tree instance.
   */
  private static final class Node implements Comparable<Node>
  {
    /** The text fragment on the edge leading to this node */
    private CharSequence value;

    /**
     * Child nodes, sorted by fragment. No two children start with the
     * same character.
     */
    private Node[] children = new Node[ 0 ];

    /**
     * Indicates that the string ending at this node is a string in
     * the set
     */
    private boolean isString = true;

    private Node( CharSequence string )
    {
      value = string;
    }

    /**
     * Collects completions for a query that starts at this node.
     */
    private void findSuccessors( CharSequence string, int depth, List<String> completions )
    {
      int d = findDivergenceIndex( string );

      if( d == string.length() )
      {
        // the query ends here, possibly part-way through this
        // fragment: the rest of the fragment starts every completion
        StringBuilder prefix = new StringBuilder( value.subSequence( d, value.length() ) );

        if( isString )
        {
          completions.add( prefix.toString() );
        }

        if( depth > 0 )
        {
          for( int i = 0; i < children.length; i++ )
          {
            children[ i ].getCompletions( prefix, depth - 1, completions );
          }
        }
      }
      else if( d == value.length() )
      {
        // the whole fragment matched and there is query left over
        Node c = findChild( string.charAt( d ) );

        if( c != null )
        {
          c.findSuccessors( string.subSequence( d, string.length() ), depth, completions );
        }
      }
      // otherwise the query diverges inside this fragment, so nothing
      // in this subtree starts with it
    }

    /**
     * Adds every member found in this subtree, down to the given
     * depth, prefixed with the text accumulated so far. The prefix is
     * restored before returning.
     */
    private void getCompletions( StringBuilder prefix, int depth, List<String> completions )
    {
      int pl = prefix.length();
      prefix.append( value );

      if( isString )
      {
        completions.add( prefix.toString() );
      }

      if( depth > 0 )
      {
        for( int i = 0; i < children.length; i++ )
        {
          children[ i ].getCompletions( prefix, depth - 1, completions );
        }
      }

      prefix.setLength( pl );
    }

    private void addString( CharSequence string )
    {
      int d = findDivergenceIndex( string );

      if( d < value.length() )
      {
        // need to split this node: the unmatched tail of the fragment
        // moves into a new child that inherits the old children
        Node child = new Node( value.subSequence( d, value.length() ) );
        child.children = children;
        child.isString = isString;
        value = value.subSequence( 0, d );
        children = new Node[] { child };
        isString = false;
      }

      if( d == string.length() )
      {
        // the string ends exactly at this node (this also covers
        // adding the empty string at the root)
        isString = true;
      }
      else
      {
        Node c = findChild( string.charAt( d ) );

        if( c != null )
        {
          c.addString( string.subSequence( d, string.length() ) );
        }
        else
        {
          insertNode( new Node( string.subSequence( d, string.length() ) ) );
        }
      }
    }

    private void removeString( CharSequence string )
    {
      int d = findDivergenceIndex( string );

      if( d < value.length() )
      {
        // the string does not pass through this node: not in the set
        return;
      }

      if( d == string.length() )
      {
        // tidying up this node is left to the parent, see below
        isString = false;
        return;
      }

      CharSequence rest = string.subSequence( d, string.length() );
      int index = findChildIndex( rest.charAt( 0 ) );

      if( index < 0 )
      {
        return;
      }

      Node child = children[ index ];
      child.removeString( rest );

      if( !child.isString )
      {
        if( child.children.length == 0 )
        {
          // dead leaf: no member ends at or below it
          removeChild( index );
        }
        else if( child.children.length == 1 )
        {
          // unify nodes: a pure pass-through node is merged with its
          // only child. The first character, and therefore the sort
          // position among the siblings, is unchanged.
          child.absorbOnlyChild();
        }
      }
    }

    /**
     * Merges this node with its single child.
     */
    private void absorbOnlyChild()
    {
      Node only = children[ 0 ];
      value = new StringBuilder( value ).append( only.value ).toString();
      isString = only.isString;
      children = only.children;
    }

    /**
     * Walks down from this node along the string.
     * 
     * @return the length of the longest prefix of the string that is a
     *         member of the set, or -1 if no prefix (not even the
     *         empty one) is a member
     */
    private int longestMemberPrefix( CharSequence string )
    {
      int best = -1;
      int consumed = 0;
      CharSequence rest = string;
      Node node = this;

      while( node != null )
      {
        int d = node.findDivergenceIndex( rest );

        if( d < node.value.length() )
        {
          // the fragment is not fully matched
          break;
        }

        consumed += d;

        if( node.isString )
        {
          best = consumed;
        }

        if( d == rest.length() )
        {
          break;
        }

        rest = rest.subSequence( d, rest.length() );
        // null when no child continues the string, which ends the walk
        node = node.findChild( rest.charAt( 0 ) );
      }

      return best;
    }

    private Node findChild( char c )
    {
      int index = findChildIndex( c );

      return index < 0 ? null : children[ index ];
    }

    /**
     * @return the index of the child whose fragment starts with c, or
     *         -1 if there is none
     */
    private int findChildIndex( char c )
    {
      for( int i = 0; i < children.length; i++ )
      {
        if( c == children[ i ].value.charAt( 0 ) )
        {
          return i;
        }
      }

      return -1;
    }

    /**
     * @return the number of leading characters that this node's
     *         fragment and the string have in common
     */
    private int findDivergenceIndex( CharSequence string )
    {
      int d = 0;
      while( d < value.length() && d < string.length() && value.charAt( d ) == string.charAt( d ) )
      {
        d++;
      }
      return d;
    }

    /**
     * Inserts a new child, keeping the children sorted.
     */
    private void insertNode( Node child )
    {
      int i = Arrays.binarySearch( children, child );
      // children never share a first character, so the new fragment
      // cannot already be present
      assert i < 0;

      // binarySearch returns -(insertion point) - 1 for a missing key
      i = -( i + 1 );

      Node[] nc = new Node[ children.length + 1 ];

      System.arraycopy( children, 0, nc, 0, i );
      nc[ i ] = child;
      System.arraycopy( children, i, nc, i + 1, children.length - i );

      children = nc;
    }

    private void removeChild( int index )
    {
      Node[] nc = new Node[ children.length - 1 ];

      System.arraycopy( children, 0, nc, 0, index );
      System.arraycopy( children, index + 1, nc, index, nc.length - index );

      children = nc;
    }

    /**
     * Orders nodes lexicographically by fragment, character by
     * character, with a shorter fragment sorting before any longer
     * fragment it is a prefix of.
     */
    @Override
    public int compareTo( Node o )
    {
      int limit = Math.min( value.length(), o.value.length() );

      for( int i = 0; i < limit; i++ )
      {
        int diff = value.charAt( i ) - o.value.charAt( i );

        if( diff != 0 )
        {
          return diff;
        }
      }

      return value.length() - o.value.length();
    }

    /**
     * Appends this subtree to the buffer, one node per line, indented
     * by depth. Fragments that end a member are wrapped in quotes.
     */
    private void buildString( StringBuilder buff, int indent )
    {
      for( int i = 0; i < indent; i++ )
      {
        buff.append( " " );
      }

      if( isString )
      {
        buff.append( "\"" );
      }
      buff.append( value );
      if( isString )
      {
        buff.append( "\"" );
      }

      indent++;

      for( int i = 0; i < children.length; i++ )
      {
        buff.append( "\n" );
        children[ i ].buildString( buff, indent );
      }
    }
  }
}

/**
 * Utility methods for dealing with text
 * 
 * @author ryanm
 */
class TextUtils
{
  /**
   * Tests if s starts with t, ignoring the case of the characters
   * 
   * @param s
   *           the string to test
   * @param t
   *           the prefix to look for
   * @return <code>true</code> if s.toLowerCase().startsWith(
   *         t.toLowerCase() ), but more efficiently
   */
  public static boolean startsWithIgnoreCase( CharSequence s, CharSequence t )
  {
    if( s.length() < t.length() )
    {
      return false;
    }

    for( int i = 0; i < t.length(); i++ )
    {
      if( foldCase( s.charAt( i ) ) != foldCase( t.charAt( i ) ) )
      {
        return false;
      }
    }
    return true;
  }

  /**
   * See {@link String#compareToIgnoreCase(String)}
   * 
   * @param s
   *           the first sequence
   * @param t
   *           the second sequence
   * @return See {@link String#compareToIgnoreCase(String)}
   */
  public static int compareToIgnoreCase( CharSequence s, CharSequence t )
  {
    int i = 0;

    while( i < s.length() && i < t.length() )
    {
      int diff = foldCase( s.charAt( i ) ) - foldCase( t.charAt( i ) );

      if( diff != 0 )
      {
        return diff;
      }

      i++;
    }

    return s.length() - t.length();
  }

  /**
   * See {@link String#compareTo(String)}
   * 
   * @param s
   *           the first sequence
   * @param t
   *           the second sequence
   * @return See {@link String#compareTo(String)}
   */
  public static int compareTo( CharSequence s, CharSequence t )
  {
    int i = 0;

    while( i < s.length() && i < t.length() )
    {
      int diff = s.charAt( i ) - t.charAt( i );

      if( diff != 0 )
      {
        return diff;
      }

      i++;
    }

    return s.length() - t.length();
  }

  /**
   * Splits a string
   * 
   * @param composite
   *           The composite string
   * @param leftBracket
   *           the opening parenthesis character
   * @param rightBracket
   *           the closing parenthesis character
   * @param separator
   *           The character that separates tokens. Separators that
   *           lie between at least one pair of parenthesis are
   *           ignored
   * @return An array of individual tokens, each trimmed of
   *         surrounding whitespace. There is always at least one
   *         token.
   */
  public static String[] split( String composite, char leftBracket, char rightBracket,
      char separator )
  {
    List<String> c = new ArrayList<String>();

    int start = 0;
    int lbcount = 0;

    for( int i = 0; i < composite.length(); i++ )
    {
      char ch = composite.charAt( i );

      if( ch == leftBracket )
      {
        lbcount++;
      }
      else if( ch == rightBracket )
      {
        // a closing bracket without a matching opening one must not
        // push the count below zero, or no later separator would
        // ever be honoured
        if( lbcount > 0 )
        {
          lbcount--;
        }
      }
      else if( ch == separator && lbcount == 0 )
      {
        c.add( composite.substring( start, i ).trim() );
        start = i + 1;
      }
    }

    c.add( composite.substring( start ).trim() );

    return c.toArray( new String[ c.size() ] );
  }

  /**
   * Wraps the input string in {@code <html></html>} and breaks it up
   * into lines with {@code <br>} elements. Useful for making
   * multi-line tooltips and the like. The input is inserted as-is;
   * no HTML escaping is performed.
   * 
   * @param s
   *           The input String
   * @param lineLength
   *           The desired length of the output lines. Each line is
   *           broken at the first space at or after this many
   *           characters, so lines can be longer than this. Values
   *           below zero are treated as zero, which breaks at every
   *           space.
   * @return The HTMLised string, or <code>null</code> if the input
   *         was <code>null</code>
   */
  public static String HTMLiseString( String s, int lineLength )
  {
    if( s == null )
    {
      return null;
    }

    StringBuilder buff = new StringBuilder( s );
    int minLength = Math.max( lineLength, 0 );

    int lineStart = 0;

    // measure against the buffer, not the input: every inserted <br>
    // makes the buffer three characters longer than the input
    while( lineStart + minLength < buff.length() )
    {
      // find the first space after the line length
      int firstSpaceIndex = buff.indexOf( " ", lineStart + minLength );

      if( firstSpaceIndex == -1 )
      {
        // nowhere left to break
        break;
      }

      // replace it with a <br>
      buff.replace( firstSpaceIndex, firstSpaceIndex + 1, "<br>" );
      lineStart = firstSpaceIndex + 4;
    }

    buff.insert( 0, "<html>" );
    buff.append( "</html>" );

    return buff.toString();
  }

  /**
   * Folds a character the way {@link String#compareToIgnoreCase(String)}
   * does: upper-case first, then lower-case, so that characters which
   * only coincide in one of the two mappings still compare as equal.
   */
  private static char foldCase( char c )
  {
    return Character.toLowerCase( Character.toUpperCase( c ) );
  }

}
/*
 * Related examples in the same category
 *
 * 1.	Binary Tree
 * 2.	Your own tree with generic user object
 * 3.	Tree Node for a general tree of Objects
 * 4.	A tree structure that maps inheritance hierarchies of classes
 * 5.	Data structure that maintains data in an ordered binary tree; each node is greater (smaller) than or equal to its 2 sub-nodes, for all the hierarchy.
 * 6.	Tree Node
 * 7.	Ternary Search Tree
 * 8.	Char Prefix Tree
 * 9.	Lightweight tree n-arity structure
 * 10.	This class is designed to provide a generic tree that allows duplicates.
 *
 * Useful for string set lookups and command completion stuff : Tree « Collections Data Structure « Java
 *
 * Related examples in the same category
 */