19 package org.sleuthkit.autopsy.coreutils;
 
   21 import java.io.BufferedReader;
 
   22 import java.io.IOException;
 
   23 import java.io.InputStream;
 
   24 import java.io.InputStreamReader;
 
   25 import java.nio.charset.StandardCharsets;
 
   26 import java.util.HashMap;
 
   27 import java.util.List;
 
   28 import java.util.stream.Collectors;
 
   29 import java.util.stream.Stream;
 
   30 import org.apache.commons.lang3.StringUtils;
 
   36 class DomainTokenizer {
 
   43     private static class DomainCategory extends HashMap<String, DomainCategory> {
 
   49                 this.put(childKey, cat);
 
   // Separator placed between labels when getDomain re-assembles its result with String.join.
   57     private static final String JOINER = 
".";
 
   // Regex passed to String.split when breaking a domain or suffix into labels; the
   // backslash escapes JOINER so it matches a literal period instead of any character.
   59     private static final String DELIMITER = 
"\\" + JOINER;
 
   // Trie key that getDomain falls back to when the current node has no child
   // stored under the exact label being examined.
   61     private static final String WILDCARD = 
"*";
 
   // Prefix that getDomain puts in front of a label to probe the current node
   // before it attempts the plain-label lookup.
   62     private static final String EXCEPTION_PREFIX = 
"!";
 
   // Resource name given to DomainTokenizer.class.getResourceAsStream in load();
   // with no leading slash it is resolved relative to this class's package.
   67     private static final String DOMAIN_LIST = 
"public_suffix_list.dat";
 
   // Lines of the suffix list whose trimmed text starts with this token are
   // skipped by load() and never added to the trie.
   70     private static final String COMMENT_TOKEN = 
"//";
 
   // Shared instance, initially null; getInstance() tests it for null before use.
   // NOTE(review): presumably assigned from load() on first call - the assignment
   // is not visible in this excerpt, confirm against the full file.
   73     private static DomainTokenizer categorizer = null;
 
   81     static DomainTokenizer getInstance() throws IOException {
 
   82         if (categorizer == null) {
 
   95     private static DomainTokenizer load() throws IOException {
 
   96         try (InputStream is = DomainTokenizer.class.getResourceAsStream(DOMAIN_LIST);
 
   97                 InputStreamReader isReader = 
new InputStreamReader(is, StandardCharsets.UTF_8);
 
   98                 BufferedReader reader = 
new BufferedReader(isReader)) {
 
  100             DomainTokenizer categorizer = 
new DomainTokenizer();
 
  101             while (reader.ready()) {
 
  102                 String line = reader.readLine();
 
  103                 String trimmed = line.trim();
 
  104                 if (!StringUtils.isBlank(trimmed) && !trimmed.startsWith(COMMENT_TOKEN)) {
 
  105                     categorizer.addDomainSuffix(trimmed);
 
  113     private DomainTokenizer() {
 
   // Root node of the suffix trie. addDomainSuffix descends from here using a
   // suffix's labels from last to first, and getDomain starts its lookup here.
   117     private final DomainCategory trie = 
new DomainCategory();
 
  125     private void addDomainSuffix(String domainSuffix) {
 
  126         if (StringUtils.isBlank(domainSuffix)) {
 
  130         String[] tokens = domainSuffix.toLowerCase().trim().split(DELIMITER);
 
  132         DomainCategory cat = trie;
 
  133         for (
int i = tokens.length - 1; i >= 0; i--) {
 
  134             String token = tokens[i];
 
  135             if (StringUtils.isBlank(token)) {
 
  139             cat = cat.getOrAddChild(tokens[i]);
 
  153     String getDomain(String domain) {
 
  154         if (StringUtils.isBlank(domain)) {
 
  158         List<String> tokens = Stream.of(domain.toLowerCase().split(DELIMITER))
 
  159                 .filter(StringUtils::isNotBlank)
 
  160                 .collect(Collectors.toList());
 
  162         int idx = tokens.size() - 1;
 
  163         DomainCategory cat = trie;
 
  165         for (; idx >= 0; idx--) {
 
  169             if (cat.get(EXCEPTION_PREFIX + tokens.get(idx)) != null) {
 
  173             DomainCategory newCat = cat.get(tokens.get(idx));
 
  176             if (newCat == null) {
 
  179                 newCat = cat.get(WILDCARD);
 
  180                 if (newCat == null) {
 
  189         if (idx == tokens.size() - 1) {
 
  192             int minIndex = Math.max(0, idx);
 
  193             List<String> subList = tokens.subList(minIndex, tokens.size());
 
  194             return String.join(JOINER, subList);
 
DomainCategory getOrAddChild(String childKey)