/*
 * Decompiled with CFR 0.152.
 */
package marytts.language.de.preprocess;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import marytts.language.de.preprocess.ExpansionPattern;
import marytts.language.de.preprocess.REPattern;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Expansion pattern that recognises multi-word expressions listed in the
 * {@code multiword.dat} resource and replaces the matching tokens with the
 * expansion stored for them in that dictionary.
 *
 * <p>The dictionary is loaded once, in the static initializer. If loading
 * fails, a warning is logged and the dictionary stays empty (or partially
 * filled), in which case {@link #isCandidate(Element)} simply matches fewer
 * (or no) tokens.</p>
 */
public class MultiWordEP
extends ExpansionPattern {
    private final String[] _knownTypes = new String[]{"multiword"};
    private final List<String> knownTypes = Arrays.asList(this._knownTypes);
    /** Maps a whitespace-normalised multi-word key to its expansion ("graph") string. */
    private static final Map<String, String> multiWordDict = new HashMap<String, String>();
    /** Every single word that occurs in at least one key of {@link #multiWordDict}. */
    private static final Set<String> constituentWordSet = new HashSet<String>();
    /** This pattern defines no character-level regular expression; always {@code null}. */
    private final Pattern reMatchingChars = null;
    private static final Logger logger = MaryUtils.getLogger("MultiWordEP");

    /**
     * @return the type names handled by this pattern (only {@code "multiword"}).
     */
    @Override
    public List<String> knownTypes() {
        return this.knownTypes;
    }

    /**
     * @return always {@code null}: this pattern does not select tokens by characters.
     */
    @Override
    public Pattern reMatchingChars() {
        return this.reMatchingChars;
    }

    /**
     * A token is a candidate when its text is one of the words that occur in
     * any dictionary key.
     *
     * @param t the token element to test
     * @return {@code true} if the token text is a constituent of some multi-word key
     */
    @Override
    protected boolean isCandidate(Element t) {
        String str = MaryDomUtils.tokenText(t);
        return constituentWordSet.contains(str);
    }

    /**
     * Delegates to {@link #match(String, int)}.
     */
    @Override
    protected int canDealWith(String s, int type) {
        return this.match(s, type);
    }

    /**
     * Accepts any non-empty string.
     *
     * @param s    the string to test
     * @param type the type proposed by the caller
     * @return {@code type} if {@code s} is non-empty, {@code -1} otherwise
     */
    @Override
    protected int match(String s, int type) {
        if (s.length() > 0) {
            return type;
        }
        return -1;
    }

    /**
     * Looks for the longest prefix of {@code tokens} whose space-joined text is
     * a key in the multi-word dictionary, and replaces that prefix with the
     * tokens created from the dictionary entry.
     *
     * @param tokens the candidate tokens, in document order; must be non-null and non-empty
     * @param s      unused by this implementation
     * @param type   unused by this implementation
     * @return the newly created elements, or an empty list if no prefix of
     *         {@code tokens} is a dictionary key
     * @throws NullPointerException     if {@code tokens} is {@code null}
     * @throws IllegalArgumentException if {@code tokens} is empty
     */
    @Override
    protected List<Element> expand(List<Element> tokens, String s, int type) {
        if (tokens == null) {
            throw new NullPointerException("Received null argument");
        }
        if (tokens.isEmpty()) {
            throw new IllegalArgumentException("Received empty list");
        }
        List<Element> expanded = new ArrayList<Element>();
        // Working copy: shrinks from the end until its joined text is a dictionary key.
        List<Element> match = new ArrayList<Element>(tokens);
        StringBuilder sb = new StringBuilder();
        String multiword = null;
        while (!match.isEmpty()) {
            sb.setLength(0);
            for (Element token : match) {
                sb.append(MaryDomUtils.tokenText(token));
                sb.append(" ");
            }
            String lookup = sb.toString().trim();
            logger.debug("Looking up multiword in dictionary: `" + lookup + "'");
            if (multiWordDict.containsKey(lookup)) {
                multiword = lookup;
                break;
            }
            // No hit: drop the last token and retry with the shorter prefix.
            match.remove(match.size() - 1);
        }
        if (multiword != null) {
            expanded.addAll(this.dictionaryExpandMultiWord(match, multiword));
            logger.debug("Have found multiword in dictionary: `" + multiword + "'");
        }
        // isDebugEnabled() also holds for finer levels (TRACE/ALL), unlike an
        // equality test against Level.DEBUG.
        if (logger.isDebugEnabled()) {
            StringBuilder logBuf = new StringBuilder();
            for (Element elt : expanded) {
                if (elt.getTagName().equals("t")) {
                    logBuf.append(MaryDomUtils.tokenText(elt));
                } else {
                    logBuf.append(elt.getTagName());
                }
                logBuf.append(" ");
            }
            logger.debug("Expanded multiword: " + logBuf.toString());
        }
        if (!expanded.isEmpty()) {
            // NOTE(review): replaceTokens is not defined in this class (presumably
            // inherited); it is assumed to swap the matched tokens for the new ones
            // in the document.
            this.replaceTokens(match, expanded);
        }
        return expanded;
    }

    /**
     * Creates the replacement tokens for a dictionary hit.
     *
     * @param match     the matched tokens; the first one supplies the owner document
     * @param multiword the dictionary key that was matched
     * @return a new list holding the elements produced by {@code makeNewTokens}
     *         for the dictionary entry of {@code multiword}
     */
    private List<Element> dictionaryExpandMultiWord(List<Element> match, String multiword) {
        Document doc = match.get(0).getOwnerDocument();
        String graph = multiWordDict.get(multiword);
        return new ArrayList<Element>(this.makeNewTokens(doc, graph, true, multiword));
    }

    /**
     * Reads the {@code multiword.dat} class-path resource (UTF-8) into
     * {@link #multiWordDict} and {@link #constituentWordSet}.
     *
     * <p>Lines starting with {@code #} and lines matched by
     * {@code REPattern.emptyLine} are skipped. Every other line is expected to
     * contain a key and an expansion separated by {@code /}; runs of whitespace
     * in both are collapsed to a single space. Lines without both parts, or with
     * an empty key, are logged and skipped instead of aborting the load.</p>
     *
     * @throws FileNotFoundException if the resource cannot be found
     * @throws IOException           if reading the resource fails
     */
    private static void loadMultiWordDict() throws FileNotFoundException, IOException {
        InputStream mwStream = MultiWordEP.class.getResourceAsStream("multiword.dat");
        if (mwStream == null) {
            // getResourceAsStream signals a missing resource with null; report it as
            // a checked exception so the static initializer can log it.
            throw new FileNotFoundException("Resource multiword.dat not found");
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(mwStream, "UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith("#") || REPattern.emptyLine.matcher(line).find()) {
                    continue;
                }
                StringTokenizer st = new StringTokenizer(line, "/");
                if (st.countTokens() < 2) {
                    logger.warn("Ignoring malformed multiword entry: `" + line + "'");
                    continue;
                }
                String key = st.nextToken().trim().replaceAll("\\s+", " ");
                String graph = st.nextToken().trim().replaceAll("\\s+", " ");
                if (key.length() == 0) {
                    logger.warn("Ignoring multiword entry with empty key: `" + line + "'");
                    continue;
                }
                multiWordDict.put(key, graph);
                constituentWordSet.addAll(Arrays.asList(key.split(" ")));
            }
        } finally {
            // Closing the reader also closes the underlying resource stream.
            br.close();
        }
    }

    // Must stay after the static field declarations above: the loader uses them.
    static {
        try {
            MultiWordEP.loadMultiWordDict();
        }
        catch (FileNotFoundException e) {
            logger.warn("Could not load multiword file", e);
        }
        catch (IOException e) {
            logger.warn("Could not load multiword file", e);
        }
    }
}

