/*
 * Decompiled with CFR 0.152.
 */
package org.apache.asterix.fuzzyjoin.tests;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.asterix.fuzzyjoin.tokenizer.HashedUTF8NGramTokenFactory;
import org.apache.asterix.fuzzyjoin.tokenizer.IToken;
import org.apache.asterix.fuzzyjoin.tokenizer.ITokenFactory;
import org.apache.asterix.fuzzyjoin.tokenizer.NGramUTF8StringBinaryTokenizer;
import org.apache.asterix.fuzzyjoin.tokenizer.UTF8NGramTokenFactory;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for {@link NGramUTF8StringBinaryTokenizer}.
 *
 * <p>Each test tokenizes a fixed sample string (with and without pre/post padding), serializes
 * every produced token, and compares it against n-grams computed independently in this class
 * with plain {@link String} operations.
 */
public class NGramTokenizerTest {
    /** Padding character ('#', code 35) prepended when pre/post padding is enabled. */
    private static final char PRECHAR = '#';
    /** Padding character ('$', code 36) appended when pre/post padding is enabled. */
    private static final char POSTCHAR = '$';
    /** Seed and multiplier of {@link #tokenHash}; equals 0x9E3779B9 read as a signed int. */
    private static final int HASH_SEED = -1640531527;

    /** Sample input; deliberately contains a non-ASCII character. */
    private final String str = "J\u00fcrgen S. Generic's Car";
    /** {@link #str} as written by {@link DataOutputStream#writeUTF}; rebuilt before each test. */
    private byte[] inputBuffer;
    private final int gramLength = 3;

    /**
     * Appends to {@code grams} every substring of length {@code gramLength} of the lower-cased
     * input, in order of position.
     *
     * @param s          the string to split into grams
     * @param gramLength number of characters per gram
     * @param grams      output list the grams are appended to
     * @param prePost    when {@code true}, the lower-cased string is first padded with
     *                   {@code gramLength - 1} copies of {@link #PRECHAR} in front and
     *                   {@code gramLength - 1} copies of {@link #POSTCHAR} at the end
     */
    private void getExpectedGrams(String s, int gramLength, ArrayList<String> grams, boolean prePost) {
        String tmp = s.toLowerCase();
        if (prePost) {
            StringBuilder padded = new StringBuilder(tmp.length() + 2 * (gramLength - 1));
            appendRepeated(padded, PRECHAR, gramLength - 1);
            padded.append(tmp);
            appendRepeated(padded, POSTCHAR, gramLength - 1);
            tmp = padded.toString();
        }
        for (int start = 0; start + gramLength <= tmp.length(); ++start) {
            grams.add(tmp.substring(start, start + gramLength));
        }
    }

    /** Appends {@code times} copies of {@code c} to {@code target}; does nothing for {@code times <= 0}. */
    private static void appendRepeated(StringBuilder target, char c, int times) {
        for (int i = 0; i < times; ++i) {
            target.append(c);
        }
    }

    /** Serializes {@link #str} with {@link DataOutputStream#writeUTF} into {@link #inputBuffer}. */
    @Before
    public void init() throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        dos.writeUTF(this.str);
        this.inputBuffer = baos.toByteArray();
    }

    /**
     * Serializes the tokenizer's current token and returns a stream positioned at the start of
     * the serialized bytes.
     */
    private static DataInputStream serializeCurrentToken(NGramUTF8StringBinaryTokenizer tokenizer)
            throws IOException {
        ByteArrayOutputStream tokenBytes = new ByteArrayOutputStream();
        DataOutput tokenOut = new DataOutputStream(tokenBytes);
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenOut);
        return new DataInputStream(new ByteArrayInputStream(tokenBytes.toByteArray()));
    }

    /**
     * Drains the tokenizer and asserts that it yields exactly the expected hashed tokens, in
     * order. Each token is read back as a single {@code int}.
     */
    private static void assertHashedTokens(NGramUTF8StringBinaryTokenizer tokenizer,
            ArrayList<Integer> expectedHashedGrams) throws IOException {
        int tokenCount = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            int hashedGram = serializeCurrentToken(tokenizer).readInt();
            Assert.assertTrue("tokenizer produced more tokens than expected",
                    tokenCount < expectedHashedGrams.size());
            Assert.assertEquals(expectedHashedGrams.get(tokenCount).intValue(), hashedGram);
            ++tokenCount;
        }
        // Without this check a tokenizer that yields too few (or no) tokens would pass silently.
        Assert.assertEquals("number of tokens", expectedHashedGrams.size(), tokenCount);
    }

    /**
     * Checks hashed tokens whose hash incorporates the running occurrence count of the gram:
     * the k-th occurrence of a gram is expected to hash as {@code tokenHash(gram, k)}.
     *
     * @param prePost whether the tokenizer (and the expected grams) use pre/post padding
     */
    void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost) throws IOException {
        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
        // NOTE(review): the third constructor argument (false here, true in the other runners)
        // presumably controls whether token counts are ignored -- confirm against the tokenizer.
        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(this.gramLength, prePost, false, false, (ITokenFactory)tokenFactory);
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        ArrayList<String> expectedGrams = new ArrayList<String>();
        this.getExpectedGrams(this.str, this.gramLength, expectedGrams, prePost);
        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
        HashMap<String, Integer> gramCounts = new HashMap<String, Integer>();
        for (String gram : expectedGrams) {
            Integer seenSoFar = gramCounts.get(gram);
            int count = (seenSoFar == null) ? 1 : seenSoFar + 1;
            // Always store the new count: Integer is immutable, so incrementing a local copy
            // would leave the map at 1 and mis-hash the third and later occurrences.
            gramCounts.put(gram, count);
            expectedHashedGrams.add(this.tokenHash(gram, count));
        }
        assertHashedTokens(tokenizer, expectedHashedGrams);
    }

    /**
     * Checks hashed tokens where every gram is expected to hash as {@code tokenHash(gram, 1)},
     * regardless of how often it occurs.
     *
     * @param prePost whether the tokenizer (and the expected grams) use pre/post padding
     */
    void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(this.gramLength, prePost, true, false, (ITokenFactory)tokenFactory);
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        ArrayList<String> expectedGrams = new ArrayList<String>();
        this.getExpectedGrams(this.str, this.gramLength, expectedGrams, prePost);
        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
        for (String gram : expectedGrams) {
            expectedHashedGrams.add(this.tokenHash(gram, 1));
        }
        assertHashedTokens(tokenizer, expectedHashedGrams);
    }

    /**
     * Checks string tokens: each serialized token is read back with
     * {@link DataInputStream#readUTF()} and compared with the expected gram text.
     *
     * @param prePost whether the tokenizer (and the expected grams) use pre/post padding
     */
    void runTestNGramTokenizerWithUTF8Tokens(boolean prePost) throws IOException {
        UTF8NGramTokenFactory tokenFactory = new UTF8NGramTokenFactory();
        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(this.gramLength, prePost, true, false, (ITokenFactory)tokenFactory);
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        ArrayList<String> expectedGrams = new ArrayList<String>();
        this.getExpectedGrams(this.str, this.gramLength, expectedGrams, prePost);
        int tokenCount = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            String strGram = serializeCurrentToken(tokenizer).readUTF();
            Assert.assertTrue("tokenizer produced more tokens than expected",
                    tokenCount < expectedGrams.size());
            Assert.assertEquals((Object)expectedGrams.get(tokenCount), (Object)strGram);
            ++tokenCount;
        }
        // Without this check a tokenizer that yields too few (or no) tokens would pass silently.
        Assert.assertEquals("number of tokens", expectedGrams.size(), tokenCount);
    }

    @Test
    public void testNGramTokenizerWithCountedHashedUTF8Tokens() throws Exception {
        this.runTestNGramTokenizerWithCountedHashedUTF8Tokens(false);
        this.runTestNGramTokenizerWithCountedHashedUTF8Tokens(true);
    }

    @Test
    public void testNGramTokenizerWithHashedUTF8Tokens() throws Exception {
        this.runTestNGramTokenizerWithHashedUTF8Tokens(false);
        this.runTestNGramTokenizerWithHashedUTF8Tokens(true);
    }

    @Test
    public void testNGramTokenizerWithUTF8Tokens() throws IOException {
        this.runTestNGramTokenizerWithUTF8Tokens(false);
        this.runTestNGramTokenizerWithUTF8Tokens(true);
    }

    /**
     * Reference hash used to compute the expected value of a hashed token.
     *
     * <p>Starting from {@link #HASH_SEED}, each character is XOR-ed into the hash, which is then
     * multiplied by {@link #HASH_SEED} (with normal {@code int} wrap-around). The token count is
     * added at the end.
     *
     * @param token      the gram text
     * @param tokenCount value added to the character hash
     * @return the hash of {@code token} plus {@code tokenCount}
     */
    public int tokenHash(String token, int tokenCount) {
        int h = HASH_SEED;
        for (int i = 0; i < token.length(); ++i) {
            h ^= token.charAt(i);
            h *= HASH_SEED;
        }
        return h + tokenCount;
    }
}

