/*
 * Decompiled with CFR 0.152.
 */
package org.apache.asterix.fuzzyjoin.tests;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import junit.framework.Assert;
import org.apache.asterix.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer;
import org.apache.asterix.fuzzyjoin.tokenizer.HashedUTF8WordTokenFactory;
import org.apache.asterix.fuzzyjoin.tokenizer.IToken;
import org.apache.asterix.fuzzyjoin.tokenizer.ITokenFactory;
import org.apache.asterix.fuzzyjoin.tokenizer.UTF8WordTokenFactory;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for {@link DelimitedUTF8StringBinaryTokenizer} driven by a fixed sample sentence.
 *
 * <p>Each test tokenizes the sample text, serializes every token the tokenizer yields, reads the
 * serialized form back, and compares it with an expectation precomputed in {@link #init()}:
 * the lower-cased word itself, its hash with a constant count of 1, or its hash combined with a
 * running per-token occurrence count.
 *
 * <p>NOTE(review): the tests only compare the tokens the tokenizer actually yields; none of them
 * asserts the total number of tokens produced. Consider adding that check once the expected
 * token count has been confirmed against the tokenizer.
 */
public class WordTokenizerTest {
    private final String text = "Hello World, I would like to inform you of the importance of Foo Bar. Yes, Foo Bar. J\u00fcrgen.";
    /** {@link #text} as written by {@link DataOutputStream#writeUTF(String)}. */
    private byte[] inputBuffer;
    /** Expected tokens, in order of appearance. */
    private final ArrayList<String> expectedUTF8Tokens = new ArrayList<String>();
    /** {@link #tokenHash(String, int)} of every expected token with a count of 1. */
    private final ArrayList<Integer> expectedHashedUTF8Tokens = new ArrayList<Integer>();
    /** {@link #tokenHash(String, int)} of every expected token with its running occurrence count. */
    private final ArrayList<Integer> expectedCountedHashedUTF8Tokens = new ArrayList<Integer>();

    /**
     * Builds the serialized input buffer and the three lists of expected values.
     *
     * @throws IOException if writing the sample text to the in-memory stream fails
     */
    @Before
    public void init() throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        dos.writeUTF(this.text);
        this.inputBuffer = baos.toByteArray();

        String[] words = {
            "hello", "world", "i", "would", "like", "to", "inform", "you", "of", "the",
            "importance", "of", "foo", "bar", "yes", "foo", "bar", "j\u00fcrgen"
        };
        for (String word : words) {
            this.expectedUTF8Tokens.add(word);
        }

        // Plain hashes: every token is hashed with a count of 1.
        for (String token : this.expectedUTF8Tokens) {
            this.expectedHashedUTF8Tokens.add(this.tokenHash(token, 1));
        }

        // Counted hashes: the n-th occurrence of a token is hashed with count n. The updated
        // count is stored back into the map so that third and later occurrences keep
        // increasing instead of staying at 2.
        HashMap<String, Integer> tokenCounts = new HashMap<String, Integer>();
        for (String token : this.expectedUTF8Tokens) {
            Integer seen = tokenCounts.get(token);
            int occurrence = (seen == null) ? 1 : seen + 1;
            tokenCounts.put(token, occurrence);
            this.expectedCountedHashedUTF8Tokens.add(this.tokenHash(token, occurrence));
        }
    }

    /**
     * Compares each serialized hashed token with the expected count-aware hash.
     *
     * @throws IOException if serializing a token or reading it back fails
     */
    @Test
    public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
        HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
        // The first constructor flag is false only in this test; presumably it controls whether
        // token counts are ignored -- confirm against the tokenizer.
        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, false, (ITokenFactory)tokenFactory);
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        int tokenCount = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            DataInputStream in = WordTokenizerTest.readBack(tokenizer.getToken());
            Integer hashedToken = in.readInt();
            // Expected value first, so a failure message labels the two values correctly.
            Assert.assertEquals((Object)this.expectedCountedHashedUTF8Tokens.get(tokenCount), (Object)hashedToken);
            ++tokenCount;
        }
    }

    /**
     * Compares each serialized hashed token with the expected hash computed with a count of 1.
     *
     * @throws IOException if serializing a token or reading it back fails
     */
    @Test
    public void testWordTokenizerWithHashedUTF8Tokens() throws IOException {
        HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, (ITokenFactory)tokenFactory);
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        int tokenCount = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            DataInputStream in = WordTokenizerTest.readBack(tokenizer.getToken());
            Integer hashedToken = in.readInt();
            Assert.assertEquals((Object)this.expectedHashedUTF8Tokens.get(tokenCount), (Object)hashedToken);
            ++tokenCount;
        }
    }

    /**
     * Compares each serialized string token with the expected lower-cased word.
     *
     * @throws IOException if serializing a token or reading it back fails
     */
    @Test
    public void testWordTokenizerWithUTF8Tokens() throws IOException {
        UTF8WordTokenFactory tokenFactory = new UTF8WordTokenFactory();
        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, (ITokenFactory)tokenFactory);
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        int tokenCount = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            DataInputStream in = WordTokenizerTest.readBack(tokenizer.getToken());
            String strToken = in.readUTF();
            Assert.assertEquals(this.expectedUTF8Tokens.get(tokenCount), strToken);
            ++tokenCount;
        }
    }

    /**
     * Computes the reference hash the tests expect for a token.
     *
     * <p>Starting from the seed {@code -1640531527} (the bit pattern {@code 0x9E3779B9}), each
     * UTF-16 code unit of the token is XORed into the hash, which is then multiplied by the same
     * constant with ordinary {@code int} wrap-around. The count is added at the end.
     *
     * @param token the token text to hash
     * @param tokenCount the value added to the hash of the characters
     * @return the hash of the characters plus {@code tokenCount}
     */
    public int tokenHash(String token, int tokenCount) {
        int h = -1640531527;
        for (int i = 0; i < token.length(); ++i) {
            h ^= token.charAt(i);
            h *= -1640531527;
        }
        return h + tokenCount;
    }

    /** Serializes a token into memory and returns a stream positioned at the start of those bytes. */
    private static DataInputStream readBack(IToken token) throws IOException {
        ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
        DataOutputStream tokenDos = new DataOutputStream(tokenBaos);
        token.serializeToken((DataOutput)tokenDos);
        return new DataInputStream(new ByteArrayInputStream(tokenBaos.toByteArray()));
    }
}

