package org.apache.lucene.benchmark.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:org/apache/lucene/benchmark/utils/ExtractReuters.class */
/**
 * Splits Reuters SGML files ({@code *.sgm}) into one plain-text file per article.
 *
 * <p>For every article (terminated by a line containing {@code </REUTERS}) the contents of
 * the {@code <TITLE>}, {@code <DATE>} and {@code <BODY>} elements are written, in the order
 * they occur in the input, each followed by two line separators. The output file is named
 * {@code <input file name>-<article index>.txt} and is placed in the output directory.
 *
 * <p>Files are read and written with the platform default charset.
 */
public class ExtractReuters {
    private File reutersDir;
    private File outputDir;

    /** Matches one TITLE, DATE or BODY element; exactly one of the three groups is non-null per match. */
    Pattern EXTRACTION_PATTERN = Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");

    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /** Literal characters, index-aligned with {@link #META_CHARS_SERIALIZATIONS}. */
    private static final String[] META_CHARS = {"&", "<", ">", "\"", "'"};

    /** Entity forms that are turned back into the corresponding {@link #META_CHARS} entry. */
    private static final String[] META_CHARS_SERIALIZATIONS = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};

    /**
     * Creates an extractor and deletes every entry directly inside the output directory.
     *
     * @param reutersDir directory containing the {@code .sgm} input files
     * @param outputDir directory receiving the extracted text files; its current entries are deleted
     */
    public ExtractReuters(File reutersDir, File outputDir) {
        this.reutersDir = reutersDir;
        this.outputDir = outputDir;
        System.out.println("Deleting all files in " + outputDir);
        // listFiles() returns null when the path is missing or not a directory.
        File[] existing = outputDir.listFiles();
        if (existing != null) {
            for (File stale : existing) {
                stale.delete();
            }
        }
    }

    /**
     * Extracts every {@code .sgm} file found directly inside the input directory.
     * Prints a message to standard error and does nothing else when there are none.
     */
    public void extract() {
        File[] sgmFiles = this.reutersDir.listFiles(new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                return candidate.getName().endsWith(".sgm");
            }
        });
        if (sgmFiles == null || sgmFiles.length == 0) {
            System.err.println("No .sgm files in " + this.reutersDir);
            return;
        }
        for (File sgmFile : sgmFiles) {
            extractFile(sgmFile);
        }
    }

    /**
     * Splits a single SGML file into one output file per article.
     *
     * @param file the {@code .sgm} file to read
     * @throws RuntimeException wrapping any {@link IOException} raised while reading or writing
     */
    protected void extractFile(File file) {
        try {
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                StringBuilder article = new StringBuilder(1024);
                StringBuilder extracted = new StringBuilder(1024);
                int docNumber = 0;
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.indexOf("</REUTERS") == -1) {
                        // Join the article onto one line so the pattern can match across line breaks.
                        article.append(line).append(' ');
                        continue;
                    }

                    // End of article: the closing line itself is not part of the matched text.
                    Matcher matcher = this.EXTRACTION_PATTERN.matcher(article);
                    while (matcher.find()) {
                        for (int group = 1; group <= matcher.groupCount(); group++) {
                            String value = matcher.group(group);
                            if (value != null) {
                                extracted.append(value);
                            }
                        }
                        extracted.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
                    }

                    File target = new File(this.outputDir, file.getName() + "-" + docNumber + ".txt");
                    docNumber++;
                    writeText(target, unescape(extracted.toString()));

                    extracted.setLength(0);
                    article.setLength(0);
                }
            } finally {
                reader.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Replaces each entity in META_CHARS_SERIALIZATIONS with its literal character, in array order. */
    private static String unescape(String text) {
        String result = text;
        // NOTE(review): "&amp;" is handled first, so "&amp;lt;" ends up as "<". Order kept as-is.
        for (int i = 0; i < META_CHARS_SERIALIZATIONS.length; i++) {
            result = result.replace(META_CHARS_SERIALIZATIONS[i], META_CHARS[i]);
        }
        return result;
    }

    /** Writes text to target, closing the writer even when the write fails. */
    private static void writeText(File target, String text) throws IOException {
        FileWriter writer = new FileWriter(target);
        try {
            writer.write(text);
        } finally {
            writer.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param strArr exactly two arguments: the directory with the Reuters SGM files and the output directory
     */
    public static void main(String[] strArr) {
        if (strArr.length != 2) {
            printUsage();
            return;
        }
        File reutersDir = new File(strArr[0]);
        if (!reutersDir.exists()) {
            printUsage();
            return;
        }
        File outputDir = new File(strArr[1]);
        outputDir.mkdirs();
        new ExtractReuters(reutersDir, outputDir).extract();
    }

    private static void printUsage() {
        System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
    }
}
