/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.eval.tools;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.tika.utils.ProcessUtils;

/**
 * Utility class that runs {@link TopCommonTokenCounter} against every entry
 * in a directory of table files (named {lang}_table.gz) and writes one common
 * tokens file per input table file into the output directory.
 * <p>
 * The name of each output file is the name of the input file with a trailing
 * <code>_table</code>, optionally followed by <code>.txt</code> and/or
 * <code>.gz</code>, stripped off.
 */
public class BatchTopCommonTokenCounter {

    /**
     * Invokes {@link TopCommonTokenCounter#main(String[])} once for each entry
     * found in the table file directory.
     *
     * @param args <code>args[0]</code> is the directory containing the table files;
     *             <code>args[1]</code> is the directory that the common tokens
     *             files are resolved against. Any further arguments are ignored.
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException if the entries of the table file directory cannot be listed
     * @throws Exception anything propagated from {@link TopCommonTokenCounter#main(String[])}
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: BatchTopCommonTokenCounter <tableFileDir> <commonTokensDir>");
        }
        Path tableFileDir = Paths.get(args[0]);
        Path commonTokensDir = Paths.get(args[1]);

        // File.listFiles() returns null (rather than throwing) when the path is not
        // a directory or an I/O error occurs; fail with a clear message instead of
        // letting the loop below throw a NullPointerException.
        File[] tableFiles = tableFileDir.toFile().listFiles();
        if (tableFiles == null) {
            throw new IOException(
                    "Unable to list files in table file directory: " + tableFileDir);
        }

        for (File f : tableFiles) {
            // Progress output: the entry currently being processed.
            System.err.println(f);

            // Derive the output name by dropping the "_table[.txt][.gz]" suffix.
            Path commonTokensFile = commonTokensDir.resolve(
                    f.getName().replaceAll("_table(\\.txt)?(\\.gz)?$", ""));

            // NOTE(review): the arguments are escaped with escapeCommandLine even
            // though they are handed over in-process rather than through a shell;
            // confirm that TopCommonTokenCounter expects escaped paths.
            TopCommonTokenCounter.main(
                    new String[]{
                            ProcessUtils.escapeCommandLine(f.getAbsolutePath()),
                            ProcessUtils.escapeCommandLine(commonTokensFile.toAbsolutePath().toString())
                    }
            );
        }
    }
}
