package org.culturegraph.cluster.job.expand;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.culturegraph.cluster.exception.CulturegraphClusterException;
import org.culturegraph.cluster.inputformat.MultiTableInputFormat;
import org.culturegraph.cluster.mapreduce.KeyChangeMapper;
import org.culturegraph.cluster.mapreduce.ListMapReducer;
import org.culturegraph.cluster.mapreduce.MorphMapper;
import org.culturegraph.cluster.type.NamedValueWritable;
import org.culturegraph.cluster.type.StringListMapWritable;
import org.culturegraph.cluster.util.Column;
import org.culturegraph.cluster.util.ConfigChecker;
import org.culturegraph.cluster.util.ConfigConst;
import org.culturegraph.cluster.util.MapReduceUtil;
import org.culturegraph.metastream.type.ListMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:lodmill-rd-0.1.0-SNAPSHOT-jar-with-dependencies.jar:org/culturegraph/cluster/job/expand/BundleCheck.class */
/**
 * Two-stage Hadoop tool: a first job reads the property column family of the
 * configured input table(s) through {@link MorphMapper} and writes one
 * {@link StringListMapWritable} per key into a temporary sequence file; a second
 * job re-keys those records with {@link KeyChangeMapper} and lets
 * {@link CheckReducer} report every property that carries more than one distinct
 * value within a group.
 *
 * <p>Exit codes returned by {@link #run(String[])}: {@code 0} when both jobs
 * succeed, {@code -1} when the configuration check fails, {@code 1} when either
 * job fails.
 */
public final class BundleCheck extends Configured implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(BundleCheck.class);

    /** Scanner caching used when {@link ConfigConst#CACHED_ROWS} is not set. */
    private static final int DEFAULT_CACHED_ROWS = 500;

    /** Number of reduce tasks used by the data collection job. */
    private static final int COLLECT_REDUCE_TASKS = 2;

    private final ConfigChecker configChecker = new ConfigChecker(new String[0]);

    /**
     * Reducer that receives all property maps grouped under one key and emits a
     * {@code "conflict"} value for each property whose values, merged across all
     * maps of the group, contain more than one distinct entry.
     */
    public static final class CheckReducer extends Reducer<Text, StringListMapWritable, Text, NamedValueWritable> {
        private static final String NAME = "CheckReducer";

        /** Scratch set reused for every property to avoid reallocating per key. */
        private final Set<String> tempSet = new HashSet<String>();

        /**
         * Collects the property maps of one group, then checks every property name
         * seen in the group for conflicting values.
         *
         * @param text     the key shared by all records of the group
         * @param iterable the property maps of the records in the group
         * @param context  receives conflict records and the job counters
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override // org.apache.hadoop.mapreduce.Reducer
        public void reduce(Text text, Iterable<StringListMapWritable> iterable, Reducer<Text, StringListMapWritable, Text, NamedValueWritable>.Context context) throws IOException, InterruptedException {
            final Set<String> properties = new HashSet<String>();
            final ArrayList<ListMap<String, String>> records = new ArrayList<ListMap<String, String>>();

            // The maps are detached from the writables before being retained.
            // NOTE(review): presumably because Hadoop reuses the value instance
            // between iterations -- confirm against StringListMapWritable.
            for (StringListMapWritable writable : iterable) {
                final ListMap<String, String> record = writable.detachListMap();
                properties.addAll(record.keySet());
                records.add(record);
            }

            for (String property : properties) {
                this.tempSet.clear();
                for (ListMap<String, String> record : records) {
                    this.tempSet.addAll(record.get(property));
                }
                // More than one distinct value across the group means the records disagree.
                if (this.tempSet.size() > 1) {
                    context.write(text, new NamedValueWritable("conflict", property + ":" + this.tempSet));
                    context.getCounter(NAME, "conflicts in " + property).increment(1L);
                }
            }

            context.getCounter(NAME, "properties checked").increment(properties.size());
            context.getCounter(NAME, "records checked").increment(records.size());
            context.getCounter(NAME, "bundles checked").increment(1L);
        }
    }

    /**
     * Registers the configuration keys this tool understands:
     * {@link ConfigConst#KEY_NAME} is optional; input table, output path and
     * morph definition are required.
     */
    public BundleCheck() {
        this.configChecker.addOptional(ConfigConst.KEY_NAME);
        this.configChecker.addRequired(ConfigConst.INPUT_TABLE, ConfigConst.OUTPUT_PATH, ConfigConst.MORPH_DEF);
    }

    /**
     * Command line entry point; terminates the JVM with the code returned by
     * {@link #run(String[])}.
     *
     * @param strArr command line arguments handed to {@link ToolRunner}
     * @throws CulturegraphClusterException if launching the tool throws
     */
    public static void main(String[] strArr) {
        try {
            System.exit(ToolRunner.run(new BundleCheck(), strArr));
        } catch (Exception e) {
            throw new CulturegraphClusterException("Error during job launch", e);
        }
    }

    /**
     * Verifies the configuration and runs the collect job followed by the check job.
     * The check job is skipped when the collect job fails.
     *
     * @param strArr remaining command line arguments (unused)
     * @return {@code 0} on success, {@code -1} if the configuration is invalid,
     *         {@code 1} if one of the jobs fails
     * @throws Exception if job setup or submission fails
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] strArr) throws Exception {
        if (!this.configChecker.logAndVerify(LOG, getConf())) {
            return -1;
        }
        final String tmpOutput = MapReduceUtil.makeTmp(getConf()) + "out/";
        if (!runDataCollectJob(tmpOutput)) {
            LOG.error("Data collection job failed; check job will not be run");
            return 1;
        }
        if (!runCheckJob(tmpOutput)) {
            LOG.error("Check job failed");
            return 1;
        }
        // Zero signals success; main() passes this value to System.exit.
        return 0;
    }

    /**
     * Runs the second stage: reads the intermediate sequence files, re-keys them
     * and writes detected conflicts as text to the configured output path.
     *
     * @param str path of the intermediate data written by the collect job
     * @return {@code true} if the job completed successfully
     */
    private boolean runCheckJob(String str) throws IOException, InterruptedException, ClassNotFoundException {
        final Job job = new Job(getConf(), "merge data and check");
        job.setJarByClass(BundleCheck.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(str));

        job.setMapperClass(KeyChangeMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(StringListMapWritable.class);
        job.setReducerClass(CheckReducer.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(getConf().get(ConfigConst.OUTPUT_PATH)));

        return job.waitForCompletion(true);
    }

    /**
     * Runs the first stage: scans the property column family of the configured
     * input table(s) and writes the reduced property maps as sequence files.
     *
     * @param str path the intermediate data is written to
     * @return {@code true} if the job completed successfully
     */
    private boolean runDataCollectJob(String str) throws IOException, InterruptedException, ClassNotFoundException {
        final Job job = new Job(getConf(), "collect data");
        job.setJarByClass(BundleCheck.class);

        final Scan scan = new Scan();
        scan.addFamily(Column.Family.PROPERTY);
        scan.setCaching(getConf().getInt(ConfigConst.CACHED_ROWS, DEFAULT_CACHED_ROWS));
        scan.setCacheBlocks(false);

        MultiTableInputFormat.initTableMapperJob(MorphMapper.class, Text.class, Text.class, job);
        MultiTableInputFormat.setTablesInJob(job, getConf().get(ConfigConst.INPUT_TABLE), scan);
        MorphMapper.configureJob(job);

        job.setReducerClass(ListMapReducer.fromNamedValues());
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(StringListMapWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(str));
        job.setNumReduceTasks(COLLECT_REDUCE_TASKS);

        return job.waitForCompletion(true);
    }
}
