package org.culturegraph.cluster.job.match;

import com.google.common.base.Charsets;
import com.google.common.primitives.Bytes;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import joptsimple.internal.Strings;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.culturegraph.cluster.inputformat.MultiTableInputFormat;
import org.culturegraph.cluster.mapreduce.AbstractTableMorphMapper;
import org.culturegraph.cluster.sink.ComplexPutWriter;
import org.culturegraph.cluster.util.AbstractJobLauncher;
import org.culturegraph.cluster.util.Column;
import org.culturegraph.cluster.util.ConfigConst;
import org.culturegraph.metamorph.core.Metamorph;
import org.culturegraph.metastream.sink.ValueSet;

/* loaded from: input_file:lodmill-rd-0.1.0-SNAPSHOT-jar-with-dependencies.jar:org/culturegraph/cluster/job/match/Matcher.class */
public final class Matcher extends AbstractJobLauncher {
    /** Counter group used by the mapper and reducer, and the mapper's counter name. */
    public static final String JOB_NAME = "Match";
    /** Property name that forms the leading part of {@link #CONTAINS_DEL}. */
    public static final String CONTAINS_PROPERTY = "cg:contains";
    /** Character that terminates the algorithm-name prefix of the keys emitted by the mapper. */
    public static final char ALG_PREFIX_DELIMITER = '$';
    /** Property name that forms the leading part of {@link #EQUAL_DEL}. */
    public static final String EQUAL_PROPERTY = "cg:eq";
    /** {@code "cg:eq"} followed by U+001F; values starting with this marker are treated as equivalence notices. */
    public static final String EQUAL_DEL = "cg:eq\u001f";
    /** UTF-8 encoding of {@link #EQUAL_DEL}. */
    public static final byte[] EQUAL_DEL_BYTES = EQUAL_DEL.getBytes(Charsets.UTF_8);
    /** {@code "cg:contains"} followed by U+001F; the reducer prepends it to every non-notice value. */
    public static final String CONTAINS_DEL = "cg:contains\u001f";
    /** UTF-8 encoding of {@link #CONTAINS_DEL}. */
    public static final byte[] CONTAINS_DEL_BYTES = CONTAINS_DEL.getBytes(Charsets.UTF_8);

    /* loaded from: input_file:lodmill-rd-0.1.0-SNAPSHOT-jar-with-dependencies.jar:org/culturegraph/cluster/job/match/Matcher$UniquePropertiesMapper.class */
    /**
     * Mapper that groups rows under the smallest value collected for them.
     *
     * <p>For every input row the values gathered in the {@link ValueSet} are inspected
     * (presumably these are the match keys produced by the morph definition; confirm
     * against {@code AbstractTableMorphMapper}). The smallest value, prefixed with the
     * algorithm name, becomes the output key and is written together with the row key.
     * Every remaining value is written under the same key as an equivalence notice of
     * the form {@code EQUAL_DEL + algorithm prefix + value}.
     */
    static final class UniquePropertiesMapper extends AbstractTableMorphMapper<ValueSet, Text, Text> {
        public static final String COUNTER_EQU = "equivalence notices";

        /** Algorithm name used when the configuration does not provide one. */
        private static final String UNKNOWN_ALGORITHM = "unknown";

        // Reused across map() calls to avoid allocating a new Text per record.
        private final Text currentKey = new Text();
        private final Text currentRow = new Text();
        private final Text currentEquivalent = new Text();

        /** UTF-8 bytes of the algorithm name followed by {@link Matcher#ALG_PREFIX_DELIMITER}. */
        private byte[] algorithm = prefixFor(UNKNOWN_ALGORITHM);

        UniquePropertiesMapper() {
        }

        /**
         * Reads the algorithm name from the configuration and derives the key prefix from it.
         * Falls back to {@code "unknown"} when the name is not configured, instead of
         * producing a prefix built from the string {@code "null"}.
         *
         * @param configuration job configuration
         */
        @Override // org.culturegraph.cluster.mapreduce.AbstractTableMorphMapper
        protected void init(Configuration configuration) {
            this.algorithm = prefixFor(configuration.get(ConfigConst.ALGORITHM_NAME, UNKNOWN_ALGORITHM));
        }

        /**
         * @return a fresh {@link ValueSet} used as stream receiver
         */
        @Override // org.culturegraph.cluster.mapreduce.AbstractTableMorphMapper
        public ValueSet createStreamReceiver() {
            return new ValueSet();
        }

        /**
         * Writes the row under its smallest value and one equivalence notice per remaining value.
         *
         * @param immutableBytesWritable key of the current row
         * @param valueSet values collected for the row; the smallest one is removed from it
         * @param result raw row result (not used here)
         * @param context output and counters
         * @throws InterruptedException if writing to the context is interrupted
         * @throws IOException if writing to the context fails
         */
        @Override // org.culturegraph.cluster.mapreduce.AbstractTableMorphMapper
        public void map(ImmutableBytesWritable immutableBytesWritable, ValueSet valueSet, Result result, Mapper<ImmutableBytesWritable, Result, Text, Text>.Context context) throws InterruptedException, IOException {
            this.currentRow.set(immutableBytesWritable.get(), immutableBytesWritable.getOffset(), immutableBytesWritable.getLength());
            context.getCounter(Matcher.JOB_NAME, "unique properties").increment(valueSet.size());
            if (valueSet.isEmpty()) {
                context.getCounter(Matcher.JOB_NAME, "without match key").increment(1L);
                return;
            }

            // The smallest value is the representative under which the row is grouped.
            final String smallest = Collections.min(valueSet);
            valueSet.remove(smallest);
            setText(this.currentKey, this.algorithm, smallest.getBytes(Charsets.UTF_8));
            context.write(this.currentKey, this.currentRow);

            // Every other value is reported as equivalent to the representative.
            context.getCounter(Matcher.JOB_NAME, COUNTER_EQU).increment(valueSet.size());
            for (final String equivalent : valueSet) {
                setText(this.currentEquivalent, Matcher.EQUAL_DEL_BYTES, this.algorithm, equivalent.getBytes(Charsets.UTF_8));
                context.write(this.currentKey, this.currentEquivalent);
            }
        }

        /** Builds the key prefix: the algorithm name followed by the prefix delimiter, UTF-8 encoded. */
        private static byte[] prefixFor(String algorithmName) {
            return (algorithmName + Matcher.ALG_PREFIX_DELIMITER).getBytes(Charsets.UTF_8);
        }

        /** Sets {@code text} to the concatenation of the given byte arrays. */
        private void setText(Text text, byte[]... bArr) {
            text.set(Bytes.concat(bArr));
        }

        @Override // org.culturegraph.cluster.mapreduce.AbstractTableMorphMapper
        protected String getCounterName() {
            return Matcher.JOB_NAME;
        }
    }

    /* loaded from: input_file:lodmill-rd-0.1.0-SNAPSHOT-jar-with-dependencies.jar:org/culturegraph/cluster/job/match/Matcher$UniquePropertiesReducer.class */
    static final class UniquePropertiesReducer extends TableReducer<Text, Text, Text> {
        private static final Text EMPTY_TEXT = new Text();
        private final Set<Text> movetoNotices = new HashSet();
        private final Set<Text> containsIdList = new HashSet();
        private final ComplexPutWriter putWriter = new ComplexPutWriter();

        UniquePropertiesReducer() {
        }

        public void reduce(Text text, Iterable<Text> iterable, Reducer<Text, Text, Text, Writable>.Context context) throws IOException, InterruptedException {
            this.movetoNotices.clear();
            this.containsIdList.clear();
            processValues(iterable);
            if (this.containsIdList.size() < 2) {
                context.getCounter(Matcher.JOB_NAME, "singles").increment(1L);
            }
            this.movetoNotices.remove(text);
            writeBundle(text, context);
            context.getCounter(Matcher.JOB_NAME, "groups").increment(1L);
        }

        /* JADX WARN: Type inference failed for: r1v4, types: [byte[], byte[][]] */
        private void writeBundle(Text text, Reducer<Text, Text, Text, Writable>.Context context) throws IOException, InterruptedException {
            this.putWriter.startRecord(text.toString());
            Iterator<Text> it = this.containsIdList.iterator();
            while (it.hasNext()) {
                this.putWriter.literal(it.next().getBytes());
            }
            Iterator<Text> it2 = this.movetoNotices.iterator();
            while (it2.hasNext()) {
                this.putWriter.literal(Bytes.concat(new byte[]{Matcher.EQUAL_DEL_BYTES, it2.next().toString().getBytes()}));
            }
            this.putWriter.endRecord();
            context.write(EMPTY_TEXT, this.putWriter.getCurrentPut());
        }

        private void processValues(Iterable<Text> iterable) {
            for (Text text : iterable) {
                if (text.find(Matcher.EQUAL_DEL) == 0) {
                    Text text2 = new Text();
                    text2.append(text.getBytes(), Matcher.EQUAL_DEL_BYTES.length, text.getLength() - Matcher.EQUAL_DEL_BYTES.length);
                    this.movetoNotices.add(text2);
                } else {
                    Text text3 = new Text(Matcher.CONTAINS_DEL);
                    text3.append(text.getBytes(), 0, text.getLength());
                    this.containsIdList.add(text3);
                }
            }
        }

        @Override // org.apache.hadoop.mapreduce.Reducer
        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterable iterable, Reducer.Context context) throws IOException, InterruptedException {
            reduce((Text) obj, (Iterable<Text>) iterable, (Reducer<Text, Text, Text, Writable>.Context) context);
        }
    }

    /**
     * Command-line entry point: hands a new {@code Matcher} and the arguments to {@code launch}.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        final Matcher matcher = new Matcher();
        launch(matcher, strArr);
    }

    /**
     * Names the job, registers its arguments and derives a default algorithm name.
     *
     * <p>When a morph definition is configured, the {@code "name"} metadata value of that
     * definition is stored as the algorithm name unless one is already set.
     *
     * @param configuration configuration to prepare
     * @return an HBase configuration created from {@code configuration}
     */
    @Override // org.culturegraph.cluster.util.AbstractJobLauncher
    protected Configuration prepareConf(Configuration configuration) {
        final String inputTableLabel = getConf().get(ConfigConst.INPUT_TABLE);
        final String morphDefLabel = getConf().get(ConfigConst.MORPH_DEF);
        setJobName("Match '" + inputTableLabel + "' with '" + morphDefLabel + Strings.SINGLE_QUOTE);

        addOptionalArguments(ConfigConst.ALGORITHM_NAME);
        addRequiredArguments(ConfigConst.MORPH_DEF, ConfigConst.INPUT_TABLE, ConfigConst.OUTPUT_TABLE);

        final String morphDef = configuration.get(ConfigConst.MORPH_DEF);
        if (morphDef != null) {
            final Metamorph metamorph = new Metamorph(morphDef);
            configuration.setIfUnset(ConfigConst.ALGORITHM_NAME, metamorph.getValue(Metamorph.METADATA, "name"));
        }
        return HBaseConfiguration.create(configuration);
    }

    /**
     * Wires the mapper, reducer, input scan and output table into the job.
     *
     * <p>The scan covers the property column family only, disables block caching, and uses
     * the configured number of cached rows (500 if not configured). The job runs with two
     * reduce tasks.
     *
     * @param job job to configure
     * @param configuration configuration providing the input and output table names
     * @throws IOException if the mapper or reducer setup fails
     */
    @Override // org.culturegraph.cluster.util.AbstractJobLauncher
    protected void configureJob(Job job, Configuration configuration) throws IOException {
        final int cachedRows = getConf().getInt(ConfigConst.CACHED_ROWS, 500);

        final Scan propertyScan = new Scan();
        propertyScan.addFamily(Column.Family.PROPERTY);
        propertyScan.setCaching(cachedRows);
        propertyScan.setCacheBlocks(false);

        MultiTableInputFormat.initTableMapperJob(UniquePropertiesMapper.class, Text.class, Text.class, job);

        final String inputTable = configuration.get(ConfigConst.INPUT_TABLE);
        MultiTableInputFormat.setTablesInJob(job, inputTable, propertyScan);

        final String outputTable = configuration.get(ConfigConst.OUTPUT_TABLE);
        TableMapReduceUtil.initTableReducerJob(outputTable, UniquePropertiesReducer.class, job);

        job.setNumReduceTasks(2);
    }
}
