package org.culturegraph.mediawiki.analyzer;

import com.hp.hpl.jena.sparql.ARQConstants;
import de.fau.cs.osr.ptk.common.ast.AstNode;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.culturegraph.mediawiki.converter.WikiTextParser;
import org.culturegraph.mediawiki.type.WikiPage;
import org.culturegraph.mediawiki.util.TextExtractor;
import org.culturegraph.mediawiki.util.TraverseTree;
import org.culturegraph.metastream.framework.DefaultObjectPipe;
import org.culturegraph.metastream.framework.StreamReceiver;
import org.sweble.wikitext.lazy.preprocessor.Template;
import org.sweble.wikitext.lazy.preprocessor.TemplateArgument;

/* loaded from: input_file:lodmill-rd-0.1.0-SNAPSHOT-jar-with-dependencies.jar:org/culturegraph/mediawiki/analyzer/TemplateExtractor.class */
/**
 * Pipe stage that walks the parsed wiki AST of a {@link WikiPage} and forwards
 * every template whose name matches a configurable regular expression to the
 * downstream {@link StreamReceiver}.
 *
 * <p>For each processed page one record is emitted, identified by the page id.
 * Inside that record every matching template becomes an entity named after the
 * sanitized template name. The entity starts with an empty {@code _TEMPLATE_}
 * literal, followed by one literal per <em>named</em> template argument.
 *
 * <p>A single {@link Matcher} is kept in a field and reset for every template,
 * so one instance must not be used from several threads at the same time.
 */
public final class TemplateExtractor extends DefaultObjectPipe<WikiPage, StreamReceiver> implements Analyzer {
    /** Name of the marker literal emitted first inside every template entity. */
    private static final String TEMPLATE_MARKER = "_TEMPLATE_";

    /**
     * Replacement for blanks in entity and literal names. The decompiled code
     * referenced an unrelated Jena ARQ constant here; a local literal keeps the
     * sanitization independent of that library.
     */
    private static final String NAME_SEPARATOR = "_";

    /** Matches an angle-bracket tag such as {@code <ref>} or {@code <br/>}. */
    private static final Pattern MARKUP_TAG = Pattern.compile("<[^>]+>");

    /** Matches {@code [[...]]}; group 1 is the text after the last {@code |}. */
    private static final Pattern DOUBLE_BRACKET_LINK = Pattern.compile("\\[\\[(?:.*?\\|)*?([^|]*?)\\]\\]");

    /** Matches {@code [...]}; group 1 is the text after the last blank. */
    private static final Pattern SINGLE_BRACKET_LINK = Pattern.compile("\\[(?:.*? )*?([^ ]*?)\\]");

    /** Reusable matcher for the configured template name pattern. */
    private Matcher nameMatcher;

    /** Visitor that performs the actual AST traversal for {@link #process(WikiPage)}. */
    private final TemplateVisitor visitor;

    /* loaded from: input_file:lodmill-rd-0.1.0-SNAPSHOT-jar-with-dependencies.jar:org/culturegraph/mediawiki/analyzer/TemplateExtractor$TemplateVisitor.class */
    /**
     * AST visitor that turns matching templates and their named arguments into
     * entity and literal events on the enclosing extractor's receiver.
     */
    public class TemplateVisitor extends TraverseTree {
        /** Converts AST nodes (template names, argument names and values) to text. */
        private final TextExtractor textExtractor = new TextExtractor();

        public TemplateVisitor() {
        }

        /**
         * Emits an entity for {@code template} if its trimmed name matches the
         * configured name pattern completely; otherwise the template (including
         * its arguments) is skipped.
         *
         * @param template the template node being visited
         */
        @Override // org.culturegraph.mediawiki.util.TraverseTree
        public final void visit(Template template) {
            final String rawName = extractText(template.getName());
            final Matcher matcher = TemplateExtractor.this.nameMatcher;
            matcher.reset(rawName.trim());
            if (!matcher.matches()) {
                return;
            }

            final StreamReceiver out = (StreamReceiver) TemplateExtractor.this.getReceiver();
            out.startEntity(sanitizeName(rawName));
            out.literal(TEMPLATE_MARKER, "");
            // Visiting the arguments emits their literals inside the open entity.
            iterate(template.getArgs());
            out.endEntity();
        }

        /**
         * Emits a literal for a named template argument. Arguments without a
         * name are ignored.
         *
         * @param templateArgument the argument node being visited
         */
        @Override // org.culturegraph.mediawiki.util.TraverseTree
        public final void visit(TemplateArgument templateArgument) {
            if (!templateArgument.getHasName()) {
                return;
            }

            final String name = sanitizeName(extractText(templateArgument.getName()));
            final String value = sanitizeValue(extractText(templateArgument.getValue()));
            ((StreamReceiver) TemplateExtractor.this.getReceiver()).literal(name, value);
        }

        /** Returns the text that the {@link TextExtractor} produces for {@code astNode}. */
        private String extractText(AstNode astNode) {
            return (String) this.textExtractor.go(astNode);
        }

        /** Trims {@code str} and replaces every remaining blank with an underscore. */
        private String sanitizeName(String str) {
            return str.trim().replace(" ", NAME_SEPARATOR);
        }

        /**
         * Strips markup from an argument value, in this order: angle-bracket
         * tags are removed, {@code [[a|b]]} is reduced to the text after the
         * last pipe, {@code [a b]} is reduced to the text after the last blank,
         * and the result is trimmed.
         */
        private String sanitizeValue(String str) {
            final String withoutTags = MARKUP_TAG.matcher(str).replaceAll("");
            final String withoutDoubleBrackets = DOUBLE_BRACKET_LINK.matcher(withoutTags).replaceAll("$1");
            // NOTE(review): only the last blank-separated token of a bracket link
            // survives (e.g. a multi-word label is cut to its final word) - confirm
            // this is intended before relying on it.
            final String withoutSingleBrackets = SINGLE_BRACKET_LINK.matcher(withoutDoubleBrackets).replaceAll("$1");
            return withoutSingleBrackets.trim();
        }
    }

    /**
     * Creates an extractor with the empty name pattern. Because names are
     * matched in full, the empty pattern only matches a template whose trimmed
     * name is empty; use {@link #setNamePattern(String)} to select templates.
     */
    public TemplateExtractor() {
        this("");
    }

    /**
     * Creates an extractor that emits templates whose name matches {@code str}.
     *
     * @param str regular expression the whole trimmed template name must match
     * @throws java.util.regex.PatternSyntaxException if {@code str} is not a valid regular expression
     */
    public TemplateExtractor(String str) {
        this.visitor = new TemplateVisitor();
        setNamePattern(str);
    }

    /**
     * Replaces the template name pattern.
     *
     * @param str regular expression the whole trimmed template name must match
     * @throws java.util.regex.PatternSyntaxException if {@code str} is not a valid regular expression
     */
    public void setNamePattern(String str) {
        this.nameMatcher = Pattern.compile(str).matcher("");
    }

    /**
     * Returns the regular expression currently used to select templates.
     *
     * @return the pattern string passed to the constructor or {@link #setNamePattern(String)}
     */
    public String getNamePattern() {
        return this.nameMatcher.pattern().pattern();
    }

    /**
     * Emits one record for {@code wikiPage}: the record id is the page id, and
     * the record body consists of the entities produced while traversing the
     * page's AST.
     *
     * @param wikiPage the page to analyze
     */
    @Override // org.culturegraph.metastream.framework.DefaultObjectPipe, org.culturegraph.metastream.framework.ObjectReceiver
    public void process(WikiPage wikiPage) {
        final StreamReceiver out = (StreamReceiver) getReceiver();
        out.startRecord(Long.toString(wikiPage.getPageId()));
        this.visitor.go(wikiPage.getWikiAst().getPage());
        out.endRecord();
    }

    /** Always returns {@code false}. */
    @Override // org.culturegraph.mediawiki.analyzer.Analyzer
    public boolean wikiTextOnly() {
        return false;
    }

    /** Always returns {@link WikiTextParser.ParseLevel#PREPROCESS}. */
    @Override // org.culturegraph.mediawiki.analyzer.Analyzer
    public WikiTextParser.ParseLevel requiredParseLevel() {
        return WikiTextParser.ParseLevel.PREPROCESS;
    }
}
