package ontologizer.sampling;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import ontologizer.association.AssociationContainer;
import ontologizer.association.AssociationParser;
import ontologizer.calculation.CalculationRegistry;
import ontologizer.calculation.ICalculation;
import ontologizer.enumeration.TermEnumerator;
import ontologizer.ontology.Namespace;
import ontologizer.ontology.OBOParser;
import ontologizer.ontology.OBOParserFileInput;
import ontologizer.ontology.Ontology;
import ontologizer.ontology.TermContainer;
import ontologizer.ontology.TermID;
import ontologizer.set.PopulationSet;
import ontologizer.set.StudySet;
import ontologizer.statistics.AbstractTestCorrection;
import ontologizer.statistics.TestCorrectionRegistry;
import ontologizer.types.ByteString;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/* loaded from: input_file:ontologizer/sampling/SetConstructor.class */
public class SetConstructor {
    /**
     * Looks up the value of a mandatory command-line option.
     *
     * <p>If the option has no value, an error message is printed to standard
     * error and the JVM is terminated with exit status -1.
     *
     * @param commandLine the parsed command line
     * @param str the option name (without the leading dash)
     * @return the value supplied for the option
     */
    private static String getRequiredOptionValue(CommandLine commandLine, String str) {
        final String value = commandLine.getOptionValue(str);
        if (value != null) {
            return value;
        }
        System.err.println("Aborting because the required argument \"-" + str + "\" wasn't specified! Use the -h for more help.");
        System.exit(-1);
        // Only reached if System.exit() returns, which it normally does not.
        return value;
    }

    /**
     * Deletes the given file or directory, removing directory contents
     * recursively before deleting the directory itself.
     *
     * <p>Failures to delete individual children are not reported separately;
     * they surface as a {@code false} result because a non-empty directory
     * cannot be deleted.
     *
     * @param file the file or directory to delete
     * @return {@code true} if {@code file} itself was deleted, {@code false}
     *         otherwise (including when it does not exist)
     */
    public static boolean deleteDirectory(File file) {
        // listFiles() yields null when the path is not a directory, does not
        // exist, or cannot be read; in all those cases there is nothing to recurse into.
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                if (child.isDirectory()) {
                    deleteDirectory(child);
                } else {
                    child.delete();
                }
            }
        }
        return file.delete();
    }

    /**
     * Command-line entry point.
     *
     * <p>Parses the options, loads the ontology (.obo) and association files,
     * builds a population set from all annotated genes and then, depending on
     * the options, writes a sub-term matrix ({@code -sm}), a direct-annotation
     * list ({@code -la}) and one of:
     * <ul>
     *   <li>a single sample enriched according to {@code -es};</li>
     *   <li>the complete population ({@code -c}, or when the requested size is
     *       not smaller than the population);</li>
     *   <li>{@code -n} random samples of size {@code -p};</li>
     *   <li>many artificially enriched samples ({@code -em} with
     *       {@code -emr}, {@code -emc}, {@code -emp} and optionally {@code -emn}).</li>
     * </ul>
     *
     * <p>Missing required options, an unknown calculation name and command-line
     * parse errors terminate the JVM with status -1. An {@link IOException}
     * is only reported via its stack trace.
     *
     * @param strArr the command-line arguments
     * @throws Exception if any step fails with an exception other than
     *         {@link IOException} or {@link ParseException}
     */
    public static void main(String[] strArr) throws Exception {
        try {
            Options options = new Options();
            options.addOption("h", "help", false, "Shows this help");
            options.addOption("g", "go", true, "File containig GO terminology and structure (.obo format). Required.");
            options.addOption("a", "association", true, "File containing associations from genes to GO terms. Required.");
            options.addOption("p", "popsize", true, "Size of population set to create. Optional, defaults to 1000.");
            options.addOption("s", "samplefile", true, "Name of the sample file to be created. A trailing '.txt' will be appended, other suffixes are removed. Optional, defaults to 'sampled_genes'.");
            options.addOption("l", "listallterms", false, "Creates for every GO term a file listing its associated genes if set. Files are written to a directory with the name specified by the '-s' option with a trailing '.tld'.");
            options.addOption("la", "listallannotations", false, "Writes out a file named \"annotations.txt\" containing a list of go terms with their (direct) annotations.");
            options.addOption("o", "outdir", true, "Directory to hold results. Optional, defaults to '.'");
            options.addOption("f", "force", false, "Forces a delete of the outdir if set.");
            options.addOption("c", "complete", false, "The sampled set consists of all annotated genes if set. Ignores -p and -n.");
            options.addOption("n", "nsamples", true, "Number of samples to generate. Samples are numbered if necessary. Defaults to 1");
            options.addOption("sm", "subTermMatrix", false, "If specified, the output is a matrix in which element (i,j) is set to 1, only if term i is a subterm of term j.");
            options.addOption("es", "enrichSingle", true, "Specify a term enrichment strategy for the sampling of a single term enriched set. A term enrichment strategy has to be specified in the following way: For each term to enrich, specify first the GO ID of the term in the form GO:XXXXXXX and then, separated by a comma, an integer value between 0 and 100 specifying how many percent of the term's genes should end up in the sampled file. Finally, add another integer value between 0 and 100 specifying the percentage of genes from the rest to put into the sample. Ignores -p, -n and -c");
            options.addOption("em", "enrichMany", false, "Flag telling that term over-represented sets should be created for a large number of terms. This requires that at least the options -emc, -emp and -emr are specified. The option -emn is optional.");
            options.addOption("emc", "enrichManyCalc", true, "Specifies which calculation methods should be considered, when choosing terms to overrepresent. Specify the full names of the calculation methods as Term-For-Term, Parent-Child, etc. If you want to specify more than one method, separate them by a space and surround everything by double quotes. Required, if -em is set, ignored otherwise.");
            options.addOption("emp", "enrichManyPcut", true, "the p-value cutoff to use when determining the terms with sufficiently small all-subset minimal p-value. Required, if -em is set, ignored otherwise.");
            options.addOption("emr", "enrichManyRule", true, "Specifies the rule to be used for the over-representation. The rule has to be given as a comma-separated list of iteger values between 0 and 100 containing at least two values. The last value represents the noise percentage, i.e. the percentage of the rest of the genes that should end up in the sample. The other values specify the percentage of genes that should be sampled from the terms. The number of terms to over-represent is determined by the number of specified values minus 1 (for the last noise value). For example, if you specify '20,10', then in each sampled set one term is over-represented at a percentage of 20 and 10 percent noise is added (Furthermore -emn is ignored). As another example, if you specify '15,20,10', then in each sample two terms are over-represented at percentages 15 and 20, respectively. Then 10 percent of noise is added. Required, if -em is set, ignored otherwise.");
            options.addOption("emn", "enrichManySamples", true, "Specifies the number of sets to produce. Defaults to the 1000 for multiple term enrichment (i.e., when more than 2 values are specified in -emr). Ignored for single term enrichment (2 values in -emr), all terms are considered then by default. Optional, if -em is set, ignored otherwise.");
            CommandLine cmd = new GnuParser().parse(options, strArr);
            if (cmd.hasOption("h")) {
                new HelpFormatter().printHelp("SetConstructor [-h] -a <association file> -g <go file> [-p <samplesize> -s <sample file> -o <outdir> -n <nsamples> -c -l -f][-es <rule>][-em -emr <rule> -emc <calculations> -emp <pvaluecut> [-emn <nsaples>]]", "Tool to sample all kinds of sets of genes from a given set of Gene-GO associations. Can sample single or multiple sets with or without enrichment of terms. Has many options, see below!", options, "");
                System.exit(0);
            }

            // ---- Read the option values ----
            String oboFileName = getRequiredOptionValue(cmd, "g");
            String associationFileName = getRequiredOptionValue(cmd, "a");
            // Drop a trailing ".suffix" from the sample name; ".txt" is appended when writing.
            String sampleBaseName = Pattern.compile("\\.[a-zA-Z0-9]+$").matcher(cmd.getOptionValue("s", "sampled_genes")).replaceAll("");
            String outDirName = cmd.getOptionValue("o", ".");
            int popSize = Integer.parseInt(cmd.getOptionValue("p", "1000"));
            boolean forceDelete = cmd.hasOption("f");
            boolean listAllTerms = cmd.hasOption("l");
            boolean complete = cmd.hasOption("c");
            boolean enrichMany = cmd.hasOption("em");
            int nSamples = Integer.parseInt(cmd.getOptionValue("n", "1"));
            String enrichSingleRule = cmd.getOptionValue("es");
            boolean subTermMatrix = cmd.hasOption("sm");
            boolean listAllAnnotations = cmd.hasOption("la");
            if (complete) {
                // The complete set is written exactly once.
                nSamples = 1;
            }

            // ---- Load ontology and associations ----
            System.out.println("Parse obo file");
            OBOParser oboParser = new OBOParser(new OBOParserFileInput(oboFileName));
            System.out.println(oboParser.doParse());
            TermContainer termContainer = new TermContainer(oboParser.getTermMap(), oboParser.getFormatVersion(), oboParser.getDate());
            System.out.println("Building graph");
            Ontology ontology = Ontology.create(termContainer);
            AssociationParser associationParser = new AssociationParser(new OBOParserFileInput(associationFileName), termContainer, null);
            AssociationContainer associationContainer = new AssociationContainer(associationParser.getAssociations(), associationParser.getSynonym2gene(), associationParser.getDbObject2gene());

            // ---- Prepare the output directory ----
            // outDir stays null for ".", which makes new File(outDir, name) resolve
            // against the working directory.
            File outDir = null;
            if (!outDirName.equals(".")) {
                // NOTE: because of the "./" prefix the name is always interpreted
                // relative to the working directory.
                outDir = new File("./" + outDirName);
                if (forceDelete) {
                    deleteDirectory(outDir);
                }
                outDir.mkdirs();
            }

            // ---- Build the population from all annotated genes ----
            Set<ByteString> allAnnotatedGenes = associationContainer.getAllAnnotatedGenes();
            PopulationSet populationSet = new PopulationSet("AllAnnotated");
            for (ByteString gene : allAnnotatedGenes) {
                populationSet.addGene(gene, "None");
            }
            // Never ask for more genes than the population holds.
            int sampleSize = Math.min(populationSet.getGeneCount(), popSize);

            if (subTermMatrix) {
                TermEnumerator populationTerms = populationSet.enumerateGOTerms(ontology, associationContainer);
                // try-with-resources so the file is closed even if writing fails.
                try (PrintWriter matrixWriter = new PrintWriter(new File(outDir, "subtermMatrix.txt"))) {
                    // Header row: one column per term.
                    matrixWriter.print("TermID");
                    Iterator<TermID> headerIt = populationTerms.iterator();
                    while (headerIt.hasNext()) {
                        TermID headerTerm = headerIt.next();
                        matrixWriter.print("\t");
                        matrixWriter.print(headerTerm.toString());
                    }
                    matrixWriter.println();
                    // One row per term; a cell is 1 for the term itself or when the
                    // ontology reports a path from the row term to the column term.
                    Iterator<TermID> rowIt = populationTerms.iterator();
                    while (rowIt.hasNext()) {
                        TermID rowTerm = rowIt.next();
                        matrixWriter.print(rowTerm.toString());
                        Iterator<TermID> columnIt = populationTerms.iterator();
                        while (columnIt.hasNext()) {
                            TermID columnTerm = columnIt.next();
                            matrixWriter.print("\t");
                            if (rowTerm.equals(columnTerm) || ontology.existsPath(rowTerm, columnTerm)) {
                                matrixWriter.print("1");
                            } else {
                                matrixWriter.print("0");
                            }
                        }
                        matrixWriter.println();
                    }
                    matrixWriter.flush();
                }
            }

            if (listAllAnnotations) {
                try (PrintWriter annotationWriter = new PrintWriter(new File(outDir, "annotations.txt"))) {
                    TermEnumerator populationTerms = populationSet.enumerateGOTerms(ontology, associationContainer);
                    Iterator<TermID> termIt = populationTerms.iterator();
                    while (termIt.hasNext()) {
                        TermID term = termIt.next();
                        // One "term<TAB>gene" line per directly annotated gene.
                        for (ByteString gene : populationTerms.getAnnotatedGenes(term).directAnnotated) {
                            annotationWriter.write(term.toString());
                            annotationWriter.write("\t");
                            annotationWriter.write(gene.toString());
                            annotationWriter.println();
                        }
                    }
                    annotationWriter.flush();
                }
            }

            // ---- Simple modes: single enrichment, complete set, or random samples ----
            if (!enrichMany) {
                if (enrichSingleRule != null) {
                    PercentageEnrichmentRule rule = parseEs(enrichSingleRule, termContainer);
                    StudySet sample = new StudySetSampler(populationSet).sampleRandomStudySet(ontology, associationContainer, rule, false);
                    writeAllFiles(buildFinalSampleFileName(sampleBaseName, 1, 1), listAllTerms, ontology, associationContainer, outDir, sample);
                    return;
                }
                if (complete || sampleSize == populationSet.getGeneCount()) {
                    writeAllFiles(sampleBaseName, listAllTerms, ontology, associationContainer, outDir, populationSet);
                    return;
                }
                for (int i = 0; i < nSamples; i++) {
                    writeAllFiles(buildFinalSampleFileName(sampleBaseName, i, nSamples), listAllTerms, ontology, associationContainer, outDir, populationSet.generateRandomStudySet(sampleSize));
                }
                return;
            }

            // ---- Many-sample enrichment mode (-em) ----
            System.out.println("You want to construct a large number of study set samples with artificial enrichment");
            String[] ruleParts = getRequiredOptionValue(cmd, "emr").split(",");
            if (ruleParts.length == 2) {
                // Alternative syntax "a.b.c,noise": the term percentages may be
                // separated by dots in front of the single comma.
                String[] termParts = ruleParts[0].split("\\.");
                if (termParts.length > 1) {
                    String noisePart = ruleParts[1];
                    ruleParts = new String[termParts.length + 1];
                    ruleParts[termParts.length] = noisePart;
                    System.arraycopy(termParts, 0, ruleParts, 0, termParts.length);
                }
            }
            // All values but the last are term percentages; the last one is the noise.
            int termCount = ruleParts.length - 1;
            int[] termPercentages = new int[termCount];
            for (int i = 0; i < termCount; i++) {
                termPercentages[i] = Integer.parseInt(ruleParts[i]);
            }
            int noisePercentage = Integer.parseInt(ruleParts[termCount]);
            System.out.println("The parsed term enrichment values are:");
            for (int percentage : termPercentages) {
                System.out.println("\t" + percentage);
            }
            System.out.println("the parsed noise enrichment value is: " + noisePercentage);

            String calculationNames = getRequiredOptionValue(cmd, "emc");
            // Strip surrounding double quotes that may have survived the shell.
            if (calculationNames.endsWith("\"")) {
                calculationNames = calculationNames.substring(0, calculationNames.length() - 1);
            }
            if (calculationNames.startsWith("\"")) {
                calculationNames = calculationNames.substring(1);
            }
            List<ICalculation> calculations = new ArrayList<>();
            // Method names are separated by single spaces, as the -emc help text states.
            for (String calculationName : calculationNames.split(" ")) {
                ICalculation calculation = CalculationRegistry.getCalculationByName(calculationName);
                if (calculation == null) {
                    System.err.println("Calculation \"" + calculationName + "\" doesn't exist");
                    // This is a failure, so report a non-zero status like the other fatal paths.
                    System.exit(-1);
                }
                calculations.add(calculation);
            }
            System.out.println("The calculation methods to consider are:");
            for (ICalculation calculation : calculations) {
                System.out.println("\t" + calculation.getName());
            }

            double pCutoff = Double.parseDouble(getRequiredOptionValue(cmd, "emp"));
            System.out.println("Cutoff to use for all-subset minimal p-values is " + pCutoff);
            System.out.println("Determining terms with an all-subset minimal p-value below " + pCutoff + " (good terms) for all calculation methods.");
            AbstractTestCorrection noCorrection = TestCorrectionRegistry.getCorrectionByName("None");

            // Intersect the "good" terms over all requested calculation methods.
            HashSet<TermID> goodTerms = new HashSet<>();
            boolean firstMethod = true;
            for (ICalculation calculation : calculations) {
                HashSet<TermID> methodGoodTerms = calculation.calculateStudySet(ontology, associationContainer, populationSet, populationSet, noCorrection).getGoodTerms(pCutoff);
                System.out.println("Calculation method " + calculation.getName() + " has a total of " + methodGoodTerms.size() + " good terms");
                if (firstMethod) {
                    goodTerms = methodGoodTerms;
                    firstMethod = false;
                } else {
                    goodTerms.retainAll(methodGoodTerms);
                }
            }
            System.out.println("We are left with a total of " + goodTerms.size() + " terms in the intersection.");

            // With a single term every good term is used; otherwise -emn decides.
            int sampleCount = goodTerms.size();
            if (termCount > 1) {
                sampleCount = Integer.parseInt(cmd.getOptionValue("emn", "1000"));
            }

            System.out.println("Creating samples...");
            HashSet termCombinations = new HashSet();
            if (termCount == 1) {
                termCombinations.addAll(new KSubsetSampler(goodTerms).sampleManyOrderedWithoutReplacement(termCount, sampleCount));
            } else {
                // Term combinations are drawn separately for the namespace of each of
                // three reference terms (numeric IDs 8150, 5575 and 3674 — presumably
                // the GO namespace roots; confirm), a third of the samples each.
                int samplesPerNamespace = sampleCount / 3;
                for (TermID referenceTerm : new TermID[]{new TermID(8150), new TermID(5575), new TermID(3674)}) {
                    Namespace.NamespaceEnum referenceNamespace = Namespace.getNamespaceEnum(ontology.getTerm(referenceTerm).getNamespace());
                    HashSet<TermID> namespaceTerms = new HashSet<>();
                    for (TermID term : goodTerms) {
                        if (referenceNamespace == Namespace.getNamespaceEnum(ontology.getTerm(term).getNamespace())) {
                            namespaceTerms.add(term);
                        }
                    }
                    System.out.println("We are left with a total number of " + namespaceTerms.size() + " good terms for namespace " + referenceTerm.toString());
                    termCombinations.addAll(new KSubsetSampler(namespaceTerms).sampleManyOrderedWithoutReplacement(termCount, samplesPerNamespace));
                }
            }

            // ---- Create one enriched study set per sampled term combination ----
            StudySetSampler sampler = new StudySetSampler(populationSet);
            int created = 0;
            for (Object element : termCombinations) {
                ArrayList combination = (ArrayList) element;
                created++;
                System.out.print("Creating " + created + " of " + sampleCount + " samples...\r");
                PercentageEnrichmentRule rule = new PercentageEnrichmentRule();
                for (int i = 0; i < combination.size(); i++) {
                    rule.addTerm((TermID) combination.get(i), termPercentages[i]);
                }
                rule.setNoisePercentage(noisePercentage);
                String sampleName = buildFinalSampleFileNameForMultipleTerm(combination, termPercentages, noisePercentage);
                StudySet sample = sampler.sampleRandomStudySet(ontology, associationContainer, rule, true);
                if (sample == null) {
                    System.out.println("At least one term wouldn't be overrepresented due to lack of genes annotated to it. Skipping this study set.");
                } else {
                    writeAllFiles(sampleName, listAllTerms, ontology, associationContainer, outDir, sample);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e2) {
            System.err.println("Unable to parse the command line: " + e2.getLocalizedMessage());
            System.exit(-1);
        }
    }

    /**
     * Builds the sample file name for a multi-term enrichment sample.
     *
     * <p>For every term the name contains the term's string form with
     * {@code ':'} replaced by {@code '_'}, followed by its percentage; the
     * noise percentage is appended last. All parts are joined with
     * underscores.
     *
     * @param arrayList the enriched terms
     * @param iArr the percentage for each term, indexed like {@code arrayList}
     * @param i the noise percentage
     * @return the file name (without extension)
     */
    private static String buildFinalSampleFileNameForMultipleTerm(ArrayList<TermID> arrayList, int[] iArr, int i) {
        StringBuilder name = new StringBuilder();
        int position = 0;
        for (TermID term : arrayList) {
            name.append(term.toString().replace(":", "_"))
                .append("_")
                .append(iArr[position])
                .append("_");
            position++;
        }
        return name.append(i).toString();
    }

    /**
     * Parses a single-term enrichment specification as given to {@code -es}.
     *
     * <p>The specification is a comma-separated list of
     * {@code term,percentage} pairs followed by one final noise percentage,
     * so the number of entries must be odd. Surrounding whitespace of each
     * entry is ignored.
     *
     * @param str the raw specification
     * @param termContainer used to look up each term name
     * @return the rule holding all term percentages and the noise percentage
     * @throws ParseException if the number of entries is even, a percentage is
     *         not an integer, or a term cannot be found in {@code termContainer}
     */
    private static PercentageEnrichmentRule parseEs(String str, TermContainer termContainer) throws ParseException {
        String[] entries = str.split(",");
        int count = entries.length;
        if (count % 2 == 0) {
            throw new ParseException("I need an odd number of entries in enrichment data!");
        }
        PercentageEnrichmentRule rule = new PercentageEnrichmentRule();
        try {
            rule.setNoisePercentage(Integer.parseInt(entries[count - 1].trim()));
            for (int i = 0; i + 1 < count; i += 2) {
                String termName = entries[i].trim();
                // Report an unknown term as a parse error instead of failing on a null lookup.
                if (termContainer.get(termName) == null) {
                    throw new ParseException("Unknown term \"" + termName + "\" in enrichment data!");
                }
                rule.addTerm(termContainer.get(termName).getID(), Integer.parseInt(entries[i + 1].trim()));
            }
        } catch (NumberFormatException e) {
            // Route malformed numbers through the declared exception so the caller's
            // command-line error handling applies.
            throw new ParseException("Invalid integer in enrichment data \"" + str + "\": " + e.getMessage());
        }
        return rule;
    }

    /**
     * Derives the file name for one sample out of a series.
     *
     * <p>When exactly one sample is produced, the base name is returned as is.
     * Otherwise the one-based sample number, zero-padded to six digits, is
     * appended after an underscore.
     *
     * @param str the base name
     * @param i the zero-based index of the sample
     * @param i2 the total number of samples
     * @return the file name (without extension)
     */
    private static String buildFinalSampleFileName(String str, int i, int i2) {
        if (i2 != 1) {
            return str + "_" + String.format("%06d", i + 1);
        }
        return str;
    }

    /**
     * Writes every output file belonging to one study set: always the gene
     * list, and additionally the per-term gene lists when requested.
     *
     * @param str the base name of the sample
     * @param z whether the per-term gene lists should be written as well
     * @param ontology the ontology used to enumerate the terms
     * @param associationContainer the gene-to-term associations
     * @param file the output directory; {@code null} resolves against the
     *        working directory
     * @param studySet the study set to write
     * @throws IOException if a file cannot be written
     */
    private static void writeAllFiles(String str, boolean z, Ontology ontology, AssociationContainer associationContainer, File file, StudySet studySet) throws IOException {
        // The terms are enumerated up front, before anything is written.
        final TermEnumerator terms = studySet.enumerateGOTerms(ontology, associationContainer);
        writeSampledSet(str, studySet, file);
        if (!z) {
            return;
        }
        writeAllTermLists(str, terms, file);
    }

    /**
     * Writes one file per enumerated term into the directory
     * {@code <str>.tld}, each listing the term's annotated genes
     * ({@code totalAnnotated}), one per line.
     *
     * <p>Each file is named after the term's string form with {@code ':'}
     * replaced by {@code '_'}.
     *
     * @param str the base name of the sample
     * @param termEnumerator the enumerated terms and their annotated genes
     * @param file the parent directory; {@code null} resolves against the
     *        working directory
     * @throws IOException if a file cannot be written
     */
    private static void writeAllTermLists(String str, TermEnumerator termEnumerator, File file) throws IOException {
        File termListDir = new File(file, str + ".tld");
        termListDir.mkdirs();
        System.out.println("Writing gene lists for all GO terms. This may take a while...");
        Iterator<TermID> termIt = termEnumerator.iterator();
        while (termIt.hasNext()) {
            TermID term = termIt.next();
            // ':' is replaced to keep the file name portable.
            String fileName = term.toString().replace(':', '_');
            List<ByteString> genes = termEnumerator.getAnnotatedGenes(term).totalAnnotated;
            // try-with-resources so the writer is closed even when a write fails.
            try (FileWriter writer = new FileWriter(new File(termListDir, fileName))) {
                for (ByteString gene : genes) {
                    writer.write(gene + "\n");
                }
            }
        }
    }

    /**
     * Writes the genes of a study set to {@code <str>.txt}, one per line.
     *
     * @param str the base name of the sample
     * @param studySet the study set whose genes are written
     * @param file the output directory; {@code null} resolves against the
     *        working directory
     * @throws IOException if the file cannot be written
     */
    private static void writeSampledSet(String str, StudySet studySet, File file) throws IOException {
        // try-with-resources so the writer is closed even when a write fails.
        try (FileWriter writer = new FileWriter(new File(file, str + ".txt"))) {
            Iterator<ByteString> geneIt = studySet.iterator();
            while (geneIt.hasNext()) {
                writer.write(geneIt.next() + "\n");
            }
        }
    }
}
