package de.unijena.bioinf.canopus;

import com.google.common.io.Files;
import de.unijena.bioinf.ChemistryBase.chem.FormulaConstraints;
import de.unijena.bioinf.ChemistryBase.chem.InChIs;
import de.unijena.bioinf.ChemistryBase.chem.MolecularFormula;
import de.unijena.bioinf.ChemistryBase.chem.PeriodicTable;
import de.unijena.bioinf.ChemistryBase.fp.ArrayFingerprint;
import de.unijena.bioinf.ChemistryBase.fp.BooleanFingerprint;
import de.unijena.bioinf.ChemistryBase.fp.CdkFingerprintVersion;
import de.unijena.bioinf.ChemistryBase.fp.MaskedFingerprintVersion;
import de.unijena.bioinf.ChemistryBase.fp.PredictionPerformance;
import de.unijena.bioinf.ChemistryBase.fp.ProbabilityFingerprint;
import de.unijena.bioinf.ChemistryBase.utils.FileUtils;
import de.unijena.bioinf.chemdb.ChemicalDatabase;
import de.unijena.bioinf.chemdb.ChemicalDatabaseException;
import de.unijena.bioinf.chemdb.FingerprintCandidate;
import de.unijena.bioinf.fingerid.Fingerprinter;
import de.unijena.bioinf.fingerid.FormulaBits;
import de.unijena.bioinf.fingerid.KernelToNumpyConverter;
import de.unijena.bioinf.fingerid.TrainedCSIFingerId;
import de.unijena.bioinf.fingerid.fingerprints.FixedFingerprinter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.smiles.SmilesGenerator;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;

/* loaded from: input_file:de/unijena/bioinf/canopus/Prepare.class */
public class Prepare {
    /**
     * Mask built from the complete CDK fingerprint version: first limited via
     * {@code getMaskFor} to the SUBSTRUCTURE, MACCS and PUBCHEM fingerprint types, then passed
     * through {@link FormulaBits#removeFormulaBits}. The methods of this class use it to select
     * the indices written to the "trainable" output files.
     */
    public static final MaskedFingerprintVersion CDK_MASK =
            new FormulaBits().removeFormulaBits(
                    CdkFingerprintVersion.getComplete().getMaskFor(
                            new CdkFingerprintVersion.USED_FINGERPRINTS[]{
                                CdkFingerprintVersion.USED_FINGERPRINTS.SUBSTRUCTURE,
                                CdkFingerprintVersion.USED_FINGERPRINTS.MACCS,
                                CdkFingerprintVersion.USED_FINGERPRINTS.PUBCHEM
                            }));

    /**
     * Computes per-index prediction statistics for every fingerprint index allowed by
     * {@link #CDK_MASK} and writes them to {@code trainable_indizes.csv} in the current working
     * directory.
     *
     * <p>The input is {@code prediction_prediction.csv} inside {@code file}. Every line is split
     * on tabs: column 3 is read as a string of {@code '0'}/{@code '1'} characters and turned into
     * a boolean fingerprint (presumably the reference fingerprint - confirm against the producer
     * of that file), and columns 4 onwards are parsed as probabilities. A probability of at least
     * 0.5 is passed on as a positive prediction. Only indices present in both the model's mask
     * and {@link #CDK_MASK} are updated; the remaining allowed indices are written with their
     * initial all-zero statistics.
     *
     * @param trainedCSIFingerId trained model that supplies the masked fingerprint version
     * @param file directory containing {@code prediction_prediction.csv}
     * @throws RuntimeException wrapping any {@link IOException} raised while reading or writing
     */
    public static void trainableFpPerformance(TrainedCSIFingerId trainedCSIFingerId, File file) {
        try {
            final int[] trainableIndizes = CDK_MASK.allowedIndizes();
            final HashMap<Integer, PredictionPerformance.Modify> statsByIndex = new HashMap<>();
            for (int index : trainableIndizes) {
                statsByIndex.put(index, new PredictionPerformance(0.0d, 0.0d, 0.0d, 0.0d, 0.0d).modify());
            }

            final MaskedFingerprintVersion modelMask = trainedCSIFingerId.getMaskedFingerprintVersion();
            final int[] sharedIndizes = modelMask.getIntersection(CDK_MASK).allowedIndizes();

            for (String line : Files.readLines(new File(file, "prediction_prediction.csv"), Charset.forName("UTF-8"))) {
                final String[] columns = line.split("\t");
                final String bits = columns[3];

                final boolean[] flags = new boolean[bits.length()];
                for (int pos = 0; pos < flags.length; pos++) {
                    flags[pos] = bits.charAt(pos) == '1';
                }
                final BooleanFingerprint reference = new BooleanFingerprint(modelMask, flags);

                // One probability per bit is expected; missing trailing columns stay at 0.0.
                final double[] probabilities = new double[bits.length()];
                for (int column = 4; column < columns.length; column++) {
                    probabilities[column - 4] = Double.parseDouble(columns[column]);
                }
                final ProbabilityFingerprint predicted = new ProbabilityFingerprint(modelMask, probabilities);

                for (int index : sharedIndizes) {
                    statsByIndex.get(index).update(reference.isSet(index), predicted.getProbability(index) >= 0.5d);
                }
            }

            // try-with-resources: the writer is closed even when writing a row fails.
            try (BufferedWriter writer = FileUtils.getWriter(new File("trainable_indizes.csv"))) {
                writer.write(PredictionPerformance.csvHeader());
                for (int index : trainableIndizes) {
                    writer.write(String.valueOf(index));
                    writer.write('\t');
                    writer.write(statsByIndex.get(index).done().toCsvRow());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the training input files in the current working directory from a trained
     * CSI:FingerID model directory.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Loads {@code fingerid.data} from {@code file} and writes its fingerprint indices, one
     *       per line, to {@code fingerprint_indizes.txt}.</li>
     *   <li>Reads the compounds of {@code prediction_prediction.csv} via
     *       {@code filterCsvFiles} and the first 14 characters of every line of
     *       {@code compounds.csv}; keys already present in the former are dropped from the
     *       latter.</li>
     *   <li>If {@code fingerprints.csv} already exists, appends rows for compounds that are not
     *       in it yet (database lookup first, then {@code smiles.csv} if present) and lists what
     *       is still unresolved in {@code missing_smiles}. Otherwise creates
     *       {@code fingerprints.csv} and {@code trainable_fingerprints.csv} from scratch.</li>
     *   <li>Writes the formula feature files and the per-index prediction statistics.</li>
     * </ol>
     *
     * <p>A {@link ChemicalDatabaseException} is printed and does not abort the run; the last step
     * is still executed with whatever formulas were collected.
     *
     * @param file directory containing {@code fingerid.data} and {@code prediction_prediction.csv}
     * @throws IOException if reading an input file or writing an output file fails
     */
    public static void prepare(File file) throws IOException {
        System.out.println("Molecular formula feature vector length: "
                + Canopus.getFormulaFeatures(MolecularFormula.parseOrNull("C6H12AsClN3")).length);
        final TrainedCSIFingerId fingerId = TrainedCSIFingerId.load(new File(file, "fingerid.data"));

        try (BufferedWriter indexWriter = KernelToNumpyConverter.getWriter(new File("fingerprint_indizes.txt"))) {
            for (int index : fingerId.getFingerprintIndizes()) {
                indexWriter.write(String.valueOf(index));
                indexWriter.newLine();
            }
        }

        final List<FingerprintCandidate> trainingCompounds = filterCsvFiles(fingerId, file);

        // Keys are the first 14 characters of each line of compounds.csv.
        final HashSet<String> otherCompounds = new HashSet<>(3000000);
        try (BufferedReader reader = KernelToNumpyConverter.getReader(new File("compounds.csv"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                otherCompounds.add(line.substring(0, 14));
            }
        }
        System.out.println("Using " + otherCompounds.size() + " entries");
        System.out.println("Download Fingerprints");
        System.out.flush();

        for (FingerprintCandidate candidate : trainingCompounds) {
            otherCompounds.remove(candidate.getInchiKey2D());
        }

        final HashSet<MolecularFormula> formulas = new HashSet<>();
        if (new File("fingerprints.csv").exists()) {
            appendMissingFingerprints(fingerId, otherCompounds, formulas);
        } else {
            createFingerprintTables(fingerId, trainingCompounds, otherCompounds, formulas);
        }
        writeFormulaFeatures(formulas);
        trainableFpPerformance(fingerId, file);
    }

    // NOTE(security): the database credentials are hard-coded. They are kept unchanged so that
    // existing behaviour is preserved, but they should be moved to external configuration.
    private static final String FINGERPRINT_DB_HOST = "fingerid1.bioinf.uni-jena.de:5432";
    private static final String FINGERPRINT_DB_USER = "fingerid";
    private static final String FINGERPRINT_DB_PASSWORD = "tV9QRQHn2THjq5HR";

    /**
     * Collects the formulas already listed in {@code fingerprints.csv} and appends rows for every
     * key of {@code otherCompounds} that the table does not contain yet.
     */
    private static void appendMissingFingerprints(TrainedCSIFingerId fingerId, HashSet<String> otherCompounds,
            HashSet<MolecularFormula> formulas) throws IOException {
        final HashSet<String> missing = new HashSet<>(otherCompounds);
        for (String[] row : FileUtils.readTable(new File("fingerprints.csv"))) {
            MolecularFormula.parseAndExecute(row[1], formulas::add);
            missing.remove(row[0]);
        }
        if (missing.isEmpty()) {
            return;
        }
        // Report the count; the previous code concatenated the whole key set into the message.
        System.out.println(missing.size() + " compounds are missing. Update fingerprints table.");

        try (ChemicalDatabase database = new ChemicalDatabase(FINGERPRINT_DB_HOST, FINGERPRINT_DB_USER, FINGERPRINT_DB_PASSWORD);
             BufferedWriter table = new BufferedWriter(new FileWriter(new File("fingerprints.csv"), true))) {
            for (FingerprintCandidate candidate : database.lookupFingerprintsByInchis(missing)) {
                final MolecularFormula formula = candidate.getInchi().extractFormulaOrThrow();
                formulas.add(formula);
                writeFingerprintRow(table, candidate.getInchiKey2D(), formula,
                        fingerId.getMaskedFingerprintVersion().mask(candidate.getFingerprint().asArray()).asArray().toTabSeparatedString());
                missing.remove(candidate.getInchiKey2D());
            }
            if (!missing.isEmpty()) {
                System.out.println(missing.size() + " compounds are missing. Those compounds are computed if a table with smiles, called smiles.csv, is provided.");
                if (new File("smiles.csv").exists()) {
                    appendFingerprintsFromSmiles(fingerId, table, missing, formulas);
                    System.out.println(missing.size() + " are still missing.");
                }
                try (BufferedWriter missingWriter = FileUtils.getWriter(new File("missing_smiles"))) {
                    for (String inchiKey : missing) {
                        missingWriter.write(inchiKey);
                        missingWriter.newLine();
                    }
                }
            }
        } catch (ChemicalDatabaseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes formula and fingerprint from {@code smiles.csv} (column 0: key, column 1: SMILES)
     * for every key still in {@code missing} and appends the result to {@code table}. A compound
     * whose structure cannot be processed is reported and left in {@code missing}.
     */
    private static void appendFingerprintsFromSmiles(TrainedCSIFingerId fingerId, BufferedWriter table,
            HashSet<String> missing, HashSet<MolecularFormula> formulas) throws IOException {
        for (String[] row : FileUtils.readTable(new File("smiles.csv"))) {
            if (!missing.contains(row[0])) {
                continue;
            }
            MolecularFormula formula;
            String fingerprintColumns;
            try {
                String smiles = row[1];
                if (containsStereoMarker(smiles)) {
                    // Drop all stereo information and regenerate the SMILES without it.
                    final IAtomContainer molecule = new SmilesParser(SilentChemObjectBuilder.getInstance()).parseSmiles(smiles);
                    molecule.setStereoElements(new ArrayList<>());
                    for (IBond bond : molecule.bonds()) {
                        bond.setStereo(IBond.Stereo.NONE);
                    }
                    smiles = SmilesGenerator.unique().create(molecule);
                }
                formula = MolecularFormula.parseOrThrow(MolecularFormulaManipulator.getString(
                        MolecularFormulaManipulator.getMolecularFormula(
                                new SmilesParser(SilentChemObjectBuilder.getInstance()).parseSmiles(smiles))));
                final ArrayFingerprint fingerprint = new FixedFingerprinter(CdkFingerprintVersion.getDefault()).computeFingerprintFromSMILES(smiles);
                fingerprintColumns = fingerId.getMaskedFingerprintVersion().mask(fingerprint).asArray().toTabSeparatedString();
            } catch (Exception e) {
                // Best effort: a structure that cannot be processed stays in the missing set.
                e.printStackTrace();
                continue;
            }
            // Written outside the try block so that I/O failures are not silently swallowed
            // and no half-written row is left behind by a failed computation.
            formulas.add(formula);
            writeFingerprintRow(table, row[0], formula, fingerprintColumns);
            missing.remove(row[0]);
        }
    }

    /**
     * Creates {@code fingerprints.csv} (key, formula, fingerprint) and
     * {@code trainable_fingerprints.csv} (key, {@link #CDK_MASK} fingerprint), first for the
     * training compounds and then for all other compounds found in the database.
     *
     * <p>NOTE(review): the decompiled source routed both record types through one writer
     * variable, which left {@code trainable_fingerprints.csv} empty and unclosed. The split below
     * follows the three-column layout that is read back from {@code fingerprints.csv}; confirm
     * against the original sources if available.
     */
    private static void createFingerprintTables(TrainedCSIFingerId fingerId, List<FingerprintCandidate> trainingCompounds,
            HashSet<String> otherCompounds, HashSet<MolecularFormula> formulas) throws IOException {
        try (ChemicalDatabase database = new ChemicalDatabase(FINGERPRINT_DB_HOST, FINGERPRINT_DB_USER, FINGERPRINT_DB_PASSWORD);
             BufferedWriter trainableTable = FileUtils.getWriter(new File("trainable_fingerprints.csv"));
             BufferedWriter table = FileUtils.getWriter(new File("fingerprints.csv"))) {
            for (FingerprintCandidate candidate : trainingCompounds) {
                final MolecularFormula formula = candidate.getInchi().extractFormulaOrThrow();
                formulas.add(formula);
                writeFingerprintRow(table, candidate.getInchiKey2D(), formula,
                        candidate.getFingerprint().asArray().toTabSeparatedString());
                writeTrainableRow(trainableTable, candidate.getInchiKey2D(),
                        tryToFindFingerprint(database, candidate).toTabSeparatedString());
            }
            // Persist the training rows before the potentially long database download starts.
            table.flush();
            trainableTable.flush();
            for (FingerprintCandidate candidate : database.lookupFingerprintsByInchis(otherCompounds)) {
                final MolecularFormula formula = candidate.getInchi().extractFormulaOrThrow();
                formulas.add(formula);
                writeFingerprintRow(table, candidate.getInchiKey2D(), formula,
                        fingerId.getMaskedFingerprintVersion().mask(candidate.getFingerprint().asArray()).asArray().toTabSeparatedString());
                writeTrainableRow(trainableTable, candidate.getInchiKey2D(),
                        CDK_MASK.mask(candidate.getFingerprint()).toTabSeparatedString());
            }
        } catch (ChemicalDatabaseException e) {
            e.printStackTrace();
        }
    }

    /** Returns true if the SMILES contains one of the characters {@code \}, {@code /} or {@code @}. */
    private static boolean containsStereoMarker(String smiles) {
        return smiles.indexOf('\\') >= 0 || smiles.indexOf('/') >= 0 || smiles.indexOf('@') >= 0;
    }

    /** Writes one tab-separated {@code key, formula, fingerprint} row followed by a line break. */
    private static void writeFingerprintRow(BufferedWriter out, String inchiKey, MolecularFormula formula,
            String fingerprintColumns) throws IOException {
        out.write(inchiKey);
        out.write('\t');
        out.write(formula.toString());
        out.write('\t');
        out.write(fingerprintColumns);
        out.newLine();
    }

    /** Writes one tab-separated {@code key, fingerprint} row followed by a line break. */
    private static void writeTrainableRow(BufferedWriter out, String inchiKey, String fingerprintColumns) throws IOException {
        out.write(inchiKey);
        out.write('\t');
        out.write(fingerprintColumns);
        out.newLine();
    }

    /**
     * Returns the {@link #CDK_MASK}-masked fingerprint for a compound.
     *
     * <p>The database is queried first and, if it returns anything, the first result is used.
     * Otherwise the fingerprint is computed locally from the compound's 2D InChI with the
     * fingerprinter for {@code TrainingData.VERSION}.
     *
     * @param chemicalDatabase database to query
     * @param fingerprintCandidate compound whose fingerprint is requested
     * @return the masked fingerprint as an array fingerprint
     * @throws IOException if the database lookup fails
     * @throws RuntimeException wrapping a {@link CDKException} raised by the local computation
     */
    private static ArrayFingerprint tryToFindFingerprint(ChemicalDatabase chemicalDatabase, FingerprintCandidate fingerprintCandidate) throws IOException {
        final List<?> databaseHits = chemicalDatabase.lookupFingerprintsByInchi(Arrays.asList(fingerprintCandidate));
        if (!databaseHits.isEmpty()) {
            final FingerprintCandidate firstHit = (FingerprintCandidate) databaseHits.get(0);
            return CDK_MASK.mask(firstHit.getFingerprint()).asArray();
        }
        // Nothing stored for this compound: compute the fingerprint from its structure.
        try {
            final Fingerprinter fingerprinter = Fingerprinter.getForVersion(TrainingData.VERSION);
            return CDK_MASK.mask(
                    new BooleanFingerprint(
                            TrainingData.VERSION,
                            fingerprinter.fingerprintsToBooleans(
                                    fingerprinter.computeFingerprints(
                                            fingerprinter.convertInchi2Mol(fingerprintCandidate.getInchi().in2D)))))
                    .asArray();
        } catch (CDKException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Filters the given formulas and writes their feature vectors.
     *
     * <p>A formula is removed from {@code hashSet} (the set is modified in place) when its mass
     * exceeds 2000, when it violates the element constraints {@code CHNOPSClBrBSeIFAs} under
     * neutral ionization, when it has no carbon or no hydrogen, or when its RDBE is at most -1.
     * The remaining formulas are written to {@code formula_features.csv} as one tab-separated
     * row each (formula, then the feature values), and the normalized and centered feature
     * matrix is written to {@code formula_normalized.txt}.
     *
     * @param hashSet formulas to filter and export; filtered formulas are removed from it
     * @throws IOException if one of the output files cannot be written
     */
    private static void writeFormulaFeatures(HashSet<MolecularFormula> hashSet) throws IOException {
        final FormulaConstraints allowedElements = new FormulaConstraints("CHNOPSClBrBSeIFAs");
        // Same checks, in the same short-circuit order, as the original if/else chain.
        hashSet.removeIf(formula ->
                formula.getMass() > 2000.0d
                        || allowedElements.isViolated(formula, PeriodicTable.getInstance().neutralIonization())
                        || formula.numberOfCarbons() == 0
                        || formula.numberOfHydrogens() == 0
                        || formula.rdbe() <= -1.0f);
        System.out.println(hashSet.size() + " formulas in total.");

        final ArrayList<double[]> featureRows = new ArrayList<>();
        try (BufferedWriter writer = KernelToNumpyConverter.getWriter(new File("formula_features.csv"))) {
            for (MolecularFormula formula : hashSet) {
                final double[] features = Canopus.getFormulaFeatures(formula);
                featureRows.add(features);
                writer.write(formula.toString());
                for (double value : features) {
                    writer.write('\t');
                    writer.write(String.valueOf(value));
                }
                writer.newLine();
            }
        }

        // toArray needs a double[][] target; the decompiled cast of a double[] to Object[] is invalid.
        final double[][] featureMatrix = featureRows.toArray(new double[featureRows.size()][]);
        new KernelToNumpyConverter().writeToFile(new File("formula_normalized.txt"), FormulaFeatureVector.normalizeAndCenter(featureMatrix));
    }

    /**
     * Reads {@code prediction_prediction.csv} from {@code file} and returns one candidate per
     * distinct key, in file order.
     *
     * <p>Each line is split on tabs. The key is the first 14 characters of column 1, column 2 is
     * handed to {@code InChIs.newInChI} together with that key, and column 3 is read as a string
     * of {@code '0'}/{@code '1'} characters forming the fingerprint. When a key occurs more than
     * once, only its first line is kept.
     *
     * @param trainedCSIFingerId trained model that supplies the masked fingerprint version
     * @param file directory containing {@code prediction_prediction.csv}
     * @return the de-duplicated candidates
     * @throws RuntimeException wrapping any {@link IOException} raised while reading the file
     */
    private static List<FingerprintCandidate> filterCsvFiles(TrainedCSIFingerId trainedCSIFingerId, File file) {
        final List<FingerprintCandidate> candidates = new ArrayList<>();
        final HashSet<String> seenKeys = new HashSet<>();
        try {
            final MaskedFingerprintVersion mask = trainedCSIFingerId.getMaskedFingerprintVersion();
            for (String line : Files.readLines(new File(file, "prediction_prediction.csv"), Charset.forName("UTF-8"))) {
                final String[] columns = line.split("\t");
                final String bits = columns[3];
                final boolean[] flags = new boolean[bits.length()];
                for (int pos = 0; pos < flags.length; pos++) {
                    flags[pos] = bits.charAt(pos) == '1';
                }
                final BooleanFingerprint fingerprint = new BooleanFingerprint(mask, flags);
                final String key2D = columns[1].substring(0, 14);
                // Set.add returns false for a key that was seen before, so later duplicates are skipped.
                if (seenKeys.add(key2D)) {
                    candidates.add(new FingerprintCandidate(InChIs.newInChI(key2D, columns[2]), fingerprint.asArray()));
                }
            }
            return candidates;
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}
