diff --git a/pom.xml b/pom.xml index e2869f7b..e3d349c4 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ Please follow the naming scheme YEAR.MONTH.RELEASE_NO_OF_MONTH (eg. 2016.4.1 for second release in Apr 2016) --> - 2024.1.3 + 2024.2.1 OpenChemLib Open Source Chemistry Library @@ -209,7 +209,7 @@ scm:git:git@github.com:Actelion/openchemlib.git scm:git:git@github.com:Actelion/openchemlib.git https://github.com/Actelion/openchemlib - openchemlib-2024.1.3 + openchemlib-2024.2.1 diff --git a/src/main/java/com/actelion/research/calc/Matrix.java b/src/main/java/com/actelion/research/calc/Matrix.java index 0b3e1ec4..40f0840d 100644 --- a/src/main/java/com/actelion/research/calc/Matrix.java +++ b/src/main/java/com/actelion/research/calc/Matrix.java @@ -2954,6 +2954,21 @@ public double getVarianceCol(int col) { return var; } + public double getVarianceRow(int row) { + double var = 0; + + int cols = cols(); + double mean = getMeanRow(row); + double dSum = 0; + for (int i = 0; i < cols; i++) { + dSum += (data[row][i] - mean) * (data[row][i] - mean); + } + + var = dSum / (cols - 1.0); + + return var; + } + public double getVarianceCentered() { double var = 0; diff --git a/src/main/java/com/actelion/research/chem/AtomTypeList.java b/src/main/java/com/actelion/research/chem/AtomTypeList.java index 9954809b..aaec1751 100644 --- a/src/main/java/com/actelion/research/chem/AtomTypeList.java +++ b/src/main/java/com/actelion/research/chem/AtomTypeList.java @@ -42,6 +42,7 @@ import java.io.BufferedWriter; import java.io.FileWriter; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.TreeMap; import java.util.TreeSet; @@ -73,8 +74,8 @@ public AtomTypeList(int mode) { /** * Creates a new AtomTypeList from a given file using the given mode. - * If the the filename references a .typ file, then the mode is checked, whether it matches the file's content. 
- * If the the filename references a compound file, then the molecules are parsed and a new AtomTypeList is created + * If the filename references a .typ file, then the mode is checked, whether it matches the file's content. + * If the filename references a compound file, then the molecules are parsed and a new AtomTypeList is created * reflecting the all contained atom types. * @param filename either .typ file or a .dwar or .sdf compound file * @param mode @@ -84,7 +85,7 @@ public AtomTypeList(String filename, int mode) throws Exception { this(mode); if (filename.endsWith(".typ")) { - BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename))); + BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename), StandardCharsets.UTF_8)); String version =theReader.readLine(); if (!VERSION_STRING.equals(version)) { throw new Exception("Outdated atom type list file."); diff --git a/src/main/java/com/actelion/research/chem/MolfileParser.java b/src/main/java/com/actelion/research/chem/MolfileParser.java index 91168bbd..433ba6a4 100644 --- a/src/main/java/com/actelion/research/chem/MolfileParser.java +++ b/src/main/java/com/actelion/research/chem/MolfileParser.java @@ -47,6 +47,7 @@ import com.actelion.research.io.BOMSkipper; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.TreeMap; public class MolfileParser @@ -970,7 +971,7 @@ public boolean parse(StereoMolecule mol, File file) { mMol = mol; try{ - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(reader); return readMoleculeFromBuffer(reader); } catch(IOException e){ diff --git a/src/main/java/com/actelion/research/chem/chemicalspaces/ChemicalSpaceCreator.java 
b/src/main/java/com/actelion/research/chem/chemicalspaces/ChemicalSpaceCreator.java index de998704..e9a7cb3b 100644 --- a/src/main/java/com/actelion/research/chem/chemicalspaces/ChemicalSpaceCreator.java +++ b/src/main/java/com/actelion/research/chem/chemicalspaces/ChemicalSpaceCreator.java @@ -1,41 +1,22 @@ package com.actelion.research.chem.chemicalspaces; -import java.io.BufferedReader; +import com.actelion.research.chem.*; +import com.actelion.research.chem.chemicalspaces.synthon.SynthonCreator; +import com.actelion.research.chem.chemicalspaces.synthon.SynthonReactor; +import com.actelion.research.chem.descriptor.DescriptorHandlerLongFFP512; +import com.actelion.research.chem.io.DWARFileCreator; +import com.actelion.research.chem.reaction.Reaction; +import com.actelion.research.chem.reaction.Reactor; + import java.io.BufferedWriter; import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.stream.Collectors; -import com.actelion.research.chem.CanonizerUtil; -import com.actelion.research.chem.IDCodeParser; -import com.actelion.research.chem.Molecule; -import com.actelion.research.chem.SSSearcher; -import com.actelion.research.chem.SSSearcherWithIndex; -import com.actelion.research.chem.StereoMolecule; -import com.actelion.research.chem.chemicalspaces.synthon.SynthonCreator; -import com.actelion.research.chem.chemicalspaces.synthon.SynthonReactor; -import com.actelion.research.chem.descriptor.DescriptorHandlerLongFFP512; -import com.actelion.research.chem.io.DWARFileCreator; -import 
com.actelion.research.chem.io.RXNFileParser; -import com.actelion.research.chem.io.SDFileParser; -import com.actelion.research.chem.reaction.Reaction; -import com.actelion.research.chem.reaction.Reactor; - public class ChemicalSpaceCreator { @@ -87,7 +68,7 @@ public void create() { ConcurrentMap processedToOrigIDCode = new ConcurrentHashMap(); ConcurrentMap>> reactionsWithSynthons = new ConcurrentHashMap>>(); processBuildingBlocks(this.bbs,processedToOrigIDCode,functionalizations); - fps = new ConcurrentHashMap(); + fps = new ConcurrentHashMap<>(); calcFragFPs(processedToOrigIDCode.keySet(),fps); generateSynthons(reactions, processedToOrigIDCode, reactionsWithSynthons,fps,allSynthonTransformations); generateCombinatoriaLibraries(reactionsWithSynthons, bbs, allSynthonTransformations); @@ -179,7 +160,6 @@ private static void processReaction(Reaction rxn, ConcurrentMap reactionsWithSynthons.putIfAbsent(rxn.getName(), new ArrayList<>()); //System.out.println("bbs"); - for(int i=0;i rList = reactants.get(i); diff --git a/src/main/java/com/actelion/research/chem/chemicalspaces/synthon/SynthonCreator.java b/src/main/java/com/actelion/research/chem/chemicalspaces/synthon/SynthonCreator.java index 724fb751..2cf007fe 100644 --- a/src/main/java/com/actelion/research/chem/chemicalspaces/synthon/SynthonCreator.java +++ b/src/main/java/com/actelion/research/chem/chemicalspaces/synthon/SynthonCreator.java @@ -1,17 +1,5 @@ package com.actelion.research.chem.chemicalspaces.synthon; -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - import com.actelion.research.chem.Molecule; import com.actelion.research.chem.RingCollection; import com.actelion.research.chem.StereoMolecule; @@ -19,9 +7,13 @@ import com.actelion.research.chem.io.RXNFileCreator; 
import com.actelion.research.chem.reaction.Reaction; +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.Writer; +import java.util.*; + public class SynthonCreator { - - + /** * only works for reactions with a single product * @param rxn @@ -32,18 +24,15 @@ public static Reaction[] create(Reaction rxn) throws Exception { if(rxn.getProducts()>1) throw new Exception("only reactions with one product are supported"); Reaction[] synthonTransformations = new Reaction[rxn.getReactants()]; - Map mappedAtomToReactant = new HashMap(); //stores the information on which mapped atom in the product ist contributed by which reactant + Map mappedAtomToReactant = new HashMap<>(); //stores the information on which mapped atom in the product ist contributed by which reactant for(int r=0;r 0) { diff --git a/src/main/java/com/actelion/research/chem/descriptor/flexophore/MolDistHistHelper.java b/src/main/java/com/actelion/research/chem/descriptor/flexophore/MolDistHistHelper.java index e8b980e5..12637414 100644 --- a/src/main/java/com/actelion/research/chem/descriptor/flexophore/MolDistHistHelper.java +++ b/src/main/java/com/actelion/research/chem/descriptor/flexophore/MolDistHistHelper.java @@ -123,6 +123,30 @@ public static void setDistHistToOne(MolDistHist mdh){ } } + public static MolDistHist getEmptyMolDistHist(){ + PPNode ppNode0 = new PPNode(); + ppNode0.realize(); + + MolDistHist mdhEmpty = new MolDistHist(1); + mdhEmpty.addNode(ppNode0); + mdhEmpty.realize(); + + return mdhEmpty; + } + public static boolean isEmptyMolDistHist(MolDistHist mdh){ + + boolean empty = true; + if(mdh.getNumPPNodes()>1){ + empty = false; + } else if(mdh.getNumPPNodes()==1){ + if(mdh.getNode(0).getInteractionTypeCount()>0){ + empty = false; + } + } + + return empty; + } + public static MolDistHist getMostDistantPairOfNodes (MolDistHist mdh){ int n = mdh.getNumPPNodes(); diff --git a/src/main/java/com/actelion/research/chem/forcefield/mmff/Csv.java 
b/src/main/java/com/actelion/research/chem/forcefield/mmff/Csv.java index f3d5777d..ddbea80c 100644 --- a/src/main/java/com/actelion/research/chem/forcefield/mmff/Csv.java +++ b/src/main/java/com/actelion/research/chem/forcefield/mmff/Csv.java @@ -38,6 +38,7 @@ import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; /** * Basic CSV parser. The Csv class provides very basic CSV file parsing @@ -91,7 +92,7 @@ public static Object[][] readFile(String path) { try { String line; - br = new BufferedReader(new InputStreamReader(Csv.class.getResourceAsStream(path))); + br = new BufferedReader(new InputStreamReader(Csv.class.getResourceAsStream(path), StandardCharsets.UTF_8)); int size = Integer.parseInt(br.readLine().trim()); String[] format = br.readLine().trim().split(","); diff --git a/src/main/java/com/actelion/research/chem/interactionstatistics/InteractionDistanceStatistics.java b/src/main/java/com/actelion/research/chem/interactionstatistics/InteractionDistanceStatistics.java index dc38561f..8a5ae820 100644 --- a/src/main/java/com/actelion/research/chem/interactionstatistics/InteractionDistanceStatistics.java +++ b/src/main/java/com/actelion/research/chem/interactionstatistics/InteractionDistanceStatistics.java @@ -1,23 +1,17 @@ package com.actelion.research.chem.interactionstatistics; +import com.actelion.research.util.FastSpline; +import com.actelion.research.util.SmoothingSplineInterpolator; + +import java.io.*; +import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import java.util.stream.IntStream; -import com.actelion.research.util.FastSpline; -import com.actelion.research.util.SmoothingSplineInterpolator; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.FileInputStream; -import 
java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.URL; - public class InteractionDistanceStatistics { private static volatile InteractionDistanceStatistics instance = new InteractionDistanceStatistics(); //eager initialization @@ -194,9 +188,9 @@ public void readFromFile() throws IOException { } InputStream is = url.openStream(); //InputStream is = new FileInputStream(file); - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line; - while((line = reader.readLine())!=null && line.length()!=0) { + while((line = reader.readLine())!=null && !line.isEmpty()) { String s[] = line.split(" "); long l = Long.parseLong(s[0]); diff --git a/src/main/java/com/actelion/research/chem/io/AbstractParser.java b/src/main/java/com/actelion/research/chem/io/AbstractParser.java index 1385e6a2..56e4e86d 100644 --- a/src/main/java/com/actelion/research/chem/io/AbstractParser.java +++ b/src/main/java/com/actelion/research/chem/io/AbstractParser.java @@ -7,6 +7,7 @@ import com.actelion.research.chem.Molecule3D; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -42,10 +43,10 @@ public final List loadGroup(String fileName) throws Exception { if(fileName.toUpperCase().endsWith(".GZ")) { GZIPInputStream is = new GZIPInputStream(new FileInputStream(fileName)); - r = new InputStreamReader(is); + r = new InputStreamReader(is, StandardCharsets.UTF_8); } else if(fileName.toUpperCase().endsWith(".ZIP")) { ZipInputStream is = new ZipInputStream(new FileInputStream(fileName)); - r = new InputStreamReader(is); + r = new InputStreamReader(is, StandardCharsets.UTF_8); } else { r = new BufferedReader(new FileReader(fileName)); } diff --git a/src/main/java/com/actelion/research/chem/io/DWARFileParser.java 
b/src/main/java/com/actelion/research/chem/io/DWARFileParser.java index 46925e38..ff93562e 100644 --- a/src/main/java/com/actelion/research/chem/io/DWARFileParser.java +++ b/src/main/java/com/actelion/research/chem/io/DWARFileParser.java @@ -42,6 +42,7 @@ import com.actelion.research.util.BinaryDecoder; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.Properties; @@ -76,7 +77,7 @@ public class DWARFileParser extends CompoundFileParser implements DescriptorCons */ public DWARFileParser(String fileName) { try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); mMode = MODE_COORDINATES_PREFER_2D; init(); @@ -92,7 +93,7 @@ public DWARFileParser(String fileName) { */ public DWARFileParser(File file) { try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); mMode = MODE_COORDINATES_PREFER_2D; init(); @@ -124,7 +125,7 @@ public DWARFileParser(Reader reader) { */ public DWARFileParser(String fileName, int mode) { try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); mMode = mode; init(); @@ -141,7 +142,7 @@ public DWARFileParser(String fileName, int mode) { */ public DWARFileParser(File file, int mode) { try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); mMode = mode; init(); diff --git 
a/src/main/java/com/actelion/research/chem/io/RDFileParser.java b/src/main/java/com/actelion/research/chem/io/RDFileParser.java index 4df6d7e2..b935e57d 100644 --- a/src/main/java/com/actelion/research/chem/io/RDFileParser.java +++ b/src/main/java/com/actelion/research/chem/io/RDFileParser.java @@ -40,6 +40,7 @@ import com.actelion.research.io.BOMSkipper; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.TreeMap; /** @@ -59,7 +60,7 @@ public class RDFileParser { public RDFileParser(String fileName) { mNoOfRecords = 0; try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); readHeader(); } catch (IOException e) { @@ -70,7 +71,7 @@ public RDFileParser(String fileName) { public RDFileParser(File file) { mNoOfRecords = 0; try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); readHeader(); } catch (IOException e) { diff --git a/src/main/java/com/actelion/research/chem/io/RXNFileParser.java b/src/main/java/com/actelion/research/chem/io/RXNFileParser.java index f0b9e564..ae3560a6 100644 --- a/src/main/java/com/actelion/research/chem/io/RXNFileParser.java +++ b/src/main/java/com/actelion/research/chem/io/RXNFileParser.java @@ -40,6 +40,7 @@ import com.actelion.research.io.BOMSkipper; import java.io.*; +import java.nio.charset.StandardCharsets; public class RXNFileParser @@ -81,7 +82,7 @@ public Reaction getReaction(File file) throws Exception public Reaction getReaction(File file, boolean ignoreIdCode) throws Exception { Reaction theReaction = new Reaction(); - BufferedReader theReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + BufferedReader theReader = 
new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(theReader); parse(theReaction, theReader, ignoreIdCode); @@ -111,7 +112,7 @@ public boolean parse(Reaction theReaction, File file) public boolean parse(Reaction theReaction, File file, boolean ignoreIdCode) throws Exception { - BufferedReader theReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + BufferedReader theReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(theReader); return parse(theReaction, theReader, ignoreIdCode); diff --git a/src/main/java/com/actelion/research/chem/io/SDFileParser.java b/src/main/java/com/actelion/research/chem/io/SDFileParser.java index c7e74d7c..45ecaea8 100644 --- a/src/main/java/com/actelion/research/chem/io/SDFileParser.java +++ b/src/main/java/com/actelion/research/chem/io/SDFileParser.java @@ -39,6 +39,7 @@ import com.actelion.research.io.BOMSkipper; import java.io.*; +import java.nio.charset.StandardCharsets; public class SDFileParser extends CompoundFileParser { private static final int DEFAULT_RECORDS_TO_INSPECT = 10240; @@ -61,7 +62,7 @@ public SDFileParser(String fileName, String[] fieldName) { mFieldName = fieldName; try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); } catch (IOException e) { mReader = null; @@ -81,7 +82,7 @@ public SDFileParser(File file, String[] fieldName) { mNoOfRecords = -1; mFieldName = fieldName; try { - mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + mReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); BOMSkipper.skip(mReader); } catch (IOException e) { mReader = null; diff --git 
a/src/main/java/com/actelion/research/chem/prediction/IncrementTable.java b/src/main/java/com/actelion/research/chem/prediction/IncrementTable.java index 7521f36b..a899dd9c 100644 --- a/src/main/java/com/actelion/research/chem/prediction/IncrementTable.java +++ b/src/main/java/com/actelion/research/chem/prediction/IncrementTable.java @@ -34,6 +34,7 @@ package com.actelion.research.chem.prediction; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; public class IncrementTable { @@ -44,7 +45,7 @@ protected IncrementTable() { } protected IncrementTable(String filename) throws Exception { - BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename))); + BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename), StandardCharsets.UTF_8)); mRecords = new ArrayList(); while (true) { String theLine = theReader.readLine(); diff --git a/src/main/java/com/actelion/research/chem/prediction/IncrementTableWithIndex.java b/src/main/java/com/actelion/research/chem/prediction/IncrementTableWithIndex.java index d9e22c7d..c4a039b7 100644 --- a/src/main/java/com/actelion/research/chem/prediction/IncrementTableWithIndex.java +++ b/src/main/java/com/actelion/research/chem/prediction/IncrementTableWithIndex.java @@ -39,6 +39,7 @@ import java.io.BufferedReader; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; public class IncrementTableWithIndex { @@ -52,7 +53,7 @@ protected IncrementTableWithIndex() { protected IncrementTableWithIndex(String filename) throws Exception { - BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename))); + BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename), StandardCharsets.UTF_8)); String header = theReader.readLine(); if 
(!header.equals(cHeader)) diff --git a/src/main/java/com/actelion/research/chem/prediction/ToxicityPredictor.java b/src/main/java/com/actelion/research/chem/prediction/ToxicityPredictor.java index 7143b34c..3b559af1 100644 --- a/src/main/java/com/actelion/research/chem/prediction/ToxicityPredictor.java +++ b/src/main/java/com/actelion/research/chem/prediction/ToxicityPredictor.java @@ -43,6 +43,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; public class ToxicityPredictor { @@ -205,7 +206,7 @@ public ParameterizedStringList getDetail(StereoMolecule testMolecule, int riskTy private ArrayList readIDCodeFile(String filename) throws Exception { - BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename))); + BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename), StandardCharsets.UTF_8)); ArrayList fragments = new ArrayList(); while (true) { try { @@ -224,7 +225,7 @@ private ArrayList readIDCodeFile(String filename) throws Exception { private SortedStringList readAndSortIDCodeFile(String filename) throws Exception { - BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename))); + BufferedReader theReader = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(filename), StandardCharsets.UTF_8)); SortedStringList molecules = new SortedStringList(); while (true) { try { diff --git a/src/main/java/com/actelion/research/chem/reaction/RSSSearcher.java b/src/main/java/com/actelion/research/chem/reaction/RSSSearcher.java new file mode 100644 index 00000000..2be2442c --- /dev/null +++ b/src/main/java/com/actelion/research/chem/reaction/RSSSearcher.java @@ -0,0 +1,599 @@ +/* + * Project: DD_core + * @(#)ReactionRSS.java + * + * Copyright (c) 1997- 2014 + * Actelion 
Pharmaceuticals Ltd. + * Gewerbestrasse 16 + * CH-4123 Allschwil, Switzerland + * + * All Rights Reserved. + * + * This software is the proprietary information of Actelion Pharmaceuticals, Ltd. + * Use is subject to license terms. + * + * Author: Christian Rufener + */ + +package com.actelion.research.chem.reaction; + +import com.actelion.research.chem.SSSearcher; +import com.actelion.research.chem.StereoMolecule; +import com.actelion.research.util.ArrayUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Project: + * User: rufenec + * Date: 9/30/2014 + * Time: 1:38 PM + */ +public class RSSSearcher +{ + + private static final boolean debug = false; + private static final boolean debug2 = false; + + private static void debug(String format,Object ...args) + { + if (debug) + System.out.printf(format,args); + } + + private static void debug2(String format,Object ...args) + { + if (debug2) + System.out.printf(format,args); + } + + private static class MapsList extends ArrayList + { + public String toString() + { + String s = new String(); + for (int[] a : this) { + s += "\n {"; + for (int i : a) { + s += " " + i; + } + s += "}"; + } + return s; + } + } + + + /** + * Try to match a query reaction in a target reaction + * Algo: + * First we check that there are more product reactants than query reactants + * Idem for products + * + * @param queryRxn + * @param targetRxn + * @return + */ + public static boolean match(Reaction queryRxn, Reaction targetRxn) + { + + List reactantMatchList = new ArrayList(); + List productMatchList = new ArrayList(); + + int numOfQueryReactants = queryRxn.getReactants(); + int numOfTargetReactants = targetRxn.getReactants(); + int numOfQueryProducts = queryRxn.getProducts(); + int numOfTargetProducts = targetRxn.getProducts(); + + if (numOfQueryReactants > numOfTargetReactants || numOfQueryProducts > numOfTargetProducts) + return false; + + debug("MATCHING Start\n"); + + if (numOfQueryReactants <= 
numOfTargetReactants) { + debug("MATCHING Reactants\n"); + List reactantList = getReactants(targetRxn); + ListPermutator permutator = new ListPermutator(reactantList); + + while (permutator.hasNext()) { + List targetReactants = permutator.next(); + List matchList = new ArrayList(); + for (int i = 0; i < numOfQueryReactants; i++) { + StereoMolecule queryMol = queryRxn.getReactant(i); + // Since the targetReactant list is a permutation, + // we use the same index as for the query + StereoMolecule targetMol = targetReactants.get(i); +// debug("Attempt to match\n%s : %s\n%s : %s\n", +// queryMol.getIDCode(), queryMol, +// targetMol.getIDCode(), targetMol); + debug2("Matching reactant\n"); + MapsList matched = findMatchingMaps(queryMol, targetMol); + // molecules did not even match on SSS, so try another permutation + if (matched == null) { + debug("Did not match SSS\n"); + break; + } + if (matched.size() > 0) { + debug("Matched with matchlist\n"); + matchList = add(matchList, matched); + } else { + debug("NO match with matchlist\n"); + //break; // this seemed to be wrong + } + } + // Choose the best match: + // We consider longer match list as a better solution + if (getMaxSize(matchList) > getMaxSize(reactantMatchList)) { + reactantMatchList = matchList; + } + } + debug("Reactant matchlist %d\n", reactantMatchList.size()); + } + + + if (numOfQueryProducts <= numOfTargetProducts) { + debug("MATCHING PRODUCTS\n"); + List productList = getProducts(targetRxn); + ListPermutator permute = new ListPermutator(productList); + while (permute.hasNext()) { +// debug("Next product permutation\n"); + List targetProducts = permute.next(); + List matchList = new ArrayList(); + for (int i = 0; i < numOfQueryProducts; i++) { + StereoMolecule queryMol = queryRxn.getProduct(i); + StereoMolecule target = targetProducts.get(i); +// debug("Attempt to match\n%s : %s\n%s : %s\n", +// queryMol.getIDCode(), queryMol, +// target.getIDCode(), target); + debug2("Matching product\n"); + List 
matched = findMatchingMaps(queryMol, target); + // molecules did not even match on SSS, so try another permutation + if (matched == null) { + debug("Did not match SSS\n"); + break; + } + if (matched.size() > 0) { + debug("Matched with matchlist\n"); + matchList = add(matchList, matched); + } else { + debug("NO match with matchlist\n"); + //break; // this seemed to be wrong + // break; + } + } + if (getMaxSize(matchList) > getMaxSize(productMatchList)) { + // Found a better match, so use this? + productMatchList = matchList; + } + } + debug("Product matchlist ist " + productMatchList.size()); + } + boolean ok = false; + + // Open issues + // What if reactant Matchlist is empty +// for (int[] rs : reactantMatchList) { +//// Arrays.sort(rs); +// debug("Reactant List: "); +// for (int j : rs) { +// debug("%d,", j); +// } +// } +// debug("\n"); + +// for (int[] ps : productMatchList) { +//// Arrays.sort(ps); +// debug("Product List: "); +// for (int j : ps) { +// debug("%d,", j); +// } +// } +// debug("\n"); + + boolean sort = true; + if (sort) { + debug2("Sorting\n"); + for (int[] rs : reactantMatchList) { + Arrays.sort(rs); + } + for (int[] ps : productMatchList) { + Arrays.sort(ps); + } + } + + for (int[] rs : reactantMatchList) { + for (int[] ps : productMatchList) { + if (Arrays.equals(rs, ps)) { + ok = true; + return ok; + } + } + } +// debug("Query did not match!"); + return ok; + } + + /** + * Returns a List of sorted arrays containing the mapping numbers of the target molecule + * which have been matched by the query sub-structure + * Please note the list contains only valid mapping numbers. 
Unmapped atoms + * which have been matched as well are ignored + * If the structures did not match SSS wise null will be returned + * @param query + * @param target + * @return null if the simple SSS failed + */ + private static MapsList findMatchingMaps(StereoMolecule query, StereoMolecule target) + { + MapsList ret = new MapsList(); + // First Performance check: Don't SSS for bigger query mols + if (query.getAllAtoms() <= target.getAllAtoms()) { + // Get the list of map numbers of the query + int[] queryMaps = getMapList(query); + boolean found = false; + for (int i : queryMaps) { + if (i != 0) { + found = true; + break; + } + } + if (found) { + // Leave if query has no maps + SSSearcher searcher = new SSSearcher(); + boolean fragment = query.isFragment(); +// debug("Query check on target \n%s\n%s\n",query.getIDCode(),target.getIDCode()); + query.setFragment(true); + searcher.setMol(query, target); + int count = 0; + + // SSS first + if ((count = searcher.findFragmentInMolecule()) > 0) { + // so we found the query in the target {count} times + // Get list of the matched indizes of the target molecule + List sssMatchList = searcher.getMatchList(); + for (int i = 0; i < count; i++) { + int[] mapList = new int[0]; + // these are the indizes of the atoms found in the target + int[] matchedSSSAtoms = sssMatchList.get(i); + // Query Atom[n] matches matchedSSSAtoms[n] => Target Atom Index + + // queryMaps[0] = 5 ; means mapping number of query atom 0 is 5 + // matchedSSSAtoms[0] = 4 ; means query Atom 0 matched on Target Atom 4 + // targetMaps[0] = target.getAtomMapNo(matchedSSSAtoms[0]) => x; + + // Get the corresponding mapping numbers of these matched target atoms + int[] targetMaps = getMapList(target, matchedSSSAtoms); + + int index = mapList.length; + // Make room for more mapping numbers in the maplist + mapList = copyOf(mapList, mapList.length + matchedSSSAtoms.length); + // And append the mapping numbers + debug2("Query Map Arr\t: %s\n", 
ArrayUtils.toString(queryMaps)); + debug2("Target Map Arr\t: %s\n", ArrayUtils.toString(targetMaps)); + + for (int k = 0; k < matchedSSSAtoms.length; k++) { +// System.out.printf("targetMap[%d] = %d queryMap[%d] = %d\n",k,targetMaps[k],k,queryMaps[k]); + if (targetMaps[k] != 0 && queryMaps[k] != 0) { + mapList[index++] = targetMaps[k]; + } + } + // Remove the unmapped entries + mapList = removeZeros(mapList); + debug2("Query Map List\t: %s\n", ArrayUtils.toString(removeZeros(queryMaps))); + debug2("Target Map List\t: %s\n", ArrayUtils.toString(mapList)); + ret.add(mapList); + } +// debug("Matched!\n"); + } else { + ret = null; // signal not found! +// debug("Did not match!\n"); + } + query.setFragment(fragment); + } + } + return ret; + } + + + + public static boolean matchKeys(byte[] tK,byte[] qK) + { + if (qK == null || tK == null || qK.length != tK.length) + return false; + for (int i = 0; i < qK.length; i++) { + if (qK[i] > tK[i]) + return false; + } + return true; + } + + + private static int getMaxSize(List foo) + { + int size = 0; + for (int[] k : foo) { + size = Math.max(size,k.length); + } + return size; + } + + + + private static List getProducts(Reaction r) + { + List list = new ArrayList(); + for (int i = 0; i < r.getProducts(); i++) { + list.add(r.getProduct(i)); + } + return list; + } + + private static List getReactants(Reaction r) + { + List list = new ArrayList(); + for (int i = 0; i < r.getReactants(); i++) { + list.add(r.getReactant(i)); + } + return list; + } + + /* + Before adding + +++++++++++++++++++++++++++++++++++ + sourceList listToAdd + ___ ___ ___ + |3| |7| |4| + |2| |8| |3| + |1| |9| |4| + ___ |4| |5| + |5| |2| + ___ ___ + + After adding + +++++++++++++++++++++++++++++++++++ + sourceList listToAdd + ___ ___ + |3 |3| + |2| |2| + |1| |1| + |7| |4| + |8| |3| + |9| |4| + |4| |5| + |5| |2| + ___ ___ + */ + + /** + * Append target list(s) to source list(s) + * if # of targets > 1 the source lists needs to be cloned n-1 times + * so effectively 
do the cross product + * Maybe we need a simpler solution + * However the final check for equality of reactant and product mapping is simple then + * @param sourceList + * @param listToAdd + * @return + */ + private static List add(List sourceList, List listToAdd) + { + int sizeofListToAdd = listToAdd.size(); + int originalSourceListSize = sourceList.size(); + if (sizeofListToAdd > 1) { + int sourceListSize = sourceList.size(); + if (sourceListSize == 0) { + for (int[] t : listToAdd) { + sourceList.add(t); + } + } else { + // OK there are multiple lists to add + // clone the source list n-1 times + for (int i = 1; i < sizeofListToAdd; i++) { + for (int j = 0; j < sourceListSize; j++) { + int[] s = sourceList.get(j); + sourceList.add(s.clone()); + } + } + // Add the elements of each list to add at the end of the sourcelist + for (int i = 0; i < sizeofListToAdd; i++) { + int[] t = listToAdd.get(i); + for (int j = 0; j < originalSourceListSize; j++) { + int index = i * originalSourceListSize + j; + int[] s = sourceList.get(index); + // Create a new array from the current array to hold t.length more elements + // and copy the s.length no of elements into it + int[] q = copyOf(s, s.length + t.length); + // Append the t array at the end + System.arraycopy(t, 0, q, s.length, t.length); + // Replace the original element in the list + sourceList.set(index, q); + } + } + } + } else if (sizeofListToAdd == 1) { + if (sourceList.size() == 0) { + sourceList.add(listToAdd.get(0)); + } else { + int[] t = listToAdd.get(0); + for (int i = 0; i < sourceList.size(); i++) { + int[] s = sourceList.get(i); + int[] q = copyOf(s, s.length + t.length); + System.arraycopy(t, 0, q, s.length, t.length); + sourceList.set(i, q); + } + } + } + return sourceList; + } + + private static int[] removeZeros(int[] array) + { + int count = 0; + int[] t = new int[array.length]; + for (int i = 0; i < array.length; i++) { + if (array[i] != 0) { + t[count++] = array[i]; + } + } + return copyOf(t,count); + } 
+ +// private static int[] sortAndRemoveZeros(int[] array) +// { +// Arrays.sort(array); +// int index = 0; +// for (int i = 0; i < array.length; i++) { +// if (array[i] == 0) { +// index++; +// } +// } +// return copyOfRange(array, index, array.length); +// } + + + /** + * Algorithm: + * (This is not 100% correct, but for now lets go with it) + * If query molecules are SSS of target molecules + * Find the (combined) mapping numbers for each side of the target reaction by using the SSS match list from each Q / T comparison + * If the mapping numbers on both sides of the target are equal then we have a match + * + * let ML={} + * for each query reactant: + * if query reactant matches SSS in target reactant + * let QL = matching atoms in target (matchlist) + * let AM = list of Atom Maps of QL + * let ML += AM + * end if + * end for + * + * let MP={} + * for each query product + * if query product matches SSS in target product + * let QL = matching atoms in target (matchlist) + * let AM = list of Atom Maps of QL + * let MP += AM + * end if + * end for + * + * ML = eliminate 0 map nos from ML and sort + * MP = eliminate 0 map nos from MP and sort + * + * if (ML == MP) + * => MATCH + * else + * -> NO MATCH + */ + + + static int[] getMapList(StereoMolecule m, int atoms[]) + { + int[] ret = new int[atoms.length]; + for (int i = 0; i < atoms.length; i++) { + ret[i] = m.getAtomMapNo(atoms[i]); + } + return ret; + } + + /** + * Returns an array of mapping number for this molecule, + * the index into this array corresponds to the atom index in the molecule + * Note: Unmapped mapping numbers are included + * + * @param m + * @return + */ + static int[] getMapList(StereoMolecule m) + { + int atoms = m.getAllAtoms(); + int[] ret = new int[atoms]; + for (int i = 0; i < atoms; i++) { + ret[i] = m.getAtomMapNo(i); + } + return ret; + } + + +// static int getAtomByMap(StereoMolecule m, int mapNo) +// { +// int atoms = m.getAllAtoms(); +// for (int i = 0; i < atoms; i++) { +// if 
(m.getAtomMapNo(i) == mapNo) { +// return i; +// } +// } +// return -1; +// } + + + + private static class ListPermutator //implements Iterator? + { + + int total; + int index = 0; + int count = 0; + List list; + + ListPermutator(List list) + { + this.list = new ArrayList(list); + total = fac(list.size()); + } + + boolean hasNext() + { + return count < total; + } + + List next() + { + if (count == 0) { + count++; + return list; + } + permute(list, index); + count++; + index = (index + 1) % (list.size() - 1); + return list; + } + + private void permute(List arr, int k) + { + java.util.Collections.swap(arr, k, k + 1); + } + + private int fac(int c) + { + if (c <= 1) { + return 1; + } + return c * fac(c - 1); + } + + + } + + + /** We need these since we need 1.5 compliance for ORACLE db i.e Cartridge */ + private static int[] copyOf(int[] original, int newLength) { + int[] copy = new int[newLength]; + System.arraycopy(original, 0, copy, 0, + Math.min(original.length, newLength)); + return copy; + } + + + private static int[] copyOfRange(int[] original, int from, int to) { + int newLength = to - from; + if (newLength < 0) + throw new IllegalArgumentException(from + " > " + to); + int[] copy = new int[newLength]; + System.arraycopy(original, from, copy, 0, + Math.min(original.length - from, newLength)); + return copy; + } + + +} diff --git a/src/main/java/com/actelion/research/chem/reaction/ReactionIndexer.java b/src/main/java/com/actelion/research/chem/reaction/ReactionIndexer.java new file mode 100644 index 00000000..ced244e4 --- /dev/null +++ b/src/main/java/com/actelion/research/chem/reaction/ReactionIndexer.java @@ -0,0 +1,1437 @@ +/* + * Project: DD_core + * @(#)ReactionIndexer.java + * + * Copyright (c) 1997- 2014 + * Actelion Pharmaceuticals Ltd. + * Gewerbestrasse 16 + * CH-4123 Allschwil, Switzerland + * + * All Rights Reserved. + * + * This software is the proprietary information of Actelion Pharmaceuticals, Ltd. + * Use is subject to license terms. 
+ * + * Author: Christian Rufener + */ + +package com.actelion.research.chem.reaction; + +import com.actelion.research.chem.ExtendedMolecule; +import com.actelion.research.chem.Molecule; +import com.actelion.research.chem.StereoMolecule; +import com.actelion.research.chem.io.RXNFileParser; + +public class ReactionIndexer +{ + private static boolean debug = false; +/* + public static final int BOND_BREAK_1 = 0x0001; + public static final int BOND_BREAK_2 = 0x0002; + public static final int BOND_BREAK_3 = 0x0004; + public static final int BOND_BREAK_M = 0x0008; + + public static final int BOND_CREATE_1 = 0x0010; + public static final int BOND_CREATE_2 = 0x0020; + public static final int BOND_CREATE_3 = 0x0040; + public static final int BOND_CHANGE_13 = 0x0080; + + public static final int BOND_CHANGE_12 = 0x0100; + public static final int BOND_CHANGE_21 = 0x0200; + public static final int BOND_CHANGE_23 = 0x0400; + public static final int BOND_CHANGE_32 = 0x0800; + public static final int BOND_CHANGE_31 = 0x1000; + public static final int BOND_CHANGE_A2 = 0x2000; + public static final int BOND_CHANGE_A1 = 0x4000; + public static final int BOND_CHANGE_O = 0x8000; + + public static final int ATOM_C_C = 0x00010000; + public static final int ATOM_C_O = 0x00020000; + public static final int ATOM_C_N = 0x00040000; + public static final int ATOM_C_X = 0x00080000; + public static final int ATOM_C_M = 0x00100000; +*/ + + private static final byte CR_ONLY = 0; + // Carbon Create Keys + private static final byte CR_CC1 = 1; + private static final byte CR_CC2 = 2; + private static final byte CR_CC3 = 3; + private static final byte CR_CCA = 4; + private static final byte CR_CN1 = 5; + private static final byte CR_CN2 = 6; + private static final byte CR_CN3 = 7; + private static final byte CR_CO1 = 8; + private static final byte CR_CO2 = 9; + private static final byte CR_CCl = 10; + private static final byte CR_CX = 11; + + // Nitrogen Create Keys + private static final byte CR_NN1 
= 12; + private static final byte CR_NN2 = 13; + private static final byte CR_NO1 = 14; + private static final byte CR_NO2 = 15; + + // Change Carbon Keys + private static final byte CH_CCSD = 16; + private static final byte CH_CCDS = 17; + private static final byte CH_CCST = 18; + private static final byte CH_CCTS = 19; + private static final byte CH_CCDT = 20; + private static final byte CH_CCTD = 21; + private static final byte CH_CCSA = 22; + private static final byte CH_CCAS = 23; + private static final byte CH_CCDA = 24; + private static final byte CH_CCAD = 25; + private static final byte CH_CCTA = 26; + private static final byte CH_CCAT = 27; + + private static final byte CH_CNSD = 28; + private static final byte CH_CNDS = 29; + private static final byte CH_CNST = 30; + private static final byte CH_CNTS = 31; + private static final byte CH_CNDT = 32; + private static final byte CH_CNTD = 33; + private static final byte CH_CNSA = 34; + private static final byte CH_CNAS = 35; + private static final byte CH_CNDA = 36; + private static final byte CH_CNAD = 37; + + private static final byte CH_COSD = 38; + private static final byte CH_CODS = 39; + + + private static final byte DEL_ONLY = 40; + private static final byte DEL_CCS = 41; + private static final byte DEL_CCD = 42; + private static final byte DEL_CCT = 43; + private static final byte DEL_CCA = 44; + private static final byte DEL_CNS = 45; + private static final byte DEL_CND = 46; + private static final byte DEL_CNT = 47; + private static final byte DEL_COS = 48; + private static final byte DEL_COT = 49; + private static final byte DEL_CF = 50; + private static final byte DEL_CCl = 51; + private static final byte DEL_CBR = 52; + private static final byte DEL_CX = 53; + + // Nitrogen Create Keys + private static final byte DEL_NN1 = 54; + private static final byte DEL_NN2 = 55; + private static final byte DEL_NO1 = 56; + private static final byte DEL_NO2 = 57; + + private static final byte CH_ONLY = 58; + 
/**
 * Creates an indexer with an empty key vector.
 * <p>
 * Note: the debug flag is a static field of this class. The value passed here
 * therefore applies to all ReactionIndexer instances, including ones created
 * earlier, and not only to the new object.
 *
 * @param debug true to print diagnostic messages to System.out
 */
public ReactionIndexer(boolean debug)
{
    // static field: assign through the class name so the shared scope is visible
    ReactionIndexer.debug = debug;
    init();
}
{ + rxnKeys[index]++; + } + } + + private void initKeys() + { + init(); + } + + + public String getFoundKeys() + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < NUMKEYS; i++) { + if (rxnKeys[i] > 0) { + sb.append(String.format("%s (%d)\n",KEY_STRING[i],(int)rxnKeys[i])); + } + } + return sb.toString(); + + } + public static byte[] getKeysFromString(String s) + { + byte[] ret = new byte[NUMKEYS]; + if (s != null) { + if (s.length() != NUMKEYS) + throw new RuntimeException("Invalid KeyString Length"); + for (int i = 0; i < NUMKEYS; i++) { + ret[i] = (byte)(s.charAt(i)-'0'); + } + } + + return ret; + } + + + public static String getKeysString(byte[] keys) + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < NUMKEYS; i++) { + sb.append(String.format("%1d", (int) keys[i])); + } + return sb.toString(); + } + + private String getKeysString() + { + return getKeysString(rxnKeys); + } + + private int addCreateKey(int bondtype, int atomSymbol1, int atomSymbol2) + { + int key = CR_ONLY; + int a1 = atomSymbol1; + int a2 = atomSymbol2; + if (atomSymbol2 < atomSymbol1) { + a1 = atomSymbol2; + a2 = atomSymbol1; + } + switch (a1) { + case CARBON: + key = getCarbonCreateKey(bondtype, a2); + break; + case NITROGEN: + key = getNitroCreateKey(bondtype, a2); + break; + + } + return key; + } + + private int getCarbonCreateKey(int bondtype, int atomNo) + { + int key = CR_ONLY; + switch (atomNo) { + case CARBON: + key = getCreateCarbonCarbonKey(bondtype); + break; + + case NITROGEN: + key = getCreateCarbonNitroKey(bondtype); + break; + + case OXYGEN: + key = getCreateCarbonOxygenKey(bondtype); + break; + + case CHLORINE: + key = CR_CCl; + break; + + case FLUORINE: + case BROMINE: + case IODINE: + case ASTATINE: + key = CR_CX; + break; + + } + return key; + } + + private int getNitroCreateKey(int bondtype, int atomNo) + { + int key = CR_ONLY; + switch (atomNo) { + case NITROGEN: + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + key 
= CR_NN1; + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + key = CR_NN2; + } + break; + + case OXYGEN: + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + key = CR_NO1; + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + key = CR_NO2; + } + break; + } + return key; + } + + private int getCreateCarbonCarbonKey(int bondtype) + { + if (bondtype == Molecule.cBondTypeDelocalized) { + return CR_CCA; + } else if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + return CR_CC1; + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + return CR_CC2; + } else if ((bondtype & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + return CR_CC3; + } + return CR_ONLY; + } + + private int getCreateCarbonNitroKey(int bondtype) + { + + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + return CR_CN1; + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + return CR_CN2; + } else if ((bondtype & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + return CR_CN3; + } + return CR_ONLY; + } + + private int getCreateCarbonOxygenKey(int bondtype) + { + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + return CR_CO1; + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + return CR_CO2; + } + return CR_ONLY; + } + + ///////// Change//////// + private void addChangeKey(int atomNo1, int atomNo2, int bt1, int bt2) + { + addKey(CH_ONLY); + int a1 = atomNo1; + int a2 = atomNo2; + if (atomNo2 < atomNo1) { + a1 = atomNo2; + a2 = atomNo1; + } + switch (a1) { + case CARBON: + addCarbonChangeKey(a2, bt1, bt2); + break; + } + //return key; + } + + private void addCarbonChangeKey(int atomNo, int bt1, int bt2) + { + int key = CH_ONLY; + switch (atomNo) { + case CARBON: + addChangeCarbonCarbonKey(bt1, bt2); + break; + + case NITROGEN: + 
addChangeCarbonNitroKey(bt1, bt2); + break; + + case OXYGEN: + addChangeCarbonOxygenKey(bt1, bt2); + break; + + } +// return key; + } + + + private void addChangeCarbonCarbonKey(int bt1, int bt2) + { +// int key = CH_ONLY; + if (bt1 == Molecule.cBondTypeDelocalized) { + if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CCAS); + } else if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_CCAD); + } else if ((bt2 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(CH_CCAT); + } + } else if ((bt1 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_CCSA); + } else if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_CCSD); + } else if ((bt2 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(CH_CCST); + } + } else if ((bt1 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_CCDA); + } else if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CCDS); + } else if ((bt2 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(CH_CCDT); + } + } else if ((bt1 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_CCTA); + } else if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CCTS); + } else if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_CCTD); + } + } + +// return key; + } + + private void addChangeCarbonNitroKey(int bt1, int bt2) + { +// int addKey(CH_ONLY; + if (bt1 == Molecule.cBondTypeDelocalized) { + if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CNAS); + } else if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_CNAD); + } + } else if ((bt1 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + if 
(bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_CNSA); + } else if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_CNSD); + } else if ((bt2 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(CH_CNST); + } + } else if ((bt1 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_CNDA); + } else if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CNDS); + } else if ((bt2 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(CH_CNDT); + } + } else if ((bt1 & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CNTS); + } else if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_CNTD); + } + } + +// return key; + } + + private void addChangeCarbonOxygenKey(int bt1, int bt2) + { +// int addKey(CH_ONLY; + if ((bt1 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + if ((bt2 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(CH_COSD); + } + } else if ((bt1 & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + if ((bt2 & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(CH_CODS); + } + } + //return key; + } + + + /* + private int getChangeKey(int bt1, int bt2) + { + int addKey(-1; + if (bt1 == Molecule.cBondTypeDelocalized) { + if ((bt2 & Molecule.cBondTypeSingle) ==Molecule.cBondTypeSingle) { + addKey(CH_AS; + }else if ((bt2 & Molecule.cBondTypeDouble) ==Molecule.cBondTypeDouble) { + addKey(CH_AD; + }else if ((bt2 & Molecule.cBondTypeTriple) ==Molecule.cBondTypeTriple) { + addKey(CH_AT; + } + } else if ((bt1 & Molecule.cBondTypeSingle) ==Molecule.cBondTypeSingle) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_SA; + } else if ((bt2 & Molecule.cBondTypeDouble) ==Molecule.cBondTypeDouble) { + addKey(CH_SD; + }else if ((bt2 & Molecule.cBondTypeTriple) 
==Molecule.cBondTypeTriple) { + addKey(CH_ST; + } + } else if ((bt1 & Molecule.cBondTypeDouble) ==Molecule.cBondTypeDouble) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_DA; + } else if ((bt2 & Molecule.cBondTypeSingle) ==Molecule.cBondTypeSingle) { + addKey(CH_DS; + }else if ((bt2 & Molecule.cBondTypeTriple) ==Molecule.cBondTypeTriple) { + addKey(CH_DT; + } + } else if ((bt1 & Molecule.cBondTypeTriple) ==Molecule.cBondTypeTriple) { + if (bt2 == Molecule.cBondTypeDelocalized) { + addKey(CH_TA; + } else if ((bt2 & Molecule.cBondTypeSingle) ==Molecule.cBondTypeSingle) { + addKey(CH_TS; + }else if ((bt2 & Molecule.cBondTypeDouble) ==Molecule.cBondTypeDouble) { + addKey(CH_TD; + } + } + + return key; + } + */ + private void addDeleteKey(int bondtype, int atomNo1, int atomNo2) + { + addKey(DEL_ONLY); + int a1 = atomNo1; + int a2 = atomNo2; + if (atomNo2 < atomNo1) { + a1 = atomNo2; + a2 = atomNo1; + } + switch (a1) { + case CARBON: + addCarbonDeleteKey(bondtype, a2); + break; + case NITROGEN: + addNitroDeleteKey(bondtype, a2); + break; + + } + } + + private void addCarbonDeleteKey(int bondtype, int atomNo) + { +// int addKey(DEL_ONLY; + switch (atomNo) { + case CARBON: + addDeleteCarbonCarbonKey(bondtype); + break; + + case NITROGEN: + addDeleteCarbonNitroKey(bondtype); + break; + + case OXYGEN: + addDeleteCarbonOxygenKey(bondtype); + break; + case FLUORINE: + case CHLORINE: + case 35: + case IODINE: + addHalogenDeleteKey(atomNo); + break; + + } +// addKey(); + } + + private void addDeleteCarbonCarbonKey(int bondtype) + { + if (bondtype == Molecule.cBondTypeDelocalized) { + addKey(DEL_CCA); + } else if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(DEL_CCS); + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(DEL_CCD); + } else if ((bondtype & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(DEL_CCT); + } + //addKey(DEL_ONLY; + } + + private void 
addDeleteCarbonNitroKey(int bondtype) + { + + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(DEL_CNS); + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(DEL_CND); + } else if ((bondtype & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + addKey(DEL_CNT); + } + //return DEL_ONLY; + } + + private void addDeleteCarbonOxygenKey(int bondtype) + { + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(DEL_COS); + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(DEL_COT); + } + //return DEL_ONLY; + } + + private void addNitroDeleteKey(int bondtype, int atomNo) + { +// int addKey(DEL_ONLY; + switch (atomNo) { + case NITROGEN: + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(DEL_NN1); + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(DEL_NN2); + } + break; + + case 8: + if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + addKey(DEL_NO1); + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + addKey(DEL_NO2); + } + break; + } +// return key; + } + + private void addHalogenDeleteKey(int atomNo) + { + addKey(DEL_CX); + switch (atomNo) { + case FLUORINE: + addKey(DEL_CF); + break; + case CHLORINE: + addKey(DEL_CCl); + break; + case BROMINE: + addKey(DEL_CBR); + break; + case IODINE: + break; + } + //return key; + } + + +/* + C1 C5 C1 C5 + \ / \ / + C2-Cl N4 --> C2 - N4 + / \ / \ + C3 C6 C3 C6 + + C1 C1 + \ \ + C2 = O3 --> C2 - 03 + / / + C3 C3 + + C1 C5 C1 C5 + \ / \ / + C2 - N4 --> C2 N4 + / \ / \ + C3 C6 C3 C6 + + + C1 C C5 C1 C5 + \ / / \ / + C2 - N N4 --> C2 - N4 + / \ \ / \ + C3 C C6 C3 C6 + + +*/ +// C1 -> Map No = 1, pAtomNo = x, cConntectedAtoms = 1, pNumConnectedAtoms=1 +// cBondAtom = (C2), bondType = 1, cOtherMapNo = 2, pBondType=1 = > NO CHANGE +// C2 -> Map No = 2, pAtomNo = x, 
cConntextedAtoms = 3, pNumConnectedAtoms = 3 +// C1 == OK +// C3 == OK +// cBondAtom (Cl),cBondType=1,cOtherMapNo=0 +// + + /* + // For all atoms connected to Component Atoms (cAtomNo) + for (int cbi = 0; cbi < cNumConnectedAtoms; cbi++) { + // get the other atom at this bond + int cBondAtom = source.getConnAtom(cAtomNo, cbi); + // Get the bond index from these two atoms + int sourceBondIndex = source.getConnBond(cAtomNo, cbi); + if (cVisitedBonds[sourceBondIndex]) + continue; + // Find the mapping number of the bond's other end + int cOtherMapNo = source.getAtomMapNo(cBondAtom); + // get the bond type of this atom + int cBondType = getBondType(source, sourceBondIndex); + ////sb.append(String.format("Source Bond From %d(%d) - %d(%d) \n",cAtomNo,cMapNo,cBondAtom,cOtherMapNo)); + + cVisitedBonds[sourceBondIndex] = true; + // the other end is mapped + if (cOtherMapNo != 0) { + // find the cont type of the two mapped atoms on the preoduct + int pBondType = getMappedBondType(target, cMapNo, cOtherMapNo); + //sb.append(String.format("Proiduct Bond Type (%d) - (%d) = %d \n",cMapNo,cOtherMapNo,pBondType)); + // If there's no bond, then the bond has been brokwn + if (pBondType == -1) { + if (!reverse) { + System.out.println(String.format("###BOND_BREAK(1) at %d-%d \n", cOtherMapNo, cMapNo)); + int key = addDeleteKey(cBondType, source.getAtomicNo(cAtomNo), source.getAtomicNo(cBondAtom)); + if (debug) + System.out.println("Delete Key is " + key); + addKey(key); + addKey(DEL_ONLY); + } else { + System.out.println(String.format("###BOND_CREATE(1) at %d-%d \n", cOtherMapNo, cMapNo)); + int key = addCreateKey(cBondType, source.getAtomicNo(cAtomNo), source.getAtomicNo(cBondAtom)); + addKey(key); + addKey(CR_ONLY); + } + if (debug) + sb.append(String.format("Bond %d %s-%s\n", sourceBondIndex, source.getAtomLabel(cAtomNo), source.getAtomLabel(cBondAtom))); + // BOND_BREAK; + // If the bond type is the same, nothing happend + } else if (cBondType == pBondType) { + ; + // the bond 
type is different, so it's a change + } else { + if (!reverse) { + System.out.println(String.format("###BOND_CHANGE(1) Type=%d Map %d-%d \n", pBondType, cOtherMapNo, cMapNo)); + int key = getChangeKey(source.getAtomicNo(cAtomNo), source.getAtomicNo(cBondAtom), cBondType, pBondType); + addKey(key); + addKey(CH_ONLY); + } + // BOND_CHANGE + } + + } else { + System.out.println(String.format("not found other map no %d %d", listIndex, cMapNo)); + //if (!cVisitedBonds[sourceBondIndex]) { + // System.out.println(String.format("## UNDEFINED",listIndex,cMapNo)); + //} + // Does this bond break + } + } + if (!reverse) { + if (cNumConnectedAtoms < pNumConnectedAtoms) { + System.out.println(String.format("searching backward? %d/%d (%d)", cNumConnectedAtoms, pNumConnectedAtoms, cMapNo)); + int bondIndex = findDeletedBond(target, pAtomNo, source, cAtomNo); + if (bondIndex != -1 && !cVisitedBonds[bondIndex]) { + int ba = target.getConnAtom(pAtomNo, bondIndex); + int type = target.getAtomicNo(ba); + int bt = getBondType(target, bondIndex); + System.out.println(String.format("### BOND_CREATE(2) at Target Atom %d(%d)[%d] from Source Atom %d(%d)", pAtomNo, cMapNo, type, cAtomNo, cMapNo)); + // BOND_CREATE + int key = addCreateKey(bt, target.getAtomicNo(pAtomNo), target.getAtomicNo(ba)); + addKey(key); + addKey(CR_ONLY); + } + } else if (cNumConnectedAtoms > pNumConnectedAtoms) { + System.out.println(String.format("searching forward? 
%d/%d (%d)", cNumConnectedAtoms, pNumConnectedAtoms, cMapNo)); + int bondIndex = findDeletedBond(source, cAtomNo, target, pAtomNo); + if (bondIndex != -1 && !cVisitedBonds[bondIndex]) { + int ba = source.getConnAtom(cAtomNo, bondIndex); + int type = source.getAtomicNo(ba); + int bt = getBondType(target, bondIndex); + System.out.println(String.format("### BOND_DELETE(2) at Target Atom %d(%d)[%d] from Source Atom %d(%d)", pAtomNo, cMapNo, type, cAtomNo, cMapNo)); + // BOND_CREATE + int key = addDeleteKey(bt, source.getAtomicNo(cAtomNo), source.getAtomicNo(ba)); + addKey(key); + addKey(DEL_ONLY); + } + } + } + } else { + } + } + } + } else { + + } + sb.append(getKeysString()); + return sb.toString(); + } + */ + private int findDeletedBond(ExtendedMolecule source, int sourceAtom, ExtendedMolecule target, int targetAtom) + { + int sourceNumConnectedAtoms = source.getConnAtoms(sourceAtom); + int originAtomType = source.getAtomicNo(sourceAtom); + int targetNumConnectedAtoms = target.getConnAtoms(targetAtom); + boolean visitedSourceBonds[] = new boolean[sourceNumConnectedAtoms]; + if (sourceNumConnectedAtoms > targetNumConnectedAtoms) { + if (debug) { + System.out.println(String.format("findDeletedBond() walk source atom %d", sourceAtom)); + } +// Walk all bonds at this atom + boolean visitedTargetBonds[] = new boolean[targetNumConnectedAtoms]; + for (int i = 0; i < sourceNumConnectedAtoms; i++) { + if (!visitedSourceBonds[i]) { + // This is the atom of the bond + int sourceBondAtom = source.getConnAtom(sourceAtom, i); +// And this is the bond + int sourceBond = source.getConnBond(sourceAtom, i); +// The bond type + int sourceBondType = getBondType(source, sourceBond); +// The atom type + int sourceAtomType = source.getAtomicNo(sourceBondAtom); + if (debug) { + System.out.println(String.format("findDeletedBond() walk source bond atom %d bond %d type %d atomtype %d", sourceBondAtom, i, sourceBondType, sourceAtomType)); + } + for (int j = 0; j < targetNumConnectedAtoms; j++) 
{ + if (!visitedTargetBonds[j]) { + int bondAtom = target.getConnAtom(targetAtom, j); + int bond = target.getConnBond(targetAtom, j); + int targetBondType = getBondType(target, bond); + int targetAtomType = target.getAtomicNo(bondAtom); + if (debug) { + System.out.println(String.format("findDeletedBond() walk checking bond %d->%d (%d/%d) %d/%d", sourceAtomType, targetAtomType, i, j, sourceBondType, targetBondType)); + } + if (sourceAtomType == targetAtomType && sourceBondType == targetBondType) { + visitedSourceBonds[i] = true; + visitedTargetBonds[j] = true; + System.out.println("Found Bond"); + break; + } + } + } + if (!visitedSourceBonds[i]) { + if (debug) { + System.out.println(String.format("findDeletedBond() walk source bond not found atom %d bond %d type %d atomtype %d->%d", + + sourceBondAtom, i, sourceBondType, originAtomType, sourceAtomType)); + } + } + } + } + for (int i = 0; i < sourceNumConnectedAtoms; i++) { + if (!visitedSourceBonds[i]) { + if (debug) { + System.out.println(String.format("Did not find bond: %d(%d)", sourceAtom, i)); + } +// this bond has not been seen, so it's breacking + return i; + } + } + } + return -1; + } + + private boolean findBond(ExtendedMolecule mol, int atomIndex, int atomType, int bondType) + { + int pNumConnectedAtoms = mol.getConnAtoms(atomIndex); + return false; + } + + /* + ExtendedMolecule findMappedAtomMol(int mapno) + { + ExtendedMolecule ret = null; + int prod = rxn.getProducts(); + for (int i = 0; i < prod; i++) { + ExtendedMolecule m = rxn.getMolecule(i); + for (int a = 0; a < m.getAllAtoms(); a++) { + if (m.getAtomMapNo(a) == mapno) { + ret = m; + break; + } + } + } + return ret; + } + */ + int getMappedAtom(ExtendedMolecule m, int map1) + { + int a1 = -1; + for (int a = 0; a < m.getAllAtoms(); a++) { + if (m.getAtomMapNo(a) == map1) { + a1 = a; + break; + } + } + return a1; + } + + int getMappedBondType(ExtendedMolecule m, int map1, int map2) + { + int ret = -1; // Bond Type + int bond = getMappedBond(m, map1, 
map2); + if (bond != -1) { + return getBondType(m, bond); + } + return ret; + } + + private int getBondType(ExtendedMolecule m, int bond) + { + int ret = m.getBondType(bond); + if (m.isAromaticBond(bond) || m.isDelocalizedBond(bond)) { + ret = Molecule.cBondTypeDelocalized; + } + return getSimpleBondType(ret); + } + + private int getSimpleBondType(int bondtype) + { + if (bondtype == Molecule.cBondTypeDelocalized) { + return Molecule.cBondTypeDelocalized; + } else if ((bondtype & Molecule.cBondTypeSingle) == Molecule.cBondTypeSingle) { + return Molecule.cBondTypeSingle; + } else if ((bondtype & Molecule.cBondTypeDouble) == Molecule.cBondTypeDouble) { + return Molecule.cBondTypeDouble; + } else if ((bondtype & Molecule.cBondTypeTriple) == Molecule.cBondTypeTriple) { + return bondtype & Molecule.cBondTypeTriple; + } + return bondtype; + } + + int getMappedBond(ExtendedMolecule m, int map1, int map2) + { + int ret = -1; // Bond + int a1 = getMappedAtom(m, map1); + if (a1 != -1) { + int bcnt = m.getConnAtoms(a1); + for (int b = 0; b < bcnt; b++) { + int a2 = m.getConnAtom(a1, b); +// int a2 = m.getBondAtom(a1,b); + int mt = m.getAtomMapNo(a2); + if (debug) { + System.out.println(String.format("Checking Product bond %d-%d : Product Atom %d has map no %d", a1, a2, a2, mt)); + } + if (mt == map2) { + ret = m.getConnBond(a1, b); + break; + } + } + } + return ret; + } + +// public int getKey(ExtendedMolecule s, int a1, int a2, int bond) +// { +// return 0; +// } + + + private int getSameBond(ExtendedMolecule source, int cBondAtom, int cBondIndex, ExtendedMolecule target, int targetAtom) + { + int cAtomSymbol = source.getAtomicNo(cBondAtom); + int map = source.getAtomMapNo(cBondAtom); +// int cBondAtom = source.getConnAtom(sourceAtom,cbi); +// int cBondIndex = source.getConnBond(sourceAtom,cbi); + int cBondType = getBondType(source, cBondIndex); + int targetBondCount = target.getConnAtoms(targetAtom); + for (int tbi = 0; tbi < targetBondCount; tbi++) { + int tBondAtom = 
target.getConnAtom(targetAtom, tbi); + int tBondIndex = target.getConnBond(targetAtom, tbi); + int tBondType = getBondType(target, tBondIndex); + int tAtomSymbol = target.getAtomicNo(tBondAtom); + int tmap = target.getAtomMapNo(tBondAtom); + if (cAtomSymbol == tAtomSymbol && cBondType == tBondType && map == tmap) { + return tBondIndex; + } + } + return -1; + } + + private int getBond(ExtendedMolecule mol, int atom1, int atom2) + { + int ret = -1; + int bondCount = mol.getConnAtoms(atom1); + for (int tbi = 0; tbi < bondCount; tbi++) { + int tBondAtom = mol.getConnAtom(atom1, tbi); + if (tBondAtom == atom2) { + ret = mol.getConnBond(atom1, tbi); + break; + } + } + return ret; + } + + private int findMapAt(ExtendedMolecule mol, int atom, int map) + { + int ret = -1; + int bondCount = mol.getConnAtoms(atom); + int m = mol.getAtomMapNo(atom); + if (m == map) { + ret = atom; + } + if (ret == -1) { + for (int tbi = 0; tbi < bondCount; tbi++) { + int tBondAtom = mol.getConnAtom(atom, tbi); + m = mol.getAtomMapNo(tBondAtom); + if (m == map) { + ret = tBondAtom; + break; + } + } + } + return ret; + } + + private void generateKeys(Reaction rxn) + { + + // (1) + // FIND ATOM WITH MAP NO IN TARGET + // FOR ALL ATOMS ON TARGET + // IF MAP(ATOM)== MAP + // TATOM = ATOM + // END FOR + + //(2) + // for all bonds at targetatom + // if map(atom)== map + // return atom + // end for + + StringBuilder sb = new StringBuilder(); + ExtendedMolecule m = new ExtendedMolecule(); + initKeys(); + if (debug) { + System.out.println(" " + m.toString()); + } + int comp = rxn.getReactants(); + int prod = rxn.getProducts(); + int flags = Molecule.cHelperBitNeighbours | Molecule.cHelperBitRings; + + if (comp != 0 && prod != 0) { + if (debug) { + System.out.println("RXNSearcher.search()"); + } + ExtendedMolecule component = new StereoMolecule(rxn.getMolecule(0)); + for (int c = 1; c < comp; c++) { + ExtendedMolecule mol = rxn.getMolecule(c); + component.addMolecule(mol); + } + 
component.ensureHelperArrays(flags); + //sb.append(String.format("Component has %d atoms\n",component.getAllAtoms())); + + ExtendedMolecule product = new StereoMolecule(rxn.getProduct(0)); + for (int c = 1; c < prod; c++) { + product.addMolecule(rxn.getProduct(c)); + } + product.ensureHelperArrays(flags); + + ExtendedMolecule source = component; + ExtendedMolecule target = product; + + // All Mapped atom indizes + int mappedSourceAtoms[] = new int[source.getAllAtoms()]; + int mappedTargetAtoms[] = new int[target.getAllAtoms()]; + boolean cVisitedBonds[] = new boolean[source.getAllBonds()]; + + //sb.append(String.format("source has %d atoms\n",source.getAllAtoms())); + int noMappedSourceAtoms = 0; + for (int a = 0; a < source.getAllAtoms(); a++) { + int map = source.getAtomMapNo(a); + if (map > 0) { + mappedSourceAtoms[noMappedSourceAtoms++] = a; + } + + } + + + int noMappedTargetAtoms = 0; + for (int a = 0; a < target.getAllAtoms(); a++) { + int map = target.getAtomMapNo(a); + if (map > 0) { + mappedTargetAtoms[noMappedTargetAtoms++] = a; + } + + } + + // setup SOURCEBOND and TARGETBOND ARRAY based on MAPPED ATOMS + boolean sourceBonds[] = new boolean[source.getAllBonds()]; + boolean targetBonds[] = new boolean[target.getAllBonds()]; + + +// for (int k = 0; k < idx; k++) { +//// System.out.printf("Map [%d] -> [%d]\n",k,source.getAtomMapNo(k)); +// int s = source.getConnAtoms(k); +// for (int b = 0; b < s; b++) { +// int a2 = source.getConnAtom(k, b); +// System.out.printf("Map #[%d] -> M[%d] = #[%d] -> M[%d]\n", +// k,source.getAtomMapNo(k),a2, +// source.getAtomMapNo(a2)); +// } +// } +// if (true) +// return ""; + + // for all mapped atoms + for (int k = 0; k < noMappedSourceAtoms; k++) { + // sourceAtom + int sourceAtom = mappedSourceAtoms[k]; + int sourceMapNo = source.getAtomMapNo(sourceAtom); + // targetAtom = find atom with same map no as sourceAtom (1) + int targetAtom = getMappedAtom(target, sourceMapNo); + debug("Working on atom %d (%d) target atom is 
%d\n", sourceMapNo, sourceAtom, targetAtom); + + // if not found + // break; + if (targetAtom == -1) { + break; + } + +// debug("Map first (%d) (%d-%d)\n",sourceMapNo, sourceAtom,targetAtom); + + // boundCounts at source and target atom + int sourceBondCount = source.getConnAtoms(sourceAtom); + int targetBondCount = target.getConnAtoms(targetAtom); + // for all bonds at source Atom + for (int cbi = 0; cbi < sourceBondCount; cbi++) { + int sourceBondConnectedAtom = source.getConnAtom(sourceAtom, cbi); + int sourceBondIndex = source.getConnBond(sourceAtom, cbi); + int sourceBondType = getBondType(source, sourceBondIndex); + // if sourceBond is not visited + if (!sourceBonds[sourceBondIndex]) { + // amap = map(bondAtom) + int sourceBondConnectedAtomMap = source.getAtomMapNo(sourceBondConnectedAtom); + debug("\tChecking Source Bond on atom %d (%d)\n", + sourceBondConnectedAtomMap, sourceBondConnectedAtom); + + // if bondAtom is mapped + if (sourceBondConnectedAtomMap != 0) { + // find atom with amap as at targetAtom bonds + int targetBondAtom = findMapAt(target, targetAtom, sourceBondConnectedAtomMap); +// debug("Mapping source bond %d (%d-%d) (%d-%d)\n", +// cbi, +// sourceMapNo, +// sourceBondConnectedAtomMap, +// sourceAtom, +// sourceBondConnectedAtom +// ); + // if same map is found + if (targetBondAtom != -1) { + int targetBondIndex = getBond(target, targetAtom, targetBondAtom); + // TARGETBOND is visited + targetBonds[targetBondIndex] = true; + // if bondtype differs + int targetBondType = getBondType(target, targetBondIndex); + // The bond types don't match + if (targetBondType != sourceBondType) { + // its a change + // sourceBond[i] is visited + if (debug) { + System.out.println(String.format("Map: %d-%d bond CHANGE %d/%d", sourceMapNo, sourceBondConnectedAtomMap, sourceBondType, targetBondType)); + } + sourceBonds[sourceBondIndex] = true; + addChangeKey(source.getAtomicNo(sourceAtom), source.getAtomicNo(sourceBondConnectedAtom), sourceBondType, 
targetBondType); + //addKey(key); + //addKey(CH_ONLY); + // else + } else { + // sourceBond[i] is visited + sourceBonds[sourceBondIndex] = true; + // no change + // end if + } + // else (map not found) + } else { + // its a break; + // sourceBond[i] is visited + if (debug) { + System.out.println(String.format("Map: %d-%d bond BREAK %d", sourceMapNo, sourceBondConnectedAtomMap, sourceBondType)); + } + sourceBonds[sourceBondIndex] = true; + addDeleteKey(sourceBondType, source.getAtomicNo(sourceAtom), source.getAtomicNo(sourceBondConnectedAtom)); +// addKey(key); +// addKey(DEL_ONLY); + // ??? don't mark bond as visited we want to find the CREATE + // end if + } + } else { + ; + // Not a mapped atom, we will deal with this in the next section + // System.out.println(String.format("Map: %d Not mapped atom", sourceMapNo)); + // not mapped atom + // ignore + } + } // end if + } // end for + + // Deal with the non-mapped atoms... + // for all bonds at source Atom + for (int cbi = 0; cbi < sourceBondCount; cbi++) { + int cBondAtom = source.getConnAtom(sourceAtom, cbi); + int cBondIndex = source.getConnBond(sourceAtom, cbi); + int cBondType = getBondType(source, cBondIndex); +// int cAtomSymbol = source.getAtomicNo(cBondAtom); + // if sourceBond is not visited + // System.out.println(String.format("Map %d running at atom %d map %d",sourceMapNo,cBondAtom,source.getAtomMapNo(cBondAtom))); + if (!sourceBonds[cBondIndex]) { + int cbMap = source.getAtomMapNo(cBondAtom); + // if bondAtom is not mapped + if (cbMap == 0) { + int tBondIndex = getSameBond(source, cBondAtom, cBondIndex, target, targetAtom); + if (tBondIndex != -1) { + // sourcebond is visited + // TARGETBOND IS VISITED + sourceBonds[cBondIndex] = true; + targetBonds[tBondIndex] = true; + // System.out.println(String.format("No Map/No change -> found bond index %d ", sourceMapNo)); + // no change + // else + } else { + // BREAK_BOND + // sourcebond is visited +// if (debug) +// System.out.println(String.format("Map: %d 
(%d-%d) Atom Type: %d bond at atom " + +// "BREAK", +// sourceMapNo, +// sourceAtom,cBondAtom, +// source.getAtomicNo(cBondAtom))); + sourceBonds[cBondIndex] = true; + addDeleteKey(cBondType, source.getAtomicNo(sourceAtom), source.getAtomicNo(cBondAtom)); + if (debug) { + System.out.println(String.format("Map: %d (%d-%d) Atom Type: %d bond at atom " + + "DELETE", + sourceMapNo, + sourceAtom, cBondAtom, + source.getAtomicNo(cBondAtom))); + } + +// addKey(key); + //addKey(DEL_ONLY); + // end if + } + } else { + // This should not happen, since we dealt with this already before + if (debug) { + System.out.println(String.format("Map: %d Found SHOULD NOT", cbMap)); + } + } // end if + } else { + // The bond has already been + // else + //??? + } + // end if + // end for + } + // for all target bounds which have not yet been visited + // they might be created, however, we'd need to check whether there's a mapping +// if (sourceBondCount < targetBondCount) { + // for all bonds at target atom + for (int tbi = 0; tbi < targetBondCount; tbi++) { + int tBondIndex = target.getConnBond(targetAtom, tbi); + int tBondType = getBondType(target, tBondIndex); + int tAtom = target.getConnAtom(targetAtom, tbi); + int tmap = target.getAtomMapNo(tAtom); + int an = target.getAtomicNo(tAtom); + // if targetbond is not visited + if (!targetBonds[tBondIndex]) { + //those are CREATE_BONDS + if (debug) { + System.out.println(String.format("Map: %d-%d (%d) bond at atom CREATE_2", sourceMapNo, tmap, an)); + } + targetBonds[tBondIndex] = true; + int key = addCreateKey(tBondType, target.getAtomicNo(targetAtom), target.getAtomicNo(tAtom)); + addKey(key); + addKey(CR_ONLY); + + } //endif + } // end for +// } //endif + } //end for all mapped atoms + } + } + + public void debug(String format, Object... 
args) + { + if (debug) + System.out.printf(format, args); + + } + + public static void main(String args[]) + { + ReactionIndexer rs = new ReactionIndexer(); + try { + + for (int i = 1; i <= 11; i++) { + RXNFileParser p = new RXNFileParser(); + Reaction r = new Reaction(); + p.parse(r, new java.io.File("RXN" + i + ".rxn")); + if (debug) { + System.out.println("RXN: " + r.getMolecules()); + } + rs.getKeys(r); + } + } catch (Throwable e) { + System.err.println("Error parsing RDFILE " + e); + e.printStackTrace(); + } + } + + +} + + +/* +class RDFileReader +{ + BufferedReader rd = null; + StringBuilder rxn = null; + StringBuilder data = null; + + public RDFileReader(InputStream is) throws IOException + { + rd = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); + String line; + line = rd.readLine(); + if (line == null || !line.startsWith("$RDFILE ")) + throw new IOException("Invalid File Header"); + line = rd.readLine(); + if (line == null) + throw new IOException("Invalid File Header"); + line = rd.readLine(); + if (line == null || !line.startsWith("$RFMT")) + throw new IOException("File is empty"); + } + + boolean hasNext() + { + boolean ret = false; + StringBuilder r = new StringBuilder(); + StringBuilder d = new StringBuilder(); + // The default buffer is the reaction buffer + StringBuilder sb = r; + String line; + try { + boolean eof = true; + while ((line = rd.readLine()) != null) { + eof = false; + if (line.startsWith("$RFMT")) { +// System.out.println("End of This RXN..." 
+line); + break; + } + // Switch the buffer to the data buffer + if (line.startsWith("$DTYPE")) { +// System.out.println("Switching To Data Buffer " + line); + sb = data; + } + sb.append(line); + sb.append("\n"); + } + if (!eof) { + data = d; + rxn = r; + ret = true; + } + } catch (IOException e) { + ret = false; + } + return ret; + } + + public Reaction getReaction() + { + Reaction r = new Reaction(); + RXNFileParser p = new RXNFileParser(); + try { + r = p.getReaction(rxn.toString()); + } catch (Exception e) { + System.err.println("Error parsing reaction..."); + r = null; + } + return r; + } + +} +*/ diff --git a/src/main/java/com/actelion/research/gui/hidpi/HiDPIHelper.java b/src/main/java/com/actelion/research/gui/hidpi/HiDPIHelper.java index 465cb671..d56476dc 100644 --- a/src/main/java/com/actelion/research/gui/hidpi/HiDPIHelper.java +++ b/src/main/java/com/actelion/research/gui/hidpi/HiDPIHelper.java @@ -10,6 +10,7 @@ import java.io.BufferedReader; import java.io.InputStreamReader; import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; public class HiDPIHelper { // This is an Apple only solution and needs to be adapted to support high-res displays of other vendors @@ -77,20 +78,24 @@ else if (Platform.isMacintosh()) { sUIScaleFactor = 1.0f; } else if (Platform.isWindows()) { - try { - // with JRE8 we used (float)UIManager.getFont("Label.font").getSize() / 12f - sUIScaleFactor = Toolkit.getDefaultToolkit().getScreenResolution() / 96f; + // only do scaling if jre <= 1.8 + if (System.getProperty("java.version").startsWith("1.")) { + try { + // with JRE8 we used (float)UIManager.getFont("Label.font").getSize() / 12f + sUIScaleFactor = Toolkit.getDefaultToolkit().getScreenResolution() / 96f; + } catch (HeadlessException hle) { + sUIScaleFactor = 1.0f; } - catch (HeadlessException hle) { + } else { sUIScaleFactor = 1.0f; - } } + } else { // Linux; Toolkit.getDefaultToolkit().getScreenResolution() always returns 1.0 try { sUIScaleFactor = 1.0f; 
// default in case of error Process process = Runtime.getRuntime().exec("xrdb -q"); process.waitFor(); - BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream())); + BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)); String line; while ((line = br.readLine()) != null) { if (line.startsWith("Xft.dpi:")) { diff --git a/src/main/java/com/actelion/research/util/IO.java b/src/main/java/com/actelion/research/util/IO.java index 2315f228..22647987 100644 --- a/src/main/java/com/actelion/research/util/IO.java +++ b/src/main/java/com/actelion/research/util/IO.java @@ -40,6 +40,7 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.text.DecimalFormat; import java.text.NumberFormat; @@ -92,7 +93,7 @@ public static BufferedReader getBufferedReader(String sAbsolutePathIn) throws Fi if (sAbsolutePathIn.length() > 0) { FileInputStream fis = new FileInputStream(sAbsolutePathIn); - InputStreamReader isr = new InputStreamReader(fis); + InputStreamReader isr = new InputStreamReader(fis, StandardCharsets.UTF_8); bufferedReader = new BufferedReader(isr); } @@ -507,7 +508,7 @@ public static void skipUntilLineMatchesRegEx(InputStream in, String regex)throws } public static String read(InputStream is) throws IOException { - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line=null; StringBuilder sb = new StringBuilder(); while ((line = reader.readLine())!=null) { @@ -697,7 +698,7 @@ public static List readLines2List(File file) throws IOException { public static List readLines2List(InputStream is) throws IOException { List li = new ArrayList(); - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + BufferedReader 
reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line = null; diff --git a/src/main/java/com/actelion/research/util/StringFunctions.java b/src/main/java/com/actelion/research/util/StringFunctions.java index 95c2b7e4..ff81a0fa 100644 --- a/src/main/java/com/actelion/research/util/StringFunctions.java +++ b/src/main/java/com/actelion/research/util/StringFunctions.java @@ -957,6 +957,22 @@ public static String toString(boolean [] arr){ return sb.toString(); } + public static String toString(boolean [] [] arr){ + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < arr.length; i++) { + for (int j = 0; j < arr[i].length; j++) { + if (arr[i][j]) { + sb.append(1); + } else { + sb.append(0); + } + } + sb.append("\n"); + } + + return sb.toString(); + } public static String toString(List li, NumberFormat nf){ diff --git a/src/main/java/com/actelion/research/util/datamodel/IntVec.java b/src/main/java/com/actelion/research/util/datamodel/IntVec.java index fec104c9..e6ffa6b6 100644 --- a/src/main/java/com/actelion/research/util/datamodel/IntVec.java +++ b/src/main/java/com/actelion/research/util/datamodel/IntVec.java @@ -43,6 +43,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; @@ -871,7 +872,7 @@ public void read(String s) { List li = new ArrayList(); try { - BufferedReader buf = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + BufferedReader buf = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); while(buf.ready()) { String s = buf.readLine(); diff --git a/src/main/java/org/openmolecules/chem/conf/gen/RigidFragmentCache.java b/src/main/java/org/openmolecules/chem/conf/gen/RigidFragmentCache.java index e814acef..2d195407 100644 --- 
a/src/main/java/org/openmolecules/chem/conf/gen/RigidFragmentCache.java +++ b/src/main/java/org/openmolecules/chem/conf/gen/RigidFragmentCache.java @@ -174,7 +174,7 @@ public synchronized void loadDefaultCache() { if (is != null) { ZipInputStream zipStream = new ZipInputStream(is); zipStream.getNextEntry(); - BufferedReader reader = new BufferedReader(new InputStreamReader(zipStream)); + BufferedReader reader = new BufferedReader(new InputStreamReader(zipStream, StandardCharsets.UTF_8)); loadCache(reader); reader.close(); mDefaultCacheLoaded = true; @@ -203,7 +203,7 @@ public void loadCache(String cacheFileName) { if (cacheFileName.endsWith(".zip")) { ZipInputStream zipStream = new ZipInputStream(new FileInputStream(cacheFileName)); zipStream.getNextEntry(); - reader = new BufferedReader(new InputStreamReader(zipStream)); + reader = new BufferedReader(new InputStreamReader(zipStream, StandardCharsets.UTF_8)); } else { reader = new BufferedReader(new FileReader(cacheFileName));