Skip to content

Commit

Permalink
Mol2 format implementation addressing theochem#78 and theochem#37
Browse files Browse the repository at this point in the history
  • Loading branch information
Esteban Vohringer-Martinez authored and Esteban Vohringer-Martinez committed May 29, 2019
1 parent 7d40670 commit 15246e9
Show file tree
Hide file tree
Showing 3 changed files with 373 additions and 0 deletions.
152 changes: 152 additions & 0 deletions iodata/formats/mol2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""MOL2 file format.
There are different formats of mol2 files. Here the compatibility with AMBER software
was the main objective to write out files with atomic charges used by antechamber.
"""


from typing import TextIO, Iterator, Tuple

import numpy as np

from ..docstrings import (document_load_one, document_load_many, document_dump_one,
document_dump_many)
from ..iodata import IOData
from ..periodic import sym2num, num2sym
from ..utils import angstrom, LineIterator


__all__ = []


PATTERNS = ['*.mol2']


@document_load_one("MOL2", ['atcoords', 'atnums', 'atcharges', 'atffparams'], ['title'])
def load_one(lit: LineIterator) -> dict:
"""Do not edit this docstring. It will be overwritten."""
molecule_found = False
while True:
try:
line = next(lit)
except StopIteration:
break
if len(line) > 1:
words = line.split()
if words[0] == "@<TRIPOS>MOLECULE":
# Found another molecule; go one line back and break
if molecule_found:
lit.back(line)
break
title = next(lit).strip()
if words[0] == "@<TRIPOS>ATOM":
atnums, atcoords, atchgs, attypes = _load_helper_atoms(lit)
atcharges = {"mol2": atchgs}
atffparams = {"attypes": attypes}
result = {
'atcoords': atcoords,
'atnums': atnums,
'atcharges': atcharges,
'atffparams': atffparams
}
if title is not None:
result['title'] = title
molecule_found = True
if molecule_found is False:
raise lit.error("Molecule could not be read")
return result


def _load_helper_atoms(lit: LineIterator) -> Tuple[np.ndarray, np.ndarray, np.ndarray, tuple]:
"""Load element numbers, coordinates and atomic charges."""
atnums = []
atcoords = []
atchgs = []
attypes = []
for line in lit:
words = line.split()
if len(words) <= 6:
break
# Assume that the first character of atom type is the element
try:
atnums.append(sym2num.get(words[5][0].title()))
except ValueError:
print(f'Can not convert atom type {words[5][0]} to element')
attypes.append(words[5])
atcoords.append([float(words[2]), float(words[3]), float(words[4])])
if len(words) == 9:
atchgs.append(float(words[8]))
else:
atchg.append(0.0)
atnums = np.array(atnums, int)
atcoords = np.array(atcoords) * angstrom
atchgs = np.array(atchgs)
attypes = tuple(attypes)
return atnums, atcoords, atchgs, attypes


@document_load_many("MOL2", ['atcoords', 'atnums', 'atcharges', 'atffparams'], ['title'])
def load_many(lit: LineIterator) -> Iterator[dict]:
"""Do not edit this docstring. It will be overwritten."""
# MOL2 files with more molecules are a simple concatenation of individual MOL2 files,'
# making it trivial to load many frames.
while True:
try:
yield load_one(lit)
except IOError:
return


@document_dump_one("MOL2", ['atcoords', 'atnums'], ['atcharges', 'atffparams', 'title'])
def dump_one(f: TextIO, data: IOData):
"""Do not edit this docstring. It will be overwritten."""
# The first six lines are reserved for comments
print("# Mol2 file created with Iodata", file=f)
print("\n\n\n\n\n", file=f)
print("@<TRIPOS>MOLECULE", file=f)
print(data.title or 'Created with IOData', file=f)
print("@<TRIPOS>ATOM", file=f)
for i in range(data.natom):
n = num2sym[data.atnums[i]]
x, y, z = data.atcoords[i] / angstrom
out1 = f'{i:6d} {n:2s} {x:10.4f} {y:10.4f} {z:10.4f} '
if not data.atcharges:
# Will write the first atomic charge key of the dictionary
charge = data.atcharges[0][i]
if not data.attparams:
for param, value in data.atffparams.items():
if param == 'attypes':
attype = values[i]
out2 = f'{attype:3s} 1 XXX {charge:1.5f}'
else:
out2 = f'{n:2s} 1 XXX {charge:1.5f}'
print(out1 + out2, file=f)
else:
out2 = f'{n:2s} 1 XXX 0.0000'
print(out1 + out2, file=f)


@document_dump_many("MOL2", ['atcoords', 'atnums', 'atcharges'], ['title'])
def dump_many(f: TextIO, datas: Iterator[IOData]):
"""Do not edit this docstring. It will be overwritten."""
# Similar to load_many, this is relatively easy.
for data in datas:
dump_one(f, data)
114 changes: 114 additions & 0 deletions iodata/test/data/caffeine.mol2
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
@<TRIPOS>MOLECULE
ZINC00001084
24 25 0 0 0
SMALL
USER_CHARGES

@<TRIPOS>ATOM
1 C1 -0.0178 1.4608 0.0101 C.3 1 <0> 0.0684
2 N1 0.0021 -0.0041 0.0020 N.pl3 1 <0> -0.4126
3 C2 0.0085 -0.7996 -1.0853 C.2 1 <0> 0.2683
4 N2 0.0275 -2.0544 -0.7142 N.2 1 <0> -0.4991
5 C3 0.0343 -2.1318 0.6190 C.2 1 <0> 0.3175
6 C4 0.0238 -0.8312 1.1071 C.2 1 <0> -0.1559
7 C5 0.0338 -0.6202 2.5028 C.2 1 <0> 0.5949
8 O1 0.0219 0.5106 2.9550 O.2 1 <0> -0.5132
9 N3 0.0564 -1.6860 3.3300 N.am 1 <0> -0.5520
10 C6 0.0693 -2.9392 2.8440 C.2 1 <0> 0.7070
11 O2 0.0891 -3.8755 3.6197 O.2 1 <0> -0.5319
12 N4 0.0527 -3.1841 1.5201 N.am 1 <0> -0.5109
13 C7 0.0693 -4.5651 1.0316 C.3 1 <0> 0.0891
14 C8 0.0674 -1.4741 4.7795 C.3 1 <0> 0.0836
15 H1 1.0048 1.8381 0.0023 H 1 <0> 0.0879
16 H2 -0.5450 1.8219 -0.8730 H 1 <0> 0.0987
17 H3 -0.5280 1.8123 0.9069 H 1 <0> 0.1006
18 H4 -0.0012 -0.4540 -2.1085 H 1 <0> 0.2318
19 H5 1.1010 -4.8936 0.9058 H 1 <0> 0.0732
20 H6 -0.4324 -5.2114 1.7518 H 1 <0> 0.0936
21 H7 -0.4482 -4.6180 0.0738 H 1 <0> 0.0925
22 H8 -0.9574 -1.4357 5.1489 H 1 <0> 0.0769
23 H9 0.5971 -2.2951 5.2627 H 1 <0> 0.0968
24 H10 0.5705 -0.5340 5.0055 H 1 <0> 0.0949
@<TRIPOS>BOND
1 1 2 1
2 1 15 1
3 1 16 1
4 1 17 1
5 2 6 1
6 2 3 1
7 3 4 2
8 3 18 1
9 4 5 1
10 5 12 1
11 5 6 2
12 6 7 1
13 7 8 2
14 7 9 am
15 9 10 am
16 9 14 1
17 10 11 2
18 10 12 am
19 12 13 1
20 13 19 1
21 13 20 1
22 13 21 1
23 14 22 1
24 14 23 1
25 14 24 1
@<TRIPOS>MOLECULE
ZINC00001085
24 25 0 0 0
SMALL
USER_CHARGES

@<TRIPOS>ATOM
1 C1 -0.0100 1.5608 0.0201 C.3 1 <0> 0.0684
2 N1 0.0021 -0.0041 0.0020 N.pl3 1 <0> -0.4126
3 C2 0.0085 -0.7996 -1.0853 C.2 1 <0> 0.2683
4 N2 0.0275 -2.0544 -0.7142 N.2 1 <0> -0.4991
5 C3 0.0343 -2.1318 0.6190 C.2 1 <0> 0.3175
6 C4 0.0238 -0.8312 1.1071 C.2 1 <0> -0.1559
7 C5 0.0338 -0.6202 2.5028 C.2 1 <0> 0.5949
8 O1 0.0219 0.5106 2.9550 O.2 1 <0> -0.5132
9 N3 0.0564 -1.6860 3.3300 N.am 1 <0> -0.5520
10 C6 0.0693 -2.9392 2.8440 C.2 1 <0> 0.7070
11 O2 0.0891 -3.8755 3.6197 O.2 1 <0> -0.5319
12 N4 0.0527 -3.1841 1.5201 N.am 1 <0> -0.5109
13 C7 0.0693 -4.5651 1.0316 C.3 1 <0> 0.0891
14 C8 0.0674 -1.4741 4.7795 C.3 1 <0> 0.0836
15 H1 1.0048 1.8381 0.0023 H 1 <0> 0.0879
16 H2 -0.5450 1.8219 -0.8730 H 1 <0> 0.0987
17 H3 -0.5280 1.8123 0.9069 H 1 <0> 0.1006
18 H4 -0.0012 -0.4540 -2.1085 H 1 <0> 0.2318
19 H5 1.1010 -4.8936 0.9058 H 1 <0> 0.0732
20 H6 -0.4324 -5.2114 1.7518 H 1 <0> 0.0936
21 H7 -0.4482 -4.6180 0.0738 H 1 <0> 0.0925
22 H8 -0.9574 -1.4357 5.1489 H 1 <0> 0.0769
23 H9 0.5971 -2.2951 5.2627 H 1 <0> 0.0968
24 H10 0.5705 -0.5340 5.0055 H 1 <0> 0.0949
@<TRIPOS>BOND
1 1 2 1
2 1 15 1
3 1 16 1
4 1 17 1
5 2 6 1
6 2 3 1
7 3 4 2
8 3 18 1
9 4 5 1
10 5 12 1
11 5 6 2
12 6 7 1
13 7 8 2
14 7 9 am
15 9 10 am
16 9 14 1
17 10 11 2
18 10 12 am
19 12 13 1
20 13 19 1
21 13 20 1
22 13 21 1
23 14 22 1
24 14 23 1
25 14 24 1
107 changes: 107 additions & 0 deletions iodata/test/test_mol2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# IODATA is an input and output module for quantum chemistry.
# Copyright (C) 2011-2019 The IODATA Development Team
#
# This file is part of IODATA.
#
# IODATA is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# IODATA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
# --
"""Test iodata.formats.mol2 module."""

import os

import pytest
from numpy.testing import assert_equal, assert_allclose

from .common import truncated_file
from ..api import load_one, load_many, dump_one, dump_many
from ..utils import angstrom
try:
from importlib_resources import path
except ImportError:
from importlib.resources import path


def test_mol2_load_one():
# test mol2 one structure
with path('iodata.test.data', 'caffeine.mol2') as fn_mol:
mol = load_one(str(fn_mol))
check_example(mol)


def test_mol2_formaterror(tmpdir):
# test if sdf file has the wrong ending without $$$$
with path('iodata.test.data', 'caffeine.mol2') as fn_test:
with truncated_file(fn_test, 2, 0, tmpdir) as fn:
with pytest.raises(IOError):
load_one(str(fn))


def check_example(mol):
"""Test some things on example file."""
assert mol.title == 'ZINC00001084'
assert_equal(mol.natom, 24)
assert_equal(mol.atnums, [6, 7, 6, 7, 6, 6, 6, 8, 7, 6, 8, 7, 6, 6,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
assert mol.atffparams['attypes'][0] == 'C.3'
# check coordinates
atcoords_ang = mol.atcoords / angstrom
assert_allclose(atcoords_ang[0], [-0.0178, 1.4608, 0.0101])
assert_allclose(atcoords_ang[1], [0.0021, -0.0041, 0.0020])
assert_allclose(atcoords_ang[22], [0.5971, -2.2951, 5.2627])
assert_allclose(atcoords_ang[23], [0.5705, -0.5340, 5.0055])
assert_allclose(mol.atcharges['mol2'][0], 0.0684)
assert_allclose(mol.atcharges['mol2'][23], 0.0949)


def check_load_dump_consistency(tmpdir, fn):
"""Check if dumping and loading an SDF file results in the same data."""
mol0 = load_one(str(fn))
# write sdf file in a temporary folder & then read it
fn_tmp = os.path.join(tmpdir, 'test.mol2')
dump_one(mol0, fn_tmp, fmt='mol2')
mol1 = load_one(fn_tmp)
# check two sdf files
assert mol0.title == mol1.title
assert_equal(mol0.atnums, mol1.atnums)
assert_allclose(mol0.atcoords, mol1.atcoords, atol=1.e-5)


def test_load_dump_consistency(tmpdir):
with path('iodata.test.data', 'caffeine.mol2') as fn_mol2:
check_load_dump_consistency(tmpdir, fn_mol2)


def test_load_many():
with path('iodata.test.data', 'caffeine.mol2') as fn_mol2:
mols = list(load_many(str(fn_mol2)))
assert len(mols) == 2
check_example(mols[0])
assert mols[1].title == 'ZINC00001085'
assert_equal(mols[1].natom, 24)
assert_allclose(mols[0].atcoords[0] / angstrom, [-0.0178, 1.4608, 0.0101])
assert_allclose(mols[1].atcoords[0] / angstrom, [-0.0100, 1.5608, 0.0201])


def test_load_dump_many_consistency(tmpdir):
with path('iodata.test.data', 'caffeine.mol2') as fn_mol2:
mols0 = list(load_many(str(fn_mol2)))
# write mol2 file in a temporary folder & then read it
fn_tmp = os.path.join(tmpdir, 'test')
dump_many(mols0, fn_tmp, fmt='mol2')
mols1 = list(load_many(fn_tmp, fmt='mol2'))
assert len(mols0) == len(mols1)
for mol0, mol1 in zip(mols0, mols1):
assert mol0.title == mol1.title
assert_equal(mol0.atnums, mol1.atnums)
assert_allclose(mol0.atcoords, mol1.atcoords, atol=1.e-5)

0 comments on commit 15246e9

Please sign in to comment.