forked from theochem/iodata
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Mol2 format implementation addressing theochem#78 and theochem#37
- Loading branch information
Esteban Vohringer-Martinez
authored and
Esteban Vohringer-Martinez
committed
May 29, 2019
1 parent
7d40670
commit 15246e9
Showing
3 changed files
with
373 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
# IODATA is an input and output module for quantum chemistry. | ||
# Copyright (C) 2011-2019 The IODATA Development Team | ||
# | ||
# This file is part of IODATA. | ||
# | ||
# IODATA is free software; you can redistribute it and/or | ||
# modify it under the terms of the GNU General Public License | ||
# as published by the Free Software Foundation; either version 3 | ||
# of the License, or (at your option) any later version. | ||
# | ||
# IODATA is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program; if not, see <http://www.gnu.org/licenses/> | ||
# -- | ||
"""MOL2 file format. | ||
There are different formats of mol2 files. Here the compatibility with AMBER software | ||
was the main objective to write out files with atomic charges used by antechamber. | ||
""" | ||
|
||
|
||
from typing import TextIO, Iterator, Tuple | ||
|
||
import numpy as np | ||
|
||
from ..docstrings import (document_load_one, document_load_many, document_dump_one, | ||
document_dump_many) | ||
from ..iodata import IOData | ||
from ..periodic import sym2num, num2sym | ||
from ..utils import angstrom, LineIterator | ||
|
||
|
||
__all__ = [] | ||
|
||
|
||
PATTERNS = ['*.mol2'] | ||
|
||
|
||
@document_load_one("MOL2", ['atcoords', 'atnums', 'atcharges', 'atffparams'], ['title']) | ||
def load_one(lit: LineIterator) -> dict: | ||
"""Do not edit this docstring. It will be overwritten.""" | ||
molecule_found = False | ||
while True: | ||
try: | ||
line = next(lit) | ||
except StopIteration: | ||
break | ||
if len(line) > 1: | ||
words = line.split() | ||
if words[0] == "@<TRIPOS>MOLECULE": | ||
# Found another molecule; go one line back and break | ||
if molecule_found: | ||
lit.back(line) | ||
break | ||
title = next(lit).strip() | ||
if words[0] == "@<TRIPOS>ATOM": | ||
atnums, atcoords, atchgs, attypes = _load_helper_atoms(lit) | ||
atcharges = {"mol2": atchgs} | ||
atffparams = {"attypes": attypes} | ||
result = { | ||
'atcoords': atcoords, | ||
'atnums': atnums, | ||
'atcharges': atcharges, | ||
'atffparams': atffparams | ||
} | ||
if title is not None: | ||
result['title'] = title | ||
molecule_found = True | ||
if molecule_found is False: | ||
raise lit.error("Molecule could not be read") | ||
return result | ||
|
||
|
||
def _load_helper_atoms(lit: LineIterator) -> Tuple[np.ndarray, np.ndarray, np.ndarray, tuple]: | ||
"""Load element numbers, coordinates and atomic charges.""" | ||
atnums = [] | ||
atcoords = [] | ||
atchgs = [] | ||
attypes = [] | ||
for line in lit: | ||
words = line.split() | ||
if len(words) <= 6: | ||
break | ||
# Assume that the first character of atom type is the element | ||
try: | ||
atnums.append(sym2num.get(words[5][0].title())) | ||
except ValueError: | ||
print(f'Can not convert atom type {words[5][0]} to element') | ||
attypes.append(words[5]) | ||
atcoords.append([float(words[2]), float(words[3]), float(words[4])]) | ||
if len(words) == 9: | ||
atchgs.append(float(words[8])) | ||
else: | ||
atchg.append(0.0) | ||
atnums = np.array(atnums, int) | ||
atcoords = np.array(atcoords) * angstrom | ||
atchgs = np.array(atchgs) | ||
attypes = tuple(attypes) | ||
return atnums, atcoords, atchgs, attypes | ||
|
||
|
||
@document_load_many("MOL2", ['atcoords', 'atnums', 'atcharges', 'atffparams'], ['title']) | ||
def load_many(lit: LineIterator) -> Iterator[dict]: | ||
"""Do not edit this docstring. It will be overwritten.""" | ||
# MOL2 files with more molecules are a simple concatenation of individual MOL2 files,' | ||
# making it trivial to load many frames. | ||
while True: | ||
try: | ||
yield load_one(lit) | ||
except IOError: | ||
return | ||
|
||
|
||
@document_dump_one("MOL2", ['atcoords', 'atnums'], ['atcharges', 'atffparams', 'title']) | ||
def dump_one(f: TextIO, data: IOData): | ||
"""Do not edit this docstring. It will be overwritten.""" | ||
# The first six lines are reserved for comments | ||
print("# Mol2 file created with Iodata", file=f) | ||
print("\n\n\n\n\n", file=f) | ||
print("@<TRIPOS>MOLECULE", file=f) | ||
print(data.title or 'Created with IOData', file=f) | ||
print("@<TRIPOS>ATOM", file=f) | ||
for i in range(data.natom): | ||
n = num2sym[data.atnums[i]] | ||
x, y, z = data.atcoords[i] / angstrom | ||
out1 = f'{i:6d} {n:2s} {x:10.4f} {y:10.4f} {z:10.4f} ' | ||
if not data.atcharges: | ||
# Will write the first atomic charge key of the dictionary | ||
charge = data.atcharges[0][i] | ||
if not data.attparams: | ||
for param, value in data.atffparams.items(): | ||
if param == 'attypes': | ||
attype = values[i] | ||
out2 = f'{attype:3s} 1 XXX {charge:1.5f}' | ||
else: | ||
out2 = f'{n:2s} 1 XXX {charge:1.5f}' | ||
print(out1 + out2, file=f) | ||
else: | ||
out2 = f'{n:2s} 1 XXX 0.0000' | ||
print(out1 + out2, file=f) | ||
|
||
|
||
@document_dump_many("MOL2", ['atcoords', 'atnums', 'atcharges'], ['title']) | ||
def dump_many(f: TextIO, datas: Iterator[IOData]): | ||
"""Do not edit this docstring. It will be overwritten.""" | ||
# Similar to load_many, this is relatively easy. | ||
for data in datas: | ||
dump_one(f, data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
@<TRIPOS>MOLECULE | ||
ZINC00001084 | ||
24 25 0 0 0 | ||
SMALL | ||
USER_CHARGES | ||
|
||
@<TRIPOS>ATOM | ||
1 C1 -0.0178 1.4608 0.0101 C.3 1 <0> 0.0684 | ||
2 N1 0.0021 -0.0041 0.0020 N.pl3 1 <0> -0.4126 | ||
3 C2 0.0085 -0.7996 -1.0853 C.2 1 <0> 0.2683 | ||
4 N2 0.0275 -2.0544 -0.7142 N.2 1 <0> -0.4991 | ||
5 C3 0.0343 -2.1318 0.6190 C.2 1 <0> 0.3175 | ||
6 C4 0.0238 -0.8312 1.1071 C.2 1 <0> -0.1559 | ||
7 C5 0.0338 -0.6202 2.5028 C.2 1 <0> 0.5949 | ||
8 O1 0.0219 0.5106 2.9550 O.2 1 <0> -0.5132 | ||
9 N3 0.0564 -1.6860 3.3300 N.am 1 <0> -0.5520 | ||
10 C6 0.0693 -2.9392 2.8440 C.2 1 <0> 0.7070 | ||
11 O2 0.0891 -3.8755 3.6197 O.2 1 <0> -0.5319 | ||
12 N4 0.0527 -3.1841 1.5201 N.am 1 <0> -0.5109 | ||
13 C7 0.0693 -4.5651 1.0316 C.3 1 <0> 0.0891 | ||
14 C8 0.0674 -1.4741 4.7795 C.3 1 <0> 0.0836 | ||
15 H1 1.0048 1.8381 0.0023 H 1 <0> 0.0879 | ||
16 H2 -0.5450 1.8219 -0.8730 H 1 <0> 0.0987 | ||
17 H3 -0.5280 1.8123 0.9069 H 1 <0> 0.1006 | ||
18 H4 -0.0012 -0.4540 -2.1085 H 1 <0> 0.2318 | ||
19 H5 1.1010 -4.8936 0.9058 H 1 <0> 0.0732 | ||
20 H6 -0.4324 -5.2114 1.7518 H 1 <0> 0.0936 | ||
21 H7 -0.4482 -4.6180 0.0738 H 1 <0> 0.0925 | ||
22 H8 -0.9574 -1.4357 5.1489 H 1 <0> 0.0769 | ||
23 H9 0.5971 -2.2951 5.2627 H 1 <0> 0.0968 | ||
24 H10 0.5705 -0.5340 5.0055 H 1 <0> 0.0949 | ||
@<TRIPOS>BOND | ||
1 1 2 1 | ||
2 1 15 1 | ||
3 1 16 1 | ||
4 1 17 1 | ||
5 2 6 1 | ||
6 2 3 1 | ||
7 3 4 2 | ||
8 3 18 1 | ||
9 4 5 1 | ||
10 5 12 1 | ||
11 5 6 2 | ||
12 6 7 1 | ||
13 7 8 2 | ||
14 7 9 am | ||
15 9 10 am | ||
16 9 14 1 | ||
17 10 11 2 | ||
18 10 12 am | ||
19 12 13 1 | ||
20 13 19 1 | ||
21 13 20 1 | ||
22 13 21 1 | ||
23 14 22 1 | ||
24 14 23 1 | ||
25 14 24 1 | ||
@<TRIPOS>MOLECULE | ||
ZINC00001085 | ||
24 25 0 0 0 | ||
SMALL | ||
USER_CHARGES | ||
|
||
@<TRIPOS>ATOM | ||
1 C1 -0.0100 1.5608 0.0201 C.3 1 <0> 0.0684 | ||
2 N1 0.0021 -0.0041 0.0020 N.pl3 1 <0> -0.4126 | ||
3 C2 0.0085 -0.7996 -1.0853 C.2 1 <0> 0.2683 | ||
4 N2 0.0275 -2.0544 -0.7142 N.2 1 <0> -0.4991 | ||
5 C3 0.0343 -2.1318 0.6190 C.2 1 <0> 0.3175 | ||
6 C4 0.0238 -0.8312 1.1071 C.2 1 <0> -0.1559 | ||
7 C5 0.0338 -0.6202 2.5028 C.2 1 <0> 0.5949 | ||
8 O1 0.0219 0.5106 2.9550 O.2 1 <0> -0.5132 | ||
9 N3 0.0564 -1.6860 3.3300 N.am 1 <0> -0.5520 | ||
10 C6 0.0693 -2.9392 2.8440 C.2 1 <0> 0.7070 | ||
11 O2 0.0891 -3.8755 3.6197 O.2 1 <0> -0.5319 | ||
12 N4 0.0527 -3.1841 1.5201 N.am 1 <0> -0.5109 | ||
13 C7 0.0693 -4.5651 1.0316 C.3 1 <0> 0.0891 | ||
14 C8 0.0674 -1.4741 4.7795 C.3 1 <0> 0.0836 | ||
15 H1 1.0048 1.8381 0.0023 H 1 <0> 0.0879 | ||
16 H2 -0.5450 1.8219 -0.8730 H 1 <0> 0.0987 | ||
17 H3 -0.5280 1.8123 0.9069 H 1 <0> 0.1006 | ||
18 H4 -0.0012 -0.4540 -2.1085 H 1 <0> 0.2318 | ||
19 H5 1.1010 -4.8936 0.9058 H 1 <0> 0.0732 | ||
20 H6 -0.4324 -5.2114 1.7518 H 1 <0> 0.0936 | ||
21 H7 -0.4482 -4.6180 0.0738 H 1 <0> 0.0925 | ||
22 H8 -0.9574 -1.4357 5.1489 H 1 <0> 0.0769 | ||
23 H9 0.5971 -2.2951 5.2627 H 1 <0> 0.0968 | ||
24 H10 0.5705 -0.5340 5.0055 H 1 <0> 0.0949 | ||
@<TRIPOS>BOND | ||
1 1 2 1 | ||
2 1 15 1 | ||
3 1 16 1 | ||
4 1 17 1 | ||
5 2 6 1 | ||
6 2 3 1 | ||
7 3 4 2 | ||
8 3 18 1 | ||
9 4 5 1 | ||
10 5 12 1 | ||
11 5 6 2 | ||
12 6 7 1 | ||
13 7 8 2 | ||
14 7 9 am | ||
15 9 10 am | ||
16 9 14 1 | ||
17 10 11 2 | ||
18 10 12 am | ||
19 12 13 1 | ||
20 13 19 1 | ||
21 13 20 1 | ||
22 13 21 1 | ||
23 14 22 1 | ||
24 14 23 1 | ||
25 14 24 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# IODATA is an input and output module for quantum chemistry. | ||
# Copyright (C) 2011-2019 The IODATA Development Team | ||
# | ||
# This file is part of IODATA. | ||
# | ||
# IODATA is free software; you can redistribute it and/or | ||
# modify it under the terms of the GNU General Public License | ||
# as published by the Free Software Foundation; either version 3 | ||
# of the License, or (at your option) any later version. | ||
# | ||
# IODATA is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program; if not, see <http://www.gnu.org/licenses/> | ||
# -- | ||
"""Test iodata.formats.mol2 module.""" | ||
|
||
import os | ||
|
||
import pytest | ||
from numpy.testing import assert_equal, assert_allclose | ||
|
||
from .common import truncated_file | ||
from ..api import load_one, load_many, dump_one, dump_many | ||
from ..utils import angstrom | ||
try: | ||
from importlib_resources import path | ||
except ImportError: | ||
from importlib.resources import path | ||
|
||
|
||
def test_mol2_load_one(): | ||
# test mol2 one structure | ||
with path('iodata.test.data', 'caffeine.mol2') as fn_mol: | ||
mol = load_one(str(fn_mol)) | ||
check_example(mol) | ||
|
||
|
||
def test_mol2_formaterror(tmpdir): | ||
# test if sdf file has the wrong ending without $$$$ | ||
with path('iodata.test.data', 'caffeine.mol2') as fn_test: | ||
with truncated_file(fn_test, 2, 0, tmpdir) as fn: | ||
with pytest.raises(IOError): | ||
load_one(str(fn)) | ||
|
||
|
||
def check_example(mol): | ||
"""Test some things on example file.""" | ||
assert mol.title == 'ZINC00001084' | ||
assert_equal(mol.natom, 24) | ||
assert_equal(mol.atnums, [6, 7, 6, 7, 6, 6, 6, 8, 7, 6, 8, 7, 6, 6, | ||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) | ||
assert mol.atffparams['attypes'][0] == 'C.3' | ||
# check coordinates | ||
atcoords_ang = mol.atcoords / angstrom | ||
assert_allclose(atcoords_ang[0], [-0.0178, 1.4608, 0.0101]) | ||
assert_allclose(atcoords_ang[1], [0.0021, -0.0041, 0.0020]) | ||
assert_allclose(atcoords_ang[22], [0.5971, -2.2951, 5.2627]) | ||
assert_allclose(atcoords_ang[23], [0.5705, -0.5340, 5.0055]) | ||
assert_allclose(mol.atcharges['mol2'][0], 0.0684) | ||
assert_allclose(mol.atcharges['mol2'][23], 0.0949) | ||
|
||
|
||
def check_load_dump_consistency(tmpdir, fn): | ||
"""Check if dumping and loading an SDF file results in the same data.""" | ||
mol0 = load_one(str(fn)) | ||
# write sdf file in a temporary folder & then read it | ||
fn_tmp = os.path.join(tmpdir, 'test.mol2') | ||
dump_one(mol0, fn_tmp, fmt='mol2') | ||
mol1 = load_one(fn_tmp) | ||
# check two sdf files | ||
assert mol0.title == mol1.title | ||
assert_equal(mol0.atnums, mol1.atnums) | ||
assert_allclose(mol0.atcoords, mol1.atcoords, atol=1.e-5) | ||
|
||
|
||
def test_load_dump_consistency(tmpdir): | ||
with path('iodata.test.data', 'caffeine.mol2') as fn_mol2: | ||
check_load_dump_consistency(tmpdir, fn_mol2) | ||
|
||
|
||
def test_load_many(): | ||
with path('iodata.test.data', 'caffeine.mol2') as fn_mol2: | ||
mols = list(load_many(str(fn_mol2))) | ||
assert len(mols) == 2 | ||
check_example(mols[0]) | ||
assert mols[1].title == 'ZINC00001085' | ||
assert_equal(mols[1].natom, 24) | ||
assert_allclose(mols[0].atcoords[0] / angstrom, [-0.0178, 1.4608, 0.0101]) | ||
assert_allclose(mols[1].atcoords[0] / angstrom, [-0.0100, 1.5608, 0.0201]) | ||
|
||
|
||
def test_load_dump_many_consistency(tmpdir): | ||
with path('iodata.test.data', 'caffeine.mol2') as fn_mol2: | ||
mols0 = list(load_many(str(fn_mol2))) | ||
# write mol2 file in a temporary folder & then read it | ||
fn_tmp = os.path.join(tmpdir, 'test') | ||
dump_many(mols0, fn_tmp, fmt='mol2') | ||
mols1 = list(load_many(fn_tmp, fmt='mol2')) | ||
assert len(mols0) == len(mols1) | ||
for mol0, mol1 in zip(mols0, mols1): | ||
assert mol0.title == mol1.title | ||
assert_equal(mol0.atnums, mol1.atnums) | ||
assert_allclose(mol0.atcoords, mol1.atcoords, atol=1.e-5) |