class TopologyGenerator:
"""
Generate topology files for MD simulations.
Supports AMBER (prmtop, inpcrd, frcmod, lib), GROMACS (top, gro, pdb),
and OpenMM (xml, pdb) formats.
Parameters
----------
work_dir : str
Working directory.
force_field : str
Force field to use. Default 'gaff2'.
"""
def __init__(self, work_dir: str = './work', force_field: str = 'gaff2'):
self.work_dir = create_work_dir(work_dir)
self.force_field = force_field
self.tleap_dir = os.path.join(self.work_dir, 'tleap_output')
os.makedirs(self.tleap_dir, exist_ok=True)
def generate_amber(self, mol_file: str, frcmod_file: Optional[str] = None,
charge_method: str = 'am1bcc', mol2_file: Optional[str] = None,
output_prefix: str = 'SYS', lib_name: Optional[str] = None,
use_two_stage: bool = True,
resp_charges_file: Optional[str] = None) -> Dict[str, Any]:
"""
Generate AMBER topology files using tLEaP.
Parameters
----------
mol_file : str
Path to input MOL/PDB file.
frcmod_file : str, optional
Path to custom FRCMOD file (e.g., with optimized dihedral parameters).
charge_method : str
Charge method for antechamber ('am1bcc', 'resp', 'gasteiger').
If resp_charges_file is provided, this is ignored (charges are read
from file using antechamber's -c rc mode).
mol2_file : str, optional
Path to existing MOL2 file with charges. If None, generates or reuses one.
output_prefix : str
Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.
lib_name : str, optional
Library filename to write/read during two-stage tLEaP execution.
use_two_stage : bool
If True, follow the original notebook workflow with saveoff/loadoff.
resp_charges_file : str, optional
Path to a file containing pre-computed RESP charges (one charge per line).
Typically generated by calculate_resp_charges(). When provided, antechamber
uses these charges directly (-c rc -cf <file>) instead of computing them.
This allows using Psi4-computed RESP charges in the AMBER topology.
"""
if mol2_file is None:
mol2_candidates = [
os.path.join(self.tleap_dir, 'new.mol2'),
os.path.join(self.tleap_dir, 'ligand.mol2'),
]
for candidate in mol2_candidates:
if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
mol2_file = candidate
logger.info(f"Reusing existing MOL2: {mol2_file}")
break
if mol2_file is None:
mol2_file = self._generate_mol2_with_charges(
mol_file, charge_method, resp_charges_file=resp_charges_file
)
if frcmod_file is not None:
frcmod_path = os.path.join(self.tleap_dir, os.path.basename(frcmod_file))
if os.path.abspath(frcmod_file) != os.path.abspath(frcmod_path):
shutil.copy(frcmod_file, frcmod_path)
else:
frcmod_path = frcmod_file
else:
frcmod_candidates = [
os.path.join(self.tleap_dir, 'ligand_new.frcmod'),
os.path.join(self.tleap_dir, 'ligand.frcmod'),
]
frcmod_path = None
for candidate in frcmod_candidates:
if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
frcmod_path = candidate
break
if frcmod_path is None:
frcmod_path = os.path.join(self.tleap_dir, 'ligand.frcmod')
self._run_parmchk2(mol2_file, frcmod_path)
lib_default = 'lig_new.lib' if output_prefix.endswith('_new') else 'lig.lib'
if lib_name is None:
lib_name = lib_default
stem = os.path.splitext(os.path.basename(mol2_file))[0] or 'ligand'
paths = {
'prmtop': os.path.join(self.tleap_dir, f'{output_prefix}.prmtop'),
'inpcrd': os.path.join(self.tleap_dir, f'{output_prefix}.crd'),
'pdb': os.path.join(self.tleap_dir, f'{output_prefix}.pdb'),
'lib': os.path.join(self.tleap_dir, lib_name),
'mol2': mol2_file,
'frcmod': frcmod_path,
'ligand_pdb': os.path.join(self.tleap_dir, f'{stem}_gaff.pdb'),
'tleap_in': os.path.join(self.tleap_dir, 'tleap.in'),
'tleap_stage1': os.path.join(self.tleap_dir, 'tleap_stage1.in'),
'tleap_stage2': os.path.join(self.tleap_dir, 'tleap_stage2.in'),
}
if use_two_stage:
stage1_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
savepdb LIG {paths['ligand_pdb']}
quit"""
stage1_result = self._run_tleap(stage1_input, paths['tleap_stage1'])
if not os.path.exists(paths['lib']) or os.path.getsize(paths['lib']) == 0:
self._raise_tleap_failure(stage1_result, paths['lib'])
stage2_input = f"""source leaprc.protein.ff19SB
source leaprc.DNA.OL15
source leaprc.RNA.OL3
source leaprc.GLYCAM_06j-1
source leaprc.{self.force_field}
loadamberparams {paths['frcmod']}
loadoff {paths['lib']}
SYS = loadmol2 {paths['mol2']}
alignaxes SYS
check SYS
charge SYS
set SYS box {{50,50,50}}
saveamberparm SYS {paths['prmtop']} {paths['inpcrd']}
savepdb SYS {paths['pdb']}
quit"""
result = self._run_tleap(stage2_input, paths['tleap_stage2'])
else:
tleap_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
saveamberparm LIG {paths['prmtop']} {paths['inpcrd']}
savepdb LIG {paths['pdb']}
quit"""
result = self._run_tleap(tleap_input, paths['tleap_in'])
if not os.path.exists(paths['prmtop']) or os.path.getsize(paths['prmtop']) == 0:
self._raise_tleap_failure(result, paths['prmtop'])
logger.info(f"AMBER topology files generated (prefix={output_prefix})")
return paths
def generate_gromacs(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
"""
Generate GROMACS topology files from AMBER topology.
Follows the original ParametrizANI notebook approach:
1. Load prmtop/inpcrd with OpenMM
2. Create the OpenMM System (which correctly encodes all parameters)
3. Use parmed.openmm.load_topology() to build the parmed structure
4. Save to GROMACS format (.top, .gro, .pdb)
This ensures all dihedral parameters (including optimized ones) are
correctly transferred to the GROMACS topology.
"""
import parmed
import openmm as mm
from openmm.app import AmberPrmtopFile, AmberInpcrdFile
from openmm import unit
gmx_dir = os.path.join(self.work_dir, 'gromacs')
os.makedirs(gmx_dir, exist_ok=True)
# Load and create OpenMM system (matches original notebook approach)
prmtop = AmberPrmtopFile(prmtop_file)
inpcrd = AmberInpcrdFile(inpcrd_file)
omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)
# Create parmed structure from OpenMM system (preserves all parameters)
parmed_structure = parmed.openmm.load_topology(
prmtop.topology, omm_system, inpcrd.positions
)
paths = {
'top': os.path.join(gmx_dir, 'system.top'),
'gro': os.path.join(gmx_dir, 'system.gro'),
'pdb': os.path.join(gmx_dir, 'system.pdb'),
}
parmed_structure.save(paths['top'], overwrite=True)
parmed_structure.save(paths['gro'], overwrite=True)
parmed_structure.save(paths['pdb'], overwrite=True)
logger.info("GROMACS topology files generated")
return paths
def generate_openmm(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
"""
Generate OpenMM XML system file and PDB.
Follows the original ParametrizANI notebook approach:
1. Load prmtop/inpcrd with OpenMM
2. Create the OpenMM System (constraints=None to preserve all parameters)
3. Serialize the system to XML
4. Save PDB via parmed.openmm.load_topology()
This ensures all dihedral parameters (including optimized ones) are
correctly encoded in the XML system file.
"""
import parmed
import openmm as mm
from openmm.app import AmberPrmtopFile, AmberInpcrdFile, PDBFile
from openmm import unit, XmlSerializer
omm_dir = os.path.join(self.work_dir, 'openmm')
os.makedirs(omm_dir, exist_ok=True)
prmtop = AmberPrmtopFile(prmtop_file)
inpcrd = AmberInpcrdFile(inpcrd_file)
omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)
# Serialize the system to XML
xml_content = XmlSerializer.serialize(omm_system)
paths = {
'xml': os.path.join(omm_dir, 'system.xml'),
'pdb': os.path.join(omm_dir, 'system.pdb'),
}
with open(paths['xml'], 'w') as f:
f.write(xml_content)
# Save PDB via parmed (matches original notebook)
parmed_structure = parmed.openmm.load_topology(
prmtop.topology, omm_system, inpcrd.positions
)
parmed_structure.save(paths['pdb'], overwrite=True)
logger.info("OpenMM system files generated")
return paths
def generate_all(self, mol_file: str, frcmod_file=None, charge_method='am1bcc',
formats=None, resp_charges_file=None):
"""Generate topology files in all requested formats."""
if formats is None:
formats = ['amber', 'gromacs', 'openmm']
results = {}
amber_result = self.generate_amber(
mol_file, frcmod_file, charge_method,
resp_charges_file=resp_charges_file
)
results['amber'] = amber_result
if 'gromacs' in formats:
results['gromacs'] = self.generate_gromacs(amber_result['prmtop'], amber_result['inpcrd'])
if 'openmm' in formats:
results['openmm'] = self.generate_openmm(amber_result['prmtop'], amber_result['inpcrd'])
return results
def package_results(self, results, output_name='topology_files'):
"""Package all generated files into a ZIP archive."""
zip_path = os.path.join(self.work_dir, f'{output_name}.zip')
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
for format_name, files in results.items():
for file_type, file_path in files.items():
if file_path and os.path.exists(file_path):
arcname = f'{format_name}/{os.path.basename(file_path)}'
zf.write(file_path, arcname)
logger.info(f"Results packaged: {zip_path}")
return zip_path
def update_frcmod(self, original_frcmod: str, optimized_params: str,
dihedral_indices: Optional[List[int]] = None,
mol2_file: Optional[str] = None, output_file=None):
"""
Update FRCMOD file with optimized dihedral parameters.
Follows the original ParametrizANI logic:
1. Find DIHE lines matching the central 2-atom pattern → set to zero
2. Delete DIHE lines matching the full 4-atom pattern
3. Insert new optimized parameters right after the DIHE header using the
reversed orientation derived from the MOL2 atom types.
"""
if output_file is None:
output_file = os.path.join(self.tleap_dir, 'ligand_new.frcmod')
if os.path.abspath(original_frcmod) != os.path.abspath(output_file):
shutil.copy(original_frcmod, output_file)
if mol2_file is None:
mol2_candidates = [
os.path.join(self.tleap_dir, 'ligand.mol2'),
os.path.join(self.tleap_dir, 'new.mol2'),
]
for candidate in mol2_candidates:
if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
mol2_file = candidate
break
if mol2_file and dihedral_indices is not None:
central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern = (
self._get_patterns_from_mol2(mol2_file, dihedral_indices)
)
else:
full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern = (
self._get_patterns_from_params(optimized_params)
)
with open(output_file, 'r') as f:
content = f.read()
content = content.replace(' -', '-')
lines = content.split('\n')
optimized_lines = self._prepare_optimized_param_lines(
optimized_params,
full_pattern=full_pattern,
reversed_full_pattern=reversed_full_pattern,
)
modified_lines = []
in_dihe_section = False
dihe_header_idx = -1
for line in lines:
stripped = line.strip()
if stripped == 'DIHE':
in_dihe_section = True
modified_lines.append(line)
dihe_header_idx = len(modified_lines) - 1
continue
if in_dihe_section and stripped and stripped.isupper() and '-' not in stripped:
in_dihe_section = False
modified_lines.append(line)
continue
if in_dihe_section and stripped:
if self._matches_full_pattern(line, full_pattern, reversed_full_pattern):
continue
if self._contains_central_pattern(line, central_pattern, reversed_central_pattern):
modified_lines.append(self._zero_out_line(line))
else:
modified_lines.append(line)
else:
modified_lines.append(line)
# Generate explicit zero lines for ALL quartets around the central bond
# that aren't already in the frcmod. This prevents tLEaP from falling back
# to generic wildcard params (e.g. X-ce-c-X) which would add unwanted
# torsion energy on top of our optimized parameters.
extra_zero_lines = []
if mol2_file and dihedral_indices is not None:
try:
all_quartets = self._get_all_central_bond_quartets(mol2_file, dihedral_indices)
# Collect patterns already handled (zeroed or in optimized lines)
existing_patterns = set()
for line in modified_lines:
stripped = line.strip()
if stripped and '-' in stripped:
parts = stripped.split()
if parts:
p = parts[0]
existing_patterns.add(p)
# Also add reversed
elems = p.split('-')
if len(elems) == 4:
existing_patterns.add('-'.join(reversed(elems)))
for line in optimized_lines:
parts = line.split()
if parts:
p = parts[0]
existing_patterns.add(p)
elems = p.split('-')
if len(elems) == 4:
existing_patterns.add('-'.join(reversed(elems)))
for quartet in all_quartets:
rev_quartet = '-'.join(reversed(quartet.split('-')))
if quartet not in existing_patterns and rev_quartet not in existing_patterns:
extra_zero_lines.append(f"{quartet} 1 0.000 0.000 1.0")
existing_patterns.add(quartet)
existing_patterns.add(rev_quartet)
except Exception as e:
logger.warning(f"Could not generate extra zero lines: {e}")
if dihe_header_idx >= 0:
# Insert optimized lines first, then extra zeros
all_new_lines = optimized_lines + extra_zero_lines
for new_line in reversed(all_new_lines):
modified_lines.insert(dihe_header_idx + 1, new_line)
else:
modified_lines.append('DIHE')
modified_lines.extend(optimized_lines)
modified_lines.extend(extra_zero_lines)
modified_lines.append('')
with open(output_file, 'w') as f:
f.write('\n'.join(modified_lines))
logger.info(
f"Updated FRCMOD with MOL2-matched patterns and inserted params: {output_file}"
)
return output_file
def _prepare_optimized_param_lines(self, optimized_params: str, full_pattern: str,
reversed_full_pattern: str) -> List[str]:
"""Normalize optimized parameter lines to the reversed MOL2 orientation."""
new_param_lines = []
for line in optimized_params.strip().split('\n'):
stripped = line.strip()
if not stripped or stripped.upper() == 'DIHE':
continue
parts = line.split()
if not parts:
continue
remainder = line[len(parts[0]):]
type_field = reversed_full_pattern or full_pattern or parts[0]
new_param_lines.append(f"{type_field}{remainder}")
return new_param_lines
def _extract_mol2_atom_types(self, mol2_file: str, atom_indices_1based: List[int]) -> List[str]:
"""Extract atom types for the requested 1-based atom indices from a MOL2 file."""
index_to_type: Dict[int, str] = {}
with open(mol2_file, 'r') as f:
in_atom_block = False
for line in f:
if line.startswith('@<TRIPOS>ATOM'):
in_atom_block = True
continue
if line.startswith('@<TRIPOS>'):
in_atom_block = False
continue
if not in_atom_block:
continue
parts = line.split()
if len(parts) >= 6 and parts[0].isdigit():
atom_index = int(parts[0])
if atom_index in atom_indices_1based:
index_to_type[atom_index] = parts[5].lower()
return [index_to_type[idx] for idx in atom_indices_1based if idx in index_to_type]
def _get_patterns_from_mol2(self, mol2_file: str,
dihedral_indices: List[int]) -> Tuple[str, str, str, str]:
"""Build central and full dihedral atom-type patterns from a MOL2 file."""
if len(dihedral_indices) != 4:
raise ValueError("dihedral_indices must contain exactly 4 atom indices")
indices_1based = [int(idx) + 1 for idx in dihedral_indices]
central_indices = [indices_1based[1], indices_1based[2]]
central_types = self._extract_mol2_atom_types(mol2_file, central_indices)
full_types = self._extract_mol2_atom_types(mol2_file, indices_1based)
if len(central_types) != 2 or len(full_types) != 4:
raise ValueError(
f"Could not extract requested atom types from MOL2 file: {mol2_file}"
)
central_pattern = '-'.join(central_types)
reversed_central_pattern = '-'.join(reversed(central_types))
full_pattern = '-'.join(full_types)
reversed_full_pattern = '-'.join(reversed(full_types))
return central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern
def _get_all_central_bond_quartets(self, mol2_file: str, dihedral_indices: List[int]) -> List[str]:
"""
Find ALL dihedral quartets around the central bond from the MOL2 file.
Returns type patterns like 'h4-ce-c-o', 'c2-ce-c-os', etc. for every
proper torsion that shares the central bond (atoms at indices 1 and 2
of dihedral_indices). These must all be explicitly zeroed in the FRCMOD
to prevent generic wildcard parameters (e.g. X-ce-c-X from gaff2.dat)
from being applied by tLEaP.
"""
indices_1based = [int(idx) + 1 for idx in dihedral_indices]
central_atom2 = indices_1based[1] # atom index of central atom 2 (1-based)
central_atom3 = indices_1based[2] # atom index of central atom 3 (1-based)
# Parse MOL2: get all atom types and bonds
atom_types_map: Dict[int, str] = {}
bonds: List[Tuple[int, int]] = []
with open(mol2_file, 'r') as f:
section = None
for line in f:
if line.startswith('@<TRIPOS>ATOM'):
section = 'atom'
continue
elif line.startswith('@<TRIPOS>BOND'):
section = 'bond'
continue
elif line.startswith('@<TRIPOS>'):
section = None
continue
if section == 'atom':
parts = line.split()
if len(parts) >= 6 and parts[0].isdigit():
atom_idx = int(parts[0])
atom_types_map[atom_idx] = parts[5].lower()
elif section == 'bond':
parts = line.split()
if len(parts) >= 4 and parts[0].isdigit():
a1 = int(parts[1])
a2 = int(parts[2])
bonds.append((a1, a2))
# Build adjacency list
neighbors: Dict[int, List[int]] = {}
for a1, a2 in bonds:
neighbors.setdefault(a1, []).append(a2)
neighbors.setdefault(a2, []).append(a1)
# Find all atoms bonded to central_atom2 (excluding central_atom3)
outer_left = [a for a in neighbors.get(central_atom2, []) if a != central_atom3]
# Find all atoms bonded to central_atom3 (excluding central_atom2)
outer_right = [a for a in neighbors.get(central_atom3, []) if a != central_atom2]
# Generate all quartet type patterns: left-central2-central3-right
type2 = atom_types_map.get(central_atom2, '')
type3 = atom_types_map.get(central_atom3, '')
all_quartets = []
for left_atom in outer_left:
for right_atom in outer_right:
type_left = atom_types_map.get(left_atom, '')
type_right = atom_types_map.get(right_atom, '')
if type_left and type_right and type2 and type3:
pattern = f"{type_left}-{type2}-{type3}-{type_right}"
all_quartets.append(pattern)
return all_quartets
def _get_patterns_from_params(self, optimized_params: str) -> Tuple[str, str, str, str]:
"""Fallback pattern extraction from optimized parameter lines."""
for line in optimized_params.strip().split('\n'):
stripped = line.strip()
if not stripped or stripped.upper() == 'DIHE':
continue
type_field = stripped.split()[0]
atom_types = type_field.split('-')
if len(atom_types) == 4:
full_pattern = '-'.join(atom_types)
reversed_full_pattern = '-'.join(reversed(atom_types))
central_pattern = '-'.join(atom_types[1:3])
reversed_central_pattern = '-'.join(reversed(atom_types[1:3]))
return full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern
return '', '', '', ''
def _contains_central_pattern(self, line: str, pattern: str, rev_pattern: str) -> bool:
"""Check if a DIHE line contains the internal 2-atom pattern."""
parts = line.split()
if not parts:
return False
type_field = parts[0]
elements = type_field.split('-')
if len(elements) < 4:
return False
for i in range(1, len(elements) - 1):
segment = f"{elements[i]}-{elements[i + 1]}"
if segment == pattern or segment == rev_pattern:
return True
return False
def _matches_full_pattern(self, line: str, pattern4: str, rev_pattern4: str) -> bool:
"""Check if a DIHE line matches the full 4-atom pattern."""
parts = line.split()
if not parts:
return False
type_field = parts[0]
return type_field == pattern4 or type_field == rev_pattern4
def _zero_out_line(self, line: str) -> str:
"""Zero a DIHE line: set PK=0, PHASE=0, PN=1.0 while keeping type and IDIVF."""
parts = line.split()
if len(parts) < 5:
return line
# parts[0]=type, parts[1]=IDIVF, parts[2]=PK, parts[3]=PHASE, parts[4]=PN
type_field = parts[0]
idivf = parts[1]
# Preserve any trailing comment (e.g., SCEE, SCNB, or # comments)
suffix = ' ' + ' '.join(parts[5:]) if len(parts) > 5 else ''
return f"{type_field} {idivf} 0.000 0.000 1.0{suffix}"
def _run_tleap(self, tleap_input: str, tleap_file: str):
"""Write and run a tLEaP input file."""
with open(tleap_file, 'w') as f:
f.write(tleap_input)
return subprocess.run(
f"tleap -f {tleap_file}", shell=True, capture_output=True, text=True
)
def _raise_tleap_failure(self, result, expected_output: str):
"""Raise a detailed tLEaP failure with recent stdout/stderr."""
logger.error("tLEaP failed to generate topology files.")
logger.error(f"tLEaP stdout: {result.stdout[-500:] if result.stdout else 'empty'}")
logger.error(f"tLEaP stderr: {result.stderr[-500:] if result.stderr else 'empty'}")
raise RuntimeError(
f"tLEaP failed to generate {expected_output}. Check that mol2 and frcmod files are valid.\n"
f"tLEaP output: {result.stdout[-200:] if result.stdout else 'none'}"
)
def _generate_mol2_with_charges(self, mol_file, charge_method,
resp_charges_file=None):
"""
Generate MOL2 file with charges using antechamber.
If resp_charges_file is provided, uses antechamber's -c rc -cf <file>
mode to read pre-computed charges (e.g., from Psi4 RESP calculation)
instead of computing them internally.
"""
mol2_output = os.path.join(self.tleap_dir, 'ligand.mol2')
ext = os.path.splitext(mol_file)[1].lower()
input_format = {'.mol': 'mdl', '.pdb': 'pdb', '.mol2': 'mol2', '.sdf': 'sdf'}.get(ext, 'pdb')
if resp_charges_file is not None:
# Use pre-computed charges (e.g., from Psi4 RESP)
# antechamber -c rc reads charges from file specified with -cf
charges_path = os.path.abspath(resp_charges_file)
# Ensure the charges file is in the format antechamber expects
# (one charge per line, no header). Our calculate_resp_charges()
# saves with np.savetxt which adds a # header line — strip it.
clean_charges_path = os.path.join(self.tleap_dir, 'resp_charges_clean.dat')
self._prepare_charges_file(charges_path, clean_charges_path)
cmd = (
f"antechamber -i {mol_file} -fi {input_format} "
f"-o {mol2_output} -fo mol2 "
f"-c rc -cf {clean_charges_path} "
f"-at {self.force_field} -pf y"
)
logger.info(f"Using pre-computed RESP charges from: {resp_charges_file}")
else:
charge_flag = {
'am1bcc': 'bcc', 'bcc': 'bcc',
'resp': 'resp', 'gasteiger': 'gas'
}.get(charge_method.lower(), 'bcc')
cmd = (
f"antechamber -i {mol_file} -fi {input_format} "
f"-o {mol2_output} -fo mol2 "
f"-c {charge_flag} -at {self.force_field} -pf y"
)
result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if not os.path.exists(mol2_output) or os.path.getsize(mol2_output) == 0:
error_msg = f"antechamber failed to generate MOL2 file.\nCommand: {cmd}"
if result.stderr:
error_msg += f"\nstderr: {result.stderr[-300:]}"
raise RuntimeError(error_msg)
return mol2_output
def _prepare_charges_file(self, input_path: str, output_path: str):
"""
Prepare a charges file for antechamber -c rc -cf mode.
antechamber expects one charge per line with no header comments.
Our calculate_resp_charges() uses np.savetxt which adds a '# ...' header.
This method strips any comment lines and ensures the format is correct.
"""
charges = []
with open(input_path, 'r') as f:
for line in f:
stripped = line.strip()
if not stripped or stripped.startswith('#'):
continue
try:
charges.append(float(stripped))
except ValueError:
continue
if not charges:
raise ValueError(
f"No valid charges found in {input_path}. "
"Expected one numeric value per line."
)
# Write clean charges file (one per line, no header)
with open(output_path, 'w') as f:
for charge in charges:
f.write(f"{charge:.6f}\n")
logger.info(f"Prepared {len(charges)} charges for antechamber from {input_path}")
def _run_parmchk2(self, mol2_file, frcmod_output):
"""Run parmchk2 to generate missing parameters."""
cmd = f"parmchk2 -i {mol2_file} -f mol2 -o {frcmod_output} -s {self.force_field}"
subprocess.run(cmd, shell=True, capture_output=True, text=True)
if not os.path.exists(frcmod_output):
raise RuntimeError("parmchk2 failed to generate FRCMOD file.")
return frcmod_output