TopologyGenerator¶

Generate topology files for MD simulations in AMBER, GROMACS, and OpenMM formats.

Usage¶

from parametrizani import TopologyGenerator

topo = TopologyGenerator('./work', force_field='gaff2')

# Generate initial topology
amber_files = topo.generate_amber(conf['mol_file'], charge_method='am1bcc')

# Update FRCMOD with optimized parameters
updated_frcmod = topo.update_frcmod(
    amber_files['frcmod'],
    optimized_params,
    dihedral_indices=dihedral_indices,
    mol2_file=mol2_file,
)

# Regenerate topology with new parameters
new_amber = topo.generate_amber(
    conf['mol_file'],
    frcmod_file=updated_frcmod,
    mol2_file=mol2_file,
    output_prefix='SYS_new',
)

# Export to other formats
gromacs_files = topo.generate_gromacs(new_amber['prmtop'], new_amber['inpcrd'])
openmm_files = topo.generate_openmm(new_amber['prmtop'], new_amber['inpcrd'])

FRCMOD Update Logic¶

The update_frcmod() method follows the original ParametrizANI notebook algorithm:

Zero all DIHE lines where the central 2-atom segment matches (sets PK=0, PHASE=0, PN=1)
Delete DIHE lines matching the full 4-atom pattern (forward or reversed)
Insert new optimized parameter lines immediately after the DIHE header

When dihedral_indices and mol2_file are provided, patterns are derived from the actual MOL2 atom types (matching the original notebook's process_mol2_elements() logic).

API Reference¶

parametrizani.topology_gen.TopologyGenerator ¶

Generate topology files for MD simulations.

Supports AMBER (prmtop, inpcrd, frcmod, lib), GROMACS (top, gro, pdb), and OpenMM (xml, pdb) formats.

Parameters:

Name	Type	Description	Default
`work_dir`	`str`	Working directory.	`'./work'`
`force_field`	`str`	Force field to use. Default 'gaff2'.	`'gaff2'`

Source code in parametrizani/topology_gen.py

class TopologyGenerator:
    """
    Generate topology files for MD simulations.

    Supports AMBER (prmtop, inpcrd, frcmod, lib), GROMACS (top, gro, pdb),
    and OpenMM (xml, pdb) formats.

    Parameters
    ----------
    work_dir : str
        Working directory.
    force_field : str
        Force field to use. Default 'gaff2'.
    """

    def __init__(self, work_dir: str = './work', force_field: str = 'gaff2'):
        self.work_dir = create_work_dir(work_dir)
        self.force_field = force_field
        self.tleap_dir = os.path.join(self.work_dir, 'tleap_output')
        os.makedirs(self.tleap_dir, exist_ok=True)

    def generate_amber(self, mol_file: str, frcmod_file: Optional[str] = None,
                       charge_method: str = 'am1bcc', mol2_file: Optional[str] = None,
                       output_prefix: str = 'SYS', lib_name: Optional[str] = None,
                       use_two_stage: bool = True,
                       resp_charges_file: Optional[str] = None) -> Dict[str, Any]:
        """
        Generate AMBER topology files using tLEaP.

        Parameters
        ----------
        mol_file : str
            Path to input MOL/PDB file.
        frcmod_file : str, optional
            Path to custom FRCMOD file (e.g., with optimized dihedral parameters).
        charge_method : str
            Charge method for antechamber ('am1bcc', 'resp', 'gasteiger').
            If resp_charges_file is provided, this is ignored (charges are read
            from file using antechamber's -c rc mode).
        mol2_file : str, optional
            Path to existing MOL2 file with charges. If None, generates or reuses one.
        output_prefix : str
            Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.
        lib_name : str, optional
            Library filename to write/read during two-stage tLEaP execution.
        use_two_stage : bool
            If True, follow the original notebook workflow with saveoff/loadoff.
        resp_charges_file : str, optional
            Path to a file containing pre-computed RESP charges (one charge per line).
            Typically generated by calculate_resp_charges(). When provided, antechamber
            uses these charges directly (-c rc -cf <file>) instead of computing them.
            This allows using Psi4-computed RESP charges in the AMBER topology.
        """
        if mol2_file is None:
            mol2_candidates = [
                os.path.join(self.tleap_dir, 'new.mol2'),
                os.path.join(self.tleap_dir, 'ligand.mol2'),
            ]
            for candidate in mol2_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    mol2_file = candidate
                    logger.info(f"Reusing existing MOL2: {mol2_file}")
                    break
            if mol2_file is None:
                mol2_file = self._generate_mol2_with_charges(
                    mol_file, charge_method, resp_charges_file=resp_charges_file
                )

        if frcmod_file is not None:
            frcmod_path = os.path.join(self.tleap_dir, os.path.basename(frcmod_file))
            if os.path.abspath(frcmod_file) != os.path.abspath(frcmod_path):
                shutil.copy(frcmod_file, frcmod_path)
            else:
                frcmod_path = frcmod_file
        else:
            frcmod_candidates = [
                os.path.join(self.tleap_dir, 'ligand_new.frcmod'),
                os.path.join(self.tleap_dir, 'ligand.frcmod'),
            ]
            frcmod_path = None
            for candidate in frcmod_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    frcmod_path = candidate
                    break
            if frcmod_path is None:
                frcmod_path = os.path.join(self.tleap_dir, 'ligand.frcmod')
                self._run_parmchk2(mol2_file, frcmod_path)

        lib_default = 'lig_new.lib' if output_prefix.endswith('_new') else 'lig.lib'
        if lib_name is None:
            lib_name = lib_default

        stem = os.path.splitext(os.path.basename(mol2_file))[0] or 'ligand'
        paths = {
            'prmtop': os.path.join(self.tleap_dir, f'{output_prefix}.prmtop'),
            'inpcrd': os.path.join(self.tleap_dir, f'{output_prefix}.crd'),
            'pdb': os.path.join(self.tleap_dir, f'{output_prefix}.pdb'),
            'lib': os.path.join(self.tleap_dir, lib_name),
            'mol2': mol2_file,
            'frcmod': frcmod_path,
            'ligand_pdb': os.path.join(self.tleap_dir, f'{stem}_gaff.pdb'),
            'tleap_in': os.path.join(self.tleap_dir, 'tleap.in'),
            'tleap_stage1': os.path.join(self.tleap_dir, 'tleap_stage1.in'),
            'tleap_stage2': os.path.join(self.tleap_dir, 'tleap_stage2.in'),
        }

        if use_two_stage:
            stage1_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
savepdb LIG {paths['ligand_pdb']}
quit"""
            stage1_result = self._run_tleap(stage1_input, paths['tleap_stage1'])
            if not os.path.exists(paths['lib']) or os.path.getsize(paths['lib']) == 0:
                self._raise_tleap_failure(stage1_result, paths['lib'])

            stage2_input = f"""source leaprc.protein.ff19SB
source leaprc.DNA.OL15
source leaprc.RNA.OL3
source leaprc.GLYCAM_06j-1
source leaprc.{self.force_field}
loadamberparams {paths['frcmod']}
loadoff {paths['lib']}
SYS = loadmol2 {paths['mol2']}
alignaxes SYS
check SYS
charge SYS
set SYS box {{50,50,50}}
saveamberparm SYS {paths['prmtop']} {paths['inpcrd']}
savepdb SYS {paths['pdb']}
quit"""
            result = self._run_tleap(stage2_input, paths['tleap_stage2'])
        else:
            tleap_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
saveamberparm LIG {paths['prmtop']} {paths['inpcrd']}
savepdb LIG {paths['pdb']}
quit"""
            result = self._run_tleap(tleap_input, paths['tleap_in'])

        if not os.path.exists(paths['prmtop']) or os.path.getsize(paths['prmtop']) == 0:
            self._raise_tleap_failure(result, paths['prmtop'])

        logger.info(f"AMBER topology files generated (prefix={output_prefix})")
        return paths

    def generate_gromacs(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
        """
        Generate GROMACS topology files from AMBER topology.

        Follows the original ParametrizANI notebook approach:
        1. Load prmtop/inpcrd with OpenMM
        2. Create the OpenMM System (which correctly encodes all parameters)
        3. Use parmed.openmm.load_topology() to build the parmed structure
        4. Save to GROMACS format (.top, .gro, .pdb)

        This ensures all dihedral parameters (including optimized ones) are
        correctly transferred to the GROMACS topology.
        """
        import parmed
        import openmm as mm
        from openmm.app import AmberPrmtopFile, AmberInpcrdFile
        from openmm import unit

        gmx_dir = os.path.join(self.work_dir, 'gromacs')
        os.makedirs(gmx_dir, exist_ok=True)

        # Load and create OpenMM system (matches original notebook approach)
        prmtop = AmberPrmtopFile(prmtop_file)
        inpcrd = AmberInpcrdFile(inpcrd_file)
        omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

        # Create parmed structure from OpenMM system (preserves all parameters)
        parmed_structure = parmed.openmm.load_topology(
            prmtop.topology, omm_system, inpcrd.positions
        )

        paths = {
            'top': os.path.join(gmx_dir, 'system.top'),
            'gro': os.path.join(gmx_dir, 'system.gro'),
            'pdb': os.path.join(gmx_dir, 'system.pdb'),
        }
        parmed_structure.save(paths['top'], overwrite=True)
        parmed_structure.save(paths['gro'], overwrite=True)
        parmed_structure.save(paths['pdb'], overwrite=True)
        logger.info("GROMACS topology files generated")
        return paths

    def generate_openmm(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
        """
        Generate OpenMM XML system file and PDB.

        Follows the original ParametrizANI notebook approach:
        1. Load prmtop/inpcrd with OpenMM
        2. Create the OpenMM System (constraints=None to preserve all parameters)
        3. Serialize the system to XML
        4. Save PDB via parmed.openmm.load_topology()

        This ensures all dihedral parameters (including optimized ones) are
        correctly encoded in the XML system file.
        """
        import parmed
        import openmm as mm
        from openmm.app import AmberPrmtopFile, AmberInpcrdFile, PDBFile
        from openmm import unit, XmlSerializer

        omm_dir = os.path.join(self.work_dir, 'openmm')
        os.makedirs(omm_dir, exist_ok=True)

        prmtop = AmberPrmtopFile(prmtop_file)
        inpcrd = AmberInpcrdFile(inpcrd_file)
        omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

        # Serialize the system to XML
        xml_content = XmlSerializer.serialize(omm_system)

        paths = {
            'xml': os.path.join(omm_dir, 'system.xml'),
            'pdb': os.path.join(omm_dir, 'system.pdb'),
        }
        with open(paths['xml'], 'w') as f:
            f.write(xml_content)

        # Save PDB via parmed (matches original notebook)
        parmed_structure = parmed.openmm.load_topology(
            prmtop.topology, omm_system, inpcrd.positions
        )
        parmed_structure.save(paths['pdb'], overwrite=True)

        logger.info("OpenMM system files generated")
        return paths

    def generate_all(self, mol_file: str, frcmod_file=None, charge_method='am1bcc',
                     formats=None, resp_charges_file=None):
        """Generate topology files in all requested formats."""
        if formats is None:
            formats = ['amber', 'gromacs', 'openmm']
        results = {}
        amber_result = self.generate_amber(
            mol_file, frcmod_file, charge_method,
            resp_charges_file=resp_charges_file
        )
        results['amber'] = amber_result
        if 'gromacs' in formats:
            results['gromacs'] = self.generate_gromacs(amber_result['prmtop'], amber_result['inpcrd'])
        if 'openmm' in formats:
            results['openmm'] = self.generate_openmm(amber_result['prmtop'], amber_result['inpcrd'])
        return results

    def package_results(self, results, output_name='topology_files'):
        """Package all generated files into a ZIP archive."""
        zip_path = os.path.join(self.work_dir, f'{output_name}.zip')
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            for format_name, files in results.items():
                for file_type, file_path in files.items():
                    if file_path and os.path.exists(file_path):
                        arcname = f'{format_name}/{os.path.basename(file_path)}'
                        zf.write(file_path, arcname)
        logger.info(f"Results packaged: {zip_path}")
        return zip_path

    def update_frcmod(self, original_frcmod: str, optimized_params: str,
                      dihedral_indices: Optional[List[int]] = None,
                      mol2_file: Optional[str] = None, output_file=None):
        """
        Update FRCMOD file with optimized dihedral parameters.

        Follows the original ParametrizANI logic:
        1. Find DIHE lines matching the central 2-atom pattern → set to zero
        2. Delete DIHE lines matching the full 4-atom pattern
        3. Insert new optimized parameters right after the DIHE header using the
           reversed orientation derived from the MOL2 atom types.
        """
        if output_file is None:
            output_file = os.path.join(self.tleap_dir, 'ligand_new.frcmod')
        if os.path.abspath(original_frcmod) != os.path.abspath(output_file):
            shutil.copy(original_frcmod, output_file)

        if mol2_file is None:
            mol2_candidates = [
                os.path.join(self.tleap_dir, 'ligand.mol2'),
                os.path.join(self.tleap_dir, 'new.mol2'),
            ]
            for candidate in mol2_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    mol2_file = candidate
                    break

        if mol2_file and dihedral_indices is not None:
            central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern = (
                self._get_patterns_from_mol2(mol2_file, dihedral_indices)
            )
        else:
            full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern = (
                self._get_patterns_from_params(optimized_params)
            )

        with open(output_file, 'r') as f:
            content = f.read()
        content = content.replace(' -', '-')
        lines = content.split('\n')

        optimized_lines = self._prepare_optimized_param_lines(
            optimized_params,
            full_pattern=full_pattern,
            reversed_full_pattern=reversed_full_pattern,
        )

        modified_lines = []
        in_dihe_section = False
        dihe_header_idx = -1

        for line in lines:
            stripped = line.strip()

            if stripped == 'DIHE':
                in_dihe_section = True
                modified_lines.append(line)
                dihe_header_idx = len(modified_lines) - 1
                continue
            if in_dihe_section and stripped and stripped.isupper() and '-' not in stripped:
                in_dihe_section = False
                modified_lines.append(line)
                continue

            if in_dihe_section and stripped:
                if self._matches_full_pattern(line, full_pattern, reversed_full_pattern):
                    continue
                if self._contains_central_pattern(line, central_pattern, reversed_central_pattern):
                    modified_lines.append(self._zero_out_line(line))
                else:
                    modified_lines.append(line)
            else:
                modified_lines.append(line)

        # Generate explicit zero lines for ALL quartets around the central bond
        # that aren't already in the frcmod. This prevents tLEaP from falling back
        # to generic wildcard params (e.g. X-ce-c-X) which would add unwanted
        # torsion energy on top of our optimized parameters.
        extra_zero_lines = []
        if mol2_file and dihedral_indices is not None:
            try:
                all_quartets = self._get_all_central_bond_quartets(mol2_file, dihedral_indices)
                # Collect patterns already handled (zeroed or in optimized lines)
                existing_patterns = set()
                for line in modified_lines:
                    stripped = line.strip()
                    if stripped and '-' in stripped:
                        parts = stripped.split()
                        if parts:
                            p = parts[0]
                            existing_patterns.add(p)
                            # Also add reversed
                            elems = p.split('-')
                            if len(elems) == 4:
                                existing_patterns.add('-'.join(reversed(elems)))
                for line in optimized_lines:
                    parts = line.split()
                    if parts:
                        p = parts[0]
                        existing_patterns.add(p)
                        elems = p.split('-')
                        if len(elems) == 4:
                            existing_patterns.add('-'.join(reversed(elems)))

                for quartet in all_quartets:
                    rev_quartet = '-'.join(reversed(quartet.split('-')))
                    if quartet not in existing_patterns and rev_quartet not in existing_patterns:
                        extra_zero_lines.append(f"{quartet}   1    0.000       0.000           1.0")
                        existing_patterns.add(quartet)
                        existing_patterns.add(rev_quartet)
            except Exception as e:
                logger.warning(f"Could not generate extra zero lines: {e}")

        if dihe_header_idx >= 0:
            # Insert optimized lines first, then extra zeros
            all_new_lines = optimized_lines + extra_zero_lines
            for new_line in reversed(all_new_lines):
                modified_lines.insert(dihe_header_idx + 1, new_line)
        else:
            modified_lines.append('DIHE')
            modified_lines.extend(optimized_lines)
            modified_lines.extend(extra_zero_lines)
            modified_lines.append('')

        with open(output_file, 'w') as f:
            f.write('\n'.join(modified_lines))

        logger.info(
            f"Updated FRCMOD with MOL2-matched patterns and inserted params: {output_file}"
        )
        return output_file

    def _prepare_optimized_param_lines(self, optimized_params: str, full_pattern: str,
                                       reversed_full_pattern: str) -> List[str]:
        """Normalize optimized parameter lines to the reversed MOL2 orientation."""
        new_param_lines = []
        for line in optimized_params.strip().split('\n'):
            stripped = line.strip()
            if not stripped or stripped.upper() == 'DIHE':
                continue
            parts = line.split()
            if not parts:
                continue
            remainder = line[len(parts[0]):]
            type_field = reversed_full_pattern or full_pattern or parts[0]
            new_param_lines.append(f"{type_field}{remainder}")
        return new_param_lines

    def _extract_mol2_atom_types(self, mol2_file: str, atom_indices_1based: List[int]) -> List[str]:
        """Extract atom types for the requested 1-based atom indices from a MOL2 file."""
        index_to_type: Dict[int, str] = {}
        with open(mol2_file, 'r') as f:
            in_atom_block = False
            for line in f:
                if line.startswith('@<TRIPOS>ATOM'):
                    in_atom_block = True
                    continue
                if line.startswith('@<TRIPOS>'):
                    in_atom_block = False
                    continue
                if not in_atom_block:
                    continue
                parts = line.split()
                if len(parts) >= 6 and parts[0].isdigit():
                    atom_index = int(parts[0])
                    if atom_index in atom_indices_1based:
                        index_to_type[atom_index] = parts[5].lower()

        return [index_to_type[idx] for idx in atom_indices_1based if idx in index_to_type]

    def _get_patterns_from_mol2(self, mol2_file: str,
                                dihedral_indices: List[int]) -> Tuple[str, str, str, str]:
        """Build central and full dihedral atom-type patterns from a MOL2 file."""
        if len(dihedral_indices) != 4:
            raise ValueError("dihedral_indices must contain exactly 4 atom indices")

        indices_1based = [int(idx) + 1 for idx in dihedral_indices]
        central_indices = [indices_1based[1], indices_1based[2]]

        central_types = self._extract_mol2_atom_types(mol2_file, central_indices)
        full_types = self._extract_mol2_atom_types(mol2_file, indices_1based)

        if len(central_types) != 2 or len(full_types) != 4:
            raise ValueError(
                f"Could not extract requested atom types from MOL2 file: {mol2_file}"
            )

        central_pattern = '-'.join(central_types)
        reversed_central_pattern = '-'.join(reversed(central_types))
        full_pattern = '-'.join(full_types)
        reversed_full_pattern = '-'.join(reversed(full_types))
        return central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern

    def _get_all_central_bond_quartets(self, mol2_file: str, dihedral_indices: List[int]) -> List[str]:
        """
        Find ALL dihedral quartets around the central bond from the MOL2 file.

        Returns type patterns like 'h4-ce-c-o', 'c2-ce-c-os', etc. for every
        proper torsion that shares the central bond (atoms at indices 1 and 2
        of dihedral_indices). These must all be explicitly zeroed in the FRCMOD
        to prevent generic wildcard parameters (e.g. X-ce-c-X from gaff2.dat)
        from being applied by tLEaP.
        """
        indices_1based = [int(idx) + 1 for idx in dihedral_indices]
        central_atom2 = indices_1based[1]  # atom index of central atom 2 (1-based)
        central_atom3 = indices_1based[2]  # atom index of central atom 3 (1-based)

        # Parse MOL2: get all atom types and bonds
        atom_types_map: Dict[int, str] = {}
        bonds: List[Tuple[int, int]] = []

        with open(mol2_file, 'r') as f:
            section = None
            for line in f:
                if line.startswith('@<TRIPOS>ATOM'):
                    section = 'atom'
                    continue
                elif line.startswith('@<TRIPOS>BOND'):
                    section = 'bond'
                    continue
                elif line.startswith('@<TRIPOS>'):
                    section = None
                    continue

                if section == 'atom':
                    parts = line.split()
                    if len(parts) >= 6 and parts[0].isdigit():
                        atom_idx = int(parts[0])
                        atom_types_map[atom_idx] = parts[5].lower()
                elif section == 'bond':
                    parts = line.split()
                    if len(parts) >= 4 and parts[0].isdigit():
                        a1 = int(parts[1])
                        a2 = int(parts[2])
                        bonds.append((a1, a2))

        # Build adjacency list
        neighbors: Dict[int, List[int]] = {}
        for a1, a2 in bonds:
            neighbors.setdefault(a1, []).append(a2)
            neighbors.setdefault(a2, []).append(a1)

        # Find all atoms bonded to central_atom2 (excluding central_atom3)
        outer_left = [a for a in neighbors.get(central_atom2, []) if a != central_atom3]
        # Find all atoms bonded to central_atom3 (excluding central_atom2)
        outer_right = [a for a in neighbors.get(central_atom3, []) if a != central_atom2]

        # Generate all quartet type patterns: left-central2-central3-right
        type2 = atom_types_map.get(central_atom2, '')
        type3 = atom_types_map.get(central_atom3, '')

        all_quartets = []
        for left_atom in outer_left:
            for right_atom in outer_right:
                type_left = atom_types_map.get(left_atom, '')
                type_right = atom_types_map.get(right_atom, '')
                if type_left and type_right and type2 and type3:
                    pattern = f"{type_left}-{type2}-{type3}-{type_right}"
                    all_quartets.append(pattern)

        return all_quartets

    def _get_patterns_from_params(self, optimized_params: str) -> Tuple[str, str, str, str]:
        """Fallback pattern extraction from optimized parameter lines."""
        for line in optimized_params.strip().split('\n'):
            stripped = line.strip()
            if not stripped or stripped.upper() == 'DIHE':
                continue
            type_field = stripped.split()[0]
            atom_types = type_field.split('-')
            if len(atom_types) == 4:
                full_pattern = '-'.join(atom_types)
                reversed_full_pattern = '-'.join(reversed(atom_types))
                central_pattern = '-'.join(atom_types[1:3])
                reversed_central_pattern = '-'.join(reversed(atom_types[1:3]))
                return full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern
        return '', '', '', ''

    def _contains_central_pattern(self, line: str, pattern: str, rev_pattern: str) -> bool:
        """Check if a DIHE line contains the internal 2-atom pattern."""
        parts = line.split()
        if not parts:
            return False
        type_field = parts[0]
        elements = type_field.split('-')
        if len(elements) < 4:
            return False
        for i in range(1, len(elements) - 1):
            segment = f"{elements[i]}-{elements[i + 1]}"
            if segment == pattern or segment == rev_pattern:
                return True
        return False

    def _matches_full_pattern(self, line: str, pattern4: str, rev_pattern4: str) -> bool:
        """Check if a DIHE line matches the full 4-atom pattern."""
        parts = line.split()
        if not parts:
            return False
        type_field = parts[0]
        return type_field == pattern4 or type_field == rev_pattern4

    def _zero_out_line(self, line: str) -> str:
        """Zero a DIHE line: set PK=0, PHASE=0, PN=1.0 while keeping type and IDIVF."""
        parts = line.split()
        if len(parts) < 5:
            return line
        # parts[0]=type, parts[1]=IDIVF, parts[2]=PK, parts[3]=PHASE, parts[4]=PN
        type_field = parts[0]
        idivf = parts[1]
        # Preserve any trailing comment (e.g., SCEE, SCNB, or # comments)
        suffix = '      ' + '  '.join(parts[5:]) if len(parts) > 5 else ''
        return f"{type_field}   {idivf}    0.000       0.000           1.0{suffix}"

    def _run_tleap(self, tleap_input: str, tleap_file: str):
        """Write and run a tLEaP input file."""
        with open(tleap_file, 'w') as f:
            f.write(tleap_input)
        return subprocess.run(
            f"tleap -f {tleap_file}", shell=True, capture_output=True, text=True
        )

    def _raise_tleap_failure(self, result, expected_output: str):
        """Raise a detailed tLEaP failure with recent stdout/stderr."""
        logger.error("tLEaP failed to generate topology files.")
        logger.error(f"tLEaP stdout: {result.stdout[-500:] if result.stdout else 'empty'}")
        logger.error(f"tLEaP stderr: {result.stderr[-500:] if result.stderr else 'empty'}")
        raise RuntimeError(
            f"tLEaP failed to generate {expected_output}. Check that mol2 and frcmod files are valid.\n"
            f"tLEaP output: {result.stdout[-200:] if result.stdout else 'none'}"
        )

    def _generate_mol2_with_charges(self, mol_file, charge_method,
                                    resp_charges_file=None):
        """
        Generate MOL2 file with charges using antechamber.

        If resp_charges_file is provided, uses antechamber's -c rc -cf <file>
        mode to read pre-computed charges (e.g., from Psi4 RESP calculation)
        instead of computing them internally.
        """
        mol2_output = os.path.join(self.tleap_dir, 'ligand.mol2')
        ext = os.path.splitext(mol_file)[1].lower()
        input_format = {'.mol': 'mdl', '.pdb': 'pdb', '.mol2': 'mol2', '.sdf': 'sdf'}.get(ext, 'pdb')

        if resp_charges_file is not None:
            # Use pre-computed charges (e.g., from Psi4 RESP)
            # antechamber -c rc reads charges from file specified with -cf
            charges_path = os.path.abspath(resp_charges_file)

            # Ensure the charges file is in the format antechamber expects
            # (one charge per line, no header). Our calculate_resp_charges()
            # saves with np.savetxt which adds a # header line — strip it.
            clean_charges_path = os.path.join(self.tleap_dir, 'resp_charges_clean.dat')
            self._prepare_charges_file(charges_path, clean_charges_path)

            cmd = (
                f"antechamber -i {mol_file} -fi {input_format} "
                f"-o {mol2_output} -fo mol2 "
                f"-c rc -cf {clean_charges_path} "
                f"-at {self.force_field} -pf y"
            )
            logger.info(f"Using pre-computed RESP charges from: {resp_charges_file}")
        else:
            charge_flag = {
                'am1bcc': 'bcc', 'bcc': 'bcc',
                'resp': 'resp', 'gasteiger': 'gas'
            }.get(charge_method.lower(), 'bcc')
            cmd = (
                f"antechamber -i {mol_file} -fi {input_format} "
                f"-o {mol2_output} -fo mol2 "
                f"-c {charge_flag} -at {self.force_field} -pf y"
            )

        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if not os.path.exists(mol2_output) or os.path.getsize(mol2_output) == 0:
            error_msg = f"antechamber failed to generate MOL2 file.\nCommand: {cmd}"
            if result.stderr:
                error_msg += f"\nstderr: {result.stderr[-300:]}"
            raise RuntimeError(error_msg)
        return mol2_output

    def _prepare_charges_file(self, input_path: str, output_path: str):
        """
        Prepare a charges file for antechamber -c rc -cf mode.

        antechamber expects one charge per line with no header comments.
        Our calculate_resp_charges() uses np.savetxt which adds a '# ...' header.
        This method strips any comment lines and ensures the format is correct.
        """
        charges = []
        with open(input_path, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                try:
                    charges.append(float(stripped))
                except ValueError:
                    continue

        if not charges:
            raise ValueError(
                f"No valid charges found in {input_path}. "
                "Expected one numeric value per line."
            )

        # Write clean charges file (one per line, no header)
        with open(output_path, 'w') as f:
            for charge in charges:
                f.write(f"{charge:.6f}\n")

        logger.info(f"Prepared {len(charges)} charges for antechamber from {input_path}")

    def _run_parmchk2(self, mol2_file, frcmod_output):
        """Run parmchk2 to generate missing parameters."""
        cmd = f"parmchk2 -i {mol2_file} -f mol2 -o {frcmod_output} -s {self.force_field}"
        subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if not os.path.exists(frcmod_output):
            raise RuntimeError("parmchk2 failed to generate FRCMOD file.")
        return frcmod_output

init ¶

__init__(work_dir: str = './work', force_field: str = 'gaff2')

Source code in parametrizani/topology_gen.py

def __init__(self, work_dir: str = './work', force_field: str = 'gaff2'):
    self.work_dir = create_work_dir(work_dir)
    self.force_field = force_field
    self.tleap_dir = os.path.join(self.work_dir, 'tleap_output')
    os.makedirs(self.tleap_dir, exist_ok=True)

generate_amber ¶

generate_amber(mol_file: str, frcmod_file: Optional[str] = None, charge_method: str = 'am1bcc', mol2_file: Optional[str] = None, output_prefix: str = 'SYS', lib_name: Optional[str] = None, use_two_stage: bool = True, resp_charges_file: Optional[str] = None) -> Dict[str, Any]

Generate AMBER topology files using tLEaP.

Parameters:

Name	Type	Description	Default
`mol_file`	`str`	Path to input MOL/PDB file.	required
`frcmod_file`	`str`	Path to custom FRCMOD file (e.g., with optimized dihedral parameters).	`None`
`charge_method`	`str`	Charge method for antechamber ('am1bcc', 'resp', 'gasteiger'). If resp_charges_file is provided, this is ignored (charges are read from file using antechamber's -c rc mode).	`'am1bcc'`
`mol2_file`	`str`	Path to existing MOL2 file with charges. If None, generates or reuses one.	`None`
`output_prefix`	`str`	Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.	`'SYS'`
`lib_name`	`str`	Library filename to write/read during two-stage tLEaP execution.	`None`
`use_two_stage`	`bool`	If True, follow the original notebook workflow with saveoff/loadoff.	`True`
`resp_charges_file`	`str`	Path to a file containing pre-computed RESP charges (one charge per line). Typically generated by calculate_resp_charges(). When provided, antechamber uses these charges directly (-c rc -cf ) instead of computing them. This allows using Psi4-computed RESP charges in the AMBER topology.	`None`

Source code in parametrizani/topology_gen.py

    def generate_amber(self, mol_file: str, frcmod_file: Optional[str] = None,
                       charge_method: str = 'am1bcc', mol2_file: Optional[str] = None,
                       output_prefix: str = 'SYS', lib_name: Optional[str] = None,
                       use_two_stage: bool = True,
                       resp_charges_file: Optional[str] = None) -> Dict[str, Any]:
        """
        Generate AMBER topology files using tLEaP.

        Parameters
        ----------
        mol_file : str
            Path to input MOL/PDB file.
        frcmod_file : str, optional
            Path to custom FRCMOD file (e.g., with optimized dihedral parameters).
        charge_method : str
            Charge method for antechamber ('am1bcc', 'resp', 'gasteiger').
            If resp_charges_file is provided, this is ignored (charges are read
            from file using antechamber's -c rc mode).
        mol2_file : str, optional
            Path to existing MOL2 file with charges. If None, generates or reuses one.
        output_prefix : str
            Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.
        lib_name : str, optional
            Library filename to write/read during two-stage tLEaP execution.
        use_two_stage : bool
            If True, follow the original notebook workflow with saveoff/loadoff.
        resp_charges_file : str, optional
            Path to a file containing pre-computed RESP charges (one charge per line).
            Typically generated by calculate_resp_charges(). When provided, antechamber
            uses these charges directly (-c rc -cf <file>) instead of computing them.
            This allows using Psi4-computed RESP charges in the AMBER topology.
        """
        if mol2_file is None:
            mol2_candidates = [
                os.path.join(self.tleap_dir, 'new.mol2'),
                os.path.join(self.tleap_dir, 'ligand.mol2'),
            ]
            for candidate in mol2_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    mol2_file = candidate
                    logger.info(f"Reusing existing MOL2: {mol2_file}")
                    break
            if mol2_file is None:
                mol2_file = self._generate_mol2_with_charges(
                    mol_file, charge_method, resp_charges_file=resp_charges_file
                )

        if frcmod_file is not None:
            frcmod_path = os.path.join(self.tleap_dir, os.path.basename(frcmod_file))
            if os.path.abspath(frcmod_file) != os.path.abspath(frcmod_path):
                shutil.copy(frcmod_file, frcmod_path)
            else:
                frcmod_path = frcmod_file
        else:
            frcmod_candidates = [
                os.path.join(self.tleap_dir, 'ligand_new.frcmod'),
                os.path.join(self.tleap_dir, 'ligand.frcmod'),
            ]
            frcmod_path = None
            for candidate in frcmod_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    frcmod_path = candidate
                    break
            if frcmod_path is None:
                frcmod_path = os.path.join(self.tleap_dir, 'ligand.frcmod')
                self._run_parmchk2(mol2_file, frcmod_path)

        lib_default = 'lig_new.lib' if output_prefix.endswith('_new') else 'lig.lib'
        if lib_name is None:
            lib_name = lib_default

        stem = os.path.splitext(os.path.basename(mol2_file))[0] or 'ligand'
        paths = {
            'prmtop': os.path.join(self.tleap_dir, f'{output_prefix}.prmtop'),
            'inpcrd': os.path.join(self.tleap_dir, f'{output_prefix}.crd'),
            'pdb': os.path.join(self.tleap_dir, f'{output_prefix}.pdb'),
            'lib': os.path.join(self.tleap_dir, lib_name),
            'mol2': mol2_file,
            'frcmod': frcmod_path,
            'ligand_pdb': os.path.join(self.tleap_dir, f'{stem}_gaff.pdb'),
            'tleap_in': os.path.join(self.tleap_dir, 'tleap.in'),
            'tleap_stage1': os.path.join(self.tleap_dir, 'tleap_stage1.in'),
            'tleap_stage2': os.path.join(self.tleap_dir, 'tleap_stage2.in'),
        }

        if use_two_stage:
            stage1_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
savepdb LIG {paths['ligand_pdb']}
quit"""
            stage1_result = self._run_tleap(stage1_input, paths['tleap_stage1'])
            if not os.path.exists(paths['lib']) or os.path.getsize(paths['lib']) == 0:
                self._raise_tleap_failure(stage1_result, paths['lib'])

            stage2_input = f"""source leaprc.protein.ff19SB
source leaprc.DNA.OL15
source leaprc.RNA.OL3
source leaprc.GLYCAM_06j-1
source leaprc.{self.force_field}
loadamberparams {paths['frcmod']}
loadoff {paths['lib']}
SYS = loadmol2 {paths['mol2']}
alignaxes SYS
check SYS
charge SYS
set SYS box {{50,50,50}}
saveamberparm SYS {paths['prmtop']} {paths['inpcrd']}
savepdb SYS {paths['pdb']}
quit"""
            result = self._run_tleap(stage2_input, paths['tleap_stage2'])
        else:
            tleap_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
saveamberparm LIG {paths['prmtop']} {paths['inpcrd']}
savepdb LIG {paths['pdb']}
quit"""
            result = self._run_tleap(tleap_input, paths['tleap_in'])

        if not os.path.exists(paths['prmtop']) or os.path.getsize(paths['prmtop']) == 0:
            self._raise_tleap_failure(result, paths['prmtop'])

        logger.info(f"AMBER topology files generated (prefix={output_prefix})")
        return paths

generate_gromacs ¶

generate_gromacs(prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]

Generate GROMACS topology files from AMBER topology.

Follows the original ParametrizANI notebook approach: 1. Load prmtop/inpcrd with OpenMM 2. Create the OpenMM System (which correctly encodes all parameters) 3. Use parmed.openmm.load_topology() to build the parmed structure 4. Save to GROMACS format (.top, .gro, .pdb)

This ensures all dihedral parameters (including optimized ones) are correctly transferred to the GROMACS topology.

Source code in parametrizani/topology_gen.py

def generate_gromacs(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
    """
    Generate GROMACS topology files from AMBER topology.

    Follows the original ParametrizANI notebook approach:
    1. Load prmtop/inpcrd with OpenMM
    2. Create the OpenMM System (which correctly encodes all parameters)
    3. Use parmed.openmm.load_topology() to build the parmed structure
    4. Save to GROMACS format (.top, .gro, .pdb)

    This ensures all dihedral parameters (including optimized ones) are
    correctly transferred to the GROMACS topology.
    """
    import parmed
    import openmm as mm
    from openmm.app import AmberPrmtopFile, AmberInpcrdFile
    from openmm import unit

    gmx_dir = os.path.join(self.work_dir, 'gromacs')
    os.makedirs(gmx_dir, exist_ok=True)

    # Load and create OpenMM system (matches original notebook approach)
    prmtop = AmberPrmtopFile(prmtop_file)
    inpcrd = AmberInpcrdFile(inpcrd_file)
    omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

    # Create parmed structure from OpenMM system (preserves all parameters)
    parmed_structure = parmed.openmm.load_topology(
        prmtop.topology, omm_system, inpcrd.positions
    )

    paths = {
        'top': os.path.join(gmx_dir, 'system.top'),
        'gro': os.path.join(gmx_dir, 'system.gro'),
        'pdb': os.path.join(gmx_dir, 'system.pdb'),
    }
    parmed_structure.save(paths['top'], overwrite=True)
    parmed_structure.save(paths['gro'], overwrite=True)
    parmed_structure.save(paths['pdb'], overwrite=True)
    logger.info("GROMACS topology files generated")
    return paths

generate_openmm ¶

generate_openmm(prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]

Generate OpenMM XML system file and PDB.

Follows the original ParametrizANI notebook approach: 1. Load prmtop/inpcrd with OpenMM 2. Create the OpenMM System (constraints=None to preserve all parameters) 3. Serialize the system to XML 4. Save PDB via parmed.openmm.load_topology()

This ensures all dihedral parameters (including optimized ones) are correctly encoded in the XML system file.

Source code in parametrizani/topology_gen.py

def generate_openmm(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
    """
    Generate OpenMM XML system file and PDB.

    Follows the original ParametrizANI notebook approach:
    1. Load prmtop/inpcrd with OpenMM
    2. Create the OpenMM System (constraints=None to preserve all parameters)
    3. Serialize the system to XML
    4. Save PDB via parmed.openmm.load_topology()

    This ensures all dihedral parameters (including optimized ones) are
    correctly encoded in the XML system file.
    """
    import parmed
    import openmm as mm
    from openmm.app import AmberPrmtopFile, AmberInpcrdFile, PDBFile
    from openmm import unit, XmlSerializer

    omm_dir = os.path.join(self.work_dir, 'openmm')
    os.makedirs(omm_dir, exist_ok=True)

    prmtop = AmberPrmtopFile(prmtop_file)
    inpcrd = AmberInpcrdFile(inpcrd_file)
    omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

    # Serialize the system to XML
    xml_content = XmlSerializer.serialize(omm_system)

    paths = {
        'xml': os.path.join(omm_dir, 'system.xml'),
        'pdb': os.path.join(omm_dir, 'system.pdb'),
    }
    with open(paths['xml'], 'w') as f:
        f.write(xml_content)

    # Save PDB via parmed (matches original notebook)
    parmed_structure = parmed.openmm.load_topology(
        prmtop.topology, omm_system, inpcrd.positions
    )
    parmed_structure.save(paths['pdb'], overwrite=True)

    logger.info("OpenMM system files generated")
    return paths

generate_all ¶

generate_all(mol_file: str, frcmod_file=None, charge_method='am1bcc', formats=None, resp_charges_file=None)

Generate topology files in all requested formats.

Source code in parametrizani/topology_gen.py

def generate_all(self, mol_file: str, frcmod_file=None, charge_method='am1bcc',
                 formats=None, resp_charges_file=None):
    """Generate topology files in all requested formats."""
    if formats is None:
        formats = ['amber', 'gromacs', 'openmm']
    results = {}
    amber_result = self.generate_amber(
        mol_file, frcmod_file, charge_method,
        resp_charges_file=resp_charges_file
    )
    results['amber'] = amber_result
    if 'gromacs' in formats:
        results['gromacs'] = self.generate_gromacs(amber_result['prmtop'], amber_result['inpcrd'])
    if 'openmm' in formats:
        results['openmm'] = self.generate_openmm(amber_result['prmtop'], amber_result['inpcrd'])
    return results

update_frcmod ¶

update_frcmod(original_frcmod: str, optimized_params: str, dihedral_indices: Optional[List[int]] = None, mol2_file: Optional[str] = None, output_file=None)

Update FRCMOD file with optimized dihedral parameters.

Follows the original ParametrizANI logic: 1. Find DIHE lines matching the central 2-atom pattern → set to zero 2. Delete DIHE lines matching the full 4-atom pattern 3. Insert new optimized parameters right after the DIHE header using the reversed orientation derived from the MOL2 atom types.

Source code in parametrizani/topology_gen.py

def update_frcmod(self, original_frcmod: str, optimized_params: str,
                  dihedral_indices: Optional[List[int]] = None,
                  mol2_file: Optional[str] = None, output_file=None):
    """
    Update FRCMOD file with optimized dihedral parameters.

    Follows the original ParametrizANI logic:
    1. Find DIHE lines matching the central 2-atom pattern → set to zero
    2. Delete DIHE lines matching the full 4-atom pattern
    3. Insert new optimized parameters right after the DIHE header using the
       reversed orientation derived from the MOL2 atom types.
    """
    if output_file is None:
        output_file = os.path.join(self.tleap_dir, 'ligand_new.frcmod')
    if os.path.abspath(original_frcmod) != os.path.abspath(output_file):
        shutil.copy(original_frcmod, output_file)

    if mol2_file is None:
        mol2_candidates = [
            os.path.join(self.tleap_dir, 'ligand.mol2'),
            os.path.join(self.tleap_dir, 'new.mol2'),
        ]
        for candidate in mol2_candidates:
            if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                mol2_file = candidate
                break

    if mol2_file and dihedral_indices is not None:
        central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern = (
            self._get_patterns_from_mol2(mol2_file, dihedral_indices)
        )
    else:
        full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern = (
            self._get_patterns_from_params(optimized_params)
        )

    with open(output_file, 'r') as f:
        content = f.read()
    content = content.replace(' -', '-')
    lines = content.split('\n')

    optimized_lines = self._prepare_optimized_param_lines(
        optimized_params,
        full_pattern=full_pattern,
        reversed_full_pattern=reversed_full_pattern,
    )

    modified_lines = []
    in_dihe_section = False
    dihe_header_idx = -1

    for line in lines:
        stripped = line.strip()

        if stripped == 'DIHE':
            in_dihe_section = True
            modified_lines.append(line)
            dihe_header_idx = len(modified_lines) - 1
            continue
        if in_dihe_section and stripped and stripped.isupper() and '-' not in stripped:
            in_dihe_section = False
            modified_lines.append(line)
            continue

        if in_dihe_section and stripped:
            if self._matches_full_pattern(line, full_pattern, reversed_full_pattern):
                continue
            if self._contains_central_pattern(line, central_pattern, reversed_central_pattern):
                modified_lines.append(self._zero_out_line(line))
            else:
                modified_lines.append(line)
        else:
            modified_lines.append(line)

    # Generate explicit zero lines for ALL quartets around the central bond
    # that aren't already in the frcmod. This prevents tLEaP from falling back
    # to generic wildcard params (e.g. X-ce-c-X) which would add unwanted
    # torsion energy on top of our optimized parameters.
    extra_zero_lines = []
    if mol2_file and dihedral_indices is not None:
        try:
            all_quartets = self._get_all_central_bond_quartets(mol2_file, dihedral_indices)
            # Collect patterns already handled (zeroed or in optimized lines)
            existing_patterns = set()
            for line in modified_lines:
                stripped = line.strip()
                if stripped and '-' in stripped:
                    parts = stripped.split()
                    if parts:
                        p = parts[0]
                        existing_patterns.add(p)
                        # Also add reversed
                        elems = p.split('-')
                        if len(elems) == 4:
                            existing_patterns.add('-'.join(reversed(elems)))
            for line in optimized_lines:
                parts = line.split()
                if parts:
                    p = parts[0]
                    existing_patterns.add(p)
                    elems = p.split('-')
                    if len(elems) == 4:
                        existing_patterns.add('-'.join(reversed(elems)))

            for quartet in all_quartets:
                rev_quartet = '-'.join(reversed(quartet.split('-')))
                if quartet not in existing_patterns and rev_quartet not in existing_patterns:
                    extra_zero_lines.append(f"{quartet}   1    0.000       0.000           1.0")
                    existing_patterns.add(quartet)
                    existing_patterns.add(rev_quartet)
        except Exception as e:
            logger.warning(f"Could not generate extra zero lines: {e}")

    if dihe_header_idx >= 0:
        # Insert optimized lines first, then extra zeros
        all_new_lines = optimized_lines + extra_zero_lines
        for new_line in reversed(all_new_lines):
            modified_lines.insert(dihe_header_idx + 1, new_line)
    else:
        modified_lines.append('DIHE')
        modified_lines.extend(optimized_lines)
        modified_lines.extend(extra_zero_lines)
        modified_lines.append('')

    with open(output_file, 'w') as f:
        f.write('\n'.join(modified_lines))

    logger.info(
        f"Updated FRCMOD with MOL2-matched patterns and inserted params: {output_file}"
    )
    return output_file

package_results ¶

package_results(results, output_name='topology_files')

Package all generated files into a ZIP archive.

Source code in parametrizani/topology_gen.py

def package_results(self, results, output_name='topology_files'):
    """Package all generated files into a ZIP archive."""
    zip_path = os.path.join(self.work_dir, f'{output_name}.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for format_name, files in results.items():
            for file_type, file_path in files.items():
                if file_path and os.path.exists(file_path):
                    arcname = f'{format_name}/{os.path.basename(file_path)}'
                    zf.write(file_path, arcname)
    logger.info(f"Results packaged: {zip_path}")
    return zip_path

TopologyGenerator¶

Usage¶

FRCMOD Update Logic¶

API Reference¶

parametrizani.topology_gen.TopologyGenerator ¶

__init__ ¶

generate_amber ¶

generate_gromacs ¶

generate_openmm ¶

generate_all ¶

update_frcmod ¶

package_results ¶

init ¶