Skip to content

TopologyGenerator

Generate topology files for MD simulations in AMBER, GROMACS, and OpenMM formats.

Usage

from parametrizani import TopologyGenerator

topo = TopologyGenerator('./work', force_field='gaff2')

# Generate initial topology
amber_files = topo.generate_amber(conf['mol_file'], charge_method='am1bcc')

# Update FRCMOD with optimized parameters
updated_frcmod = topo.update_frcmod(
    amber_files['frcmod'],
    optimized_params,
    dihedral_indices=dihedral_indices,
    mol2_file=mol2_file,
)

# Regenerate topology with new parameters
new_amber = topo.generate_amber(
    conf['mol_file'],
    frcmod_file=updated_frcmod,
    mol2_file=mol2_file,
    output_prefix='SYS_new',
)

# Export to other formats
gromacs_files = topo.generate_gromacs(new_amber['prmtop'], new_amber['inpcrd'])
openmm_files = topo.generate_openmm(new_amber['prmtop'], new_amber['inpcrd'])

FRCMOD Update Logic

The update_frcmod() method follows the original ParametrizANI notebook algorithm:

  1. Zero all DIHE lines where the central 2-atom segment matches (sets PK=0, PHASE=0, PN=1)
  2. Delete DIHE lines matching the full 4-atom pattern (forward or reversed)
  3. Insert new optimized parameter lines immediately after the DIHE header

When dihedral_indices and mol2_file are provided, patterns are derived from the actual MOL2 atom types (matching the original notebook's process_mol2_elements() logic).

API Reference

parametrizani.topology_gen.TopologyGenerator

Generate topology files for MD simulations.

Supports AMBER (prmtop, inpcrd, frcmod, lib), GROMACS (top, gro, pdb), and OpenMM (xml, pdb) formats.

Parameters:

Name Type Description Default
work_dir str

Working directory.

'./work'
force_field str

Force field to use. Default 'gaff2'.

'gaff2'
Source code in parametrizani/topology_gen.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
class TopologyGenerator:
    """
    Generate topology files for MD simulations.

    Supports AMBER (prmtop, inpcrd, frcmod, lib), GROMACS (top, gro, pdb),
    and OpenMM (xml, pdb) formats.

    Parameters
    ----------
    work_dir : str
        Working directory.
    force_field : str
        Force field to use. Default 'gaff2'.
    """

    def __init__(self, work_dir: str = './work', force_field: str = 'gaff2'):
        self.work_dir = create_work_dir(work_dir)
        self.force_field = force_field
        self.tleap_dir = os.path.join(self.work_dir, 'tleap_output')
        os.makedirs(self.tleap_dir, exist_ok=True)

    def generate_amber(self, mol_file: str, frcmod_file: Optional[str] = None,
                       charge_method: str = 'am1bcc', mol2_file: Optional[str] = None,
                       output_prefix: str = 'SYS', lib_name: Optional[str] = None,
                       use_two_stage: bool = True,
                       resp_charges_file: Optional[str] = None) -> Dict[str, Any]:
        """
        Generate AMBER topology files using tLEaP.

        Parameters
        ----------
        mol_file : str
            Path to input MOL/PDB file.
        frcmod_file : str, optional
            Path to custom FRCMOD file (e.g., with optimized dihedral parameters).
        charge_method : str
            Charge method for antechamber ('am1bcc', 'resp', 'gasteiger').
            If resp_charges_file is provided, this is ignored (charges are read
            from file using antechamber's -c rc mode).
        mol2_file : str, optional
            Path to existing MOL2 file with charges. If None, generates or reuses one.
        output_prefix : str
            Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.
        lib_name : str, optional
            Library filename to write/read during two-stage tLEaP execution.
        use_two_stage : bool
            If True, follow the original notebook workflow with saveoff/loadoff.
        resp_charges_file : str, optional
            Path to a file containing pre-computed RESP charges (one charge per line).
            Typically generated by calculate_resp_charges(). When provided, antechamber
            uses these charges directly (-c rc -cf <file>) instead of computing them.
            This allows using Psi4-computed RESP charges in the AMBER topology.
        """
        if mol2_file is None:
            mol2_candidates = [
                os.path.join(self.tleap_dir, 'new.mol2'),
                os.path.join(self.tleap_dir, 'ligand.mol2'),
            ]
            for candidate in mol2_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    mol2_file = candidate
                    logger.info(f"Reusing existing MOL2: {mol2_file}")
                    break
            if mol2_file is None:
                mol2_file = self._generate_mol2_with_charges(
                    mol_file, charge_method, resp_charges_file=resp_charges_file
                )

        if frcmod_file is not None:
            frcmod_path = os.path.join(self.tleap_dir, os.path.basename(frcmod_file))
            if os.path.abspath(frcmod_file) != os.path.abspath(frcmod_path):
                shutil.copy(frcmod_file, frcmod_path)
            else:
                frcmod_path = frcmod_file
        else:
            frcmod_candidates = [
                os.path.join(self.tleap_dir, 'ligand_new.frcmod'),
                os.path.join(self.tleap_dir, 'ligand.frcmod'),
            ]
            frcmod_path = None
            for candidate in frcmod_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    frcmod_path = candidate
                    break
            if frcmod_path is None:
                frcmod_path = os.path.join(self.tleap_dir, 'ligand.frcmod')
                self._run_parmchk2(mol2_file, frcmod_path)

        lib_default = 'lig_new.lib' if output_prefix.endswith('_new') else 'lig.lib'
        if lib_name is None:
            lib_name = lib_default

        stem = os.path.splitext(os.path.basename(mol2_file))[0] or 'ligand'
        paths = {
            'prmtop': os.path.join(self.tleap_dir, f'{output_prefix}.prmtop'),
            'inpcrd': os.path.join(self.tleap_dir, f'{output_prefix}.crd'),
            'pdb': os.path.join(self.tleap_dir, f'{output_prefix}.pdb'),
            'lib': os.path.join(self.tleap_dir, lib_name),
            'mol2': mol2_file,
            'frcmod': frcmod_path,
            'ligand_pdb': os.path.join(self.tleap_dir, f'{stem}_gaff.pdb'),
            'tleap_in': os.path.join(self.tleap_dir, 'tleap.in'),
            'tleap_stage1': os.path.join(self.tleap_dir, 'tleap_stage1.in'),
            'tleap_stage2': os.path.join(self.tleap_dir, 'tleap_stage2.in'),
        }

        if use_two_stage:
            stage1_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
savepdb LIG {paths['ligand_pdb']}
quit"""
            stage1_result = self._run_tleap(stage1_input, paths['tleap_stage1'])
            if not os.path.exists(paths['lib']) or os.path.getsize(paths['lib']) == 0:
                self._raise_tleap_failure(stage1_result, paths['lib'])

            stage2_input = f"""source leaprc.protein.ff19SB
source leaprc.DNA.OL15
source leaprc.RNA.OL3
source leaprc.GLYCAM_06j-1
source leaprc.{self.force_field}
loadamberparams {paths['frcmod']}
loadoff {paths['lib']}
SYS = loadmol2 {paths['mol2']}
alignaxes SYS
check SYS
charge SYS
set SYS box {{50,50,50}}
saveamberparm SYS {paths['prmtop']} {paths['inpcrd']}
savepdb SYS {paths['pdb']}
quit"""
            result = self._run_tleap(stage2_input, paths['tleap_stage2'])
        else:
            tleap_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
saveamberparm LIG {paths['prmtop']} {paths['inpcrd']}
savepdb LIG {paths['pdb']}
quit"""
            result = self._run_tleap(tleap_input, paths['tleap_in'])

        if not os.path.exists(paths['prmtop']) or os.path.getsize(paths['prmtop']) == 0:
            self._raise_tleap_failure(result, paths['prmtop'])

        logger.info(f"AMBER topology files generated (prefix={output_prefix})")
        return paths

    def generate_gromacs(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
        """
        Generate GROMACS topology files from AMBER topology.

        Follows the original ParametrizANI notebook approach:
        1. Load prmtop/inpcrd with OpenMM
        2. Create the OpenMM System (which correctly encodes all parameters)
        3. Use parmed.openmm.load_topology() to build the parmed structure
        4. Save to GROMACS format (.top, .gro, .pdb)

        This ensures all dihedral parameters (including optimized ones) are
        correctly transferred to the GROMACS topology.
        """
        import parmed
        import openmm as mm
        from openmm.app import AmberPrmtopFile, AmberInpcrdFile
        from openmm import unit

        gmx_dir = os.path.join(self.work_dir, 'gromacs')
        os.makedirs(gmx_dir, exist_ok=True)

        # Load and create OpenMM system (matches original notebook approach)
        prmtop = AmberPrmtopFile(prmtop_file)
        inpcrd = AmberInpcrdFile(inpcrd_file)
        omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

        # Create parmed structure from OpenMM system (preserves all parameters)
        parmed_structure = parmed.openmm.load_topology(
            prmtop.topology, omm_system, inpcrd.positions
        )

        paths = {
            'top': os.path.join(gmx_dir, 'system.top'),
            'gro': os.path.join(gmx_dir, 'system.gro'),
            'pdb': os.path.join(gmx_dir, 'system.pdb'),
        }
        parmed_structure.save(paths['top'], overwrite=True)
        parmed_structure.save(paths['gro'], overwrite=True)
        parmed_structure.save(paths['pdb'], overwrite=True)
        logger.info("GROMACS topology files generated")
        return paths

    def generate_openmm(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
        """
        Generate OpenMM XML system file and PDB.

        Follows the original ParametrizANI notebook approach:
        1. Load prmtop/inpcrd with OpenMM
        2. Create the OpenMM System (constraints=None to preserve all parameters)
        3. Serialize the system to XML
        4. Save PDB via parmed.openmm.load_topology()

        This ensures all dihedral parameters (including optimized ones) are
        correctly encoded in the XML system file.
        """
        import parmed
        import openmm as mm
        from openmm.app import AmberPrmtopFile, AmberInpcrdFile, PDBFile
        from openmm import unit, XmlSerializer

        omm_dir = os.path.join(self.work_dir, 'openmm')
        os.makedirs(omm_dir, exist_ok=True)

        prmtop = AmberPrmtopFile(prmtop_file)
        inpcrd = AmberInpcrdFile(inpcrd_file)
        omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

        # Serialize the system to XML
        xml_content = XmlSerializer.serialize(omm_system)

        paths = {
            'xml': os.path.join(omm_dir, 'system.xml'),
            'pdb': os.path.join(omm_dir, 'system.pdb'),
        }
        with open(paths['xml'], 'w') as f:
            f.write(xml_content)

        # Save PDB via parmed (matches original notebook)
        parmed_structure = parmed.openmm.load_topology(
            prmtop.topology, omm_system, inpcrd.positions
        )
        parmed_structure.save(paths['pdb'], overwrite=True)

        logger.info("OpenMM system files generated")
        return paths

    def generate_all(self, mol_file: str, frcmod_file=None, charge_method='am1bcc',
                     formats=None, resp_charges_file=None):
        """Generate topology files in all requested formats."""
        if formats is None:
            formats = ['amber', 'gromacs', 'openmm']
        results = {}
        amber_result = self.generate_amber(
            mol_file, frcmod_file, charge_method,
            resp_charges_file=resp_charges_file
        )
        results['amber'] = amber_result
        if 'gromacs' in formats:
            results['gromacs'] = self.generate_gromacs(amber_result['prmtop'], amber_result['inpcrd'])
        if 'openmm' in formats:
            results['openmm'] = self.generate_openmm(amber_result['prmtop'], amber_result['inpcrd'])
        return results

    def package_results(self, results, output_name='topology_files'):
        """Package all generated files into a ZIP archive."""
        zip_path = os.path.join(self.work_dir, f'{output_name}.zip')
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            for format_name, files in results.items():
                for file_type, file_path in files.items():
                    if file_path and os.path.exists(file_path):
                        arcname = f'{format_name}/{os.path.basename(file_path)}'
                        zf.write(file_path, arcname)
        logger.info(f"Results packaged: {zip_path}")
        return zip_path

    def update_frcmod(self, original_frcmod: str, optimized_params: str,
                      dihedral_indices: Optional[List[int]] = None,
                      mol2_file: Optional[str] = None, output_file=None):
        """
        Update FRCMOD file with optimized dihedral parameters.

        Follows the original ParametrizANI logic:
        1. Find DIHE lines matching the central 2-atom pattern → set to zero
        2. Delete DIHE lines matching the full 4-atom pattern
        3. Insert new optimized parameters right after the DIHE header using the
           reversed orientation derived from the MOL2 atom types.
        """
        if output_file is None:
            output_file = os.path.join(self.tleap_dir, 'ligand_new.frcmod')
        if os.path.abspath(original_frcmod) != os.path.abspath(output_file):
            shutil.copy(original_frcmod, output_file)

        if mol2_file is None:
            mol2_candidates = [
                os.path.join(self.tleap_dir, 'ligand.mol2'),
                os.path.join(self.tleap_dir, 'new.mol2'),
            ]
            for candidate in mol2_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    mol2_file = candidate
                    break

        if mol2_file and dihedral_indices is not None:
            central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern = (
                self._get_patterns_from_mol2(mol2_file, dihedral_indices)
            )
        else:
            full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern = (
                self._get_patterns_from_params(optimized_params)
            )

        with open(output_file, 'r') as f:
            content = f.read()
        content = content.replace(' -', '-')
        lines = content.split('\n')

        optimized_lines = self._prepare_optimized_param_lines(
            optimized_params,
            full_pattern=full_pattern,
            reversed_full_pattern=reversed_full_pattern,
        )

        modified_lines = []
        in_dihe_section = False
        dihe_header_idx = -1

        for line in lines:
            stripped = line.strip()

            if stripped == 'DIHE':
                in_dihe_section = True
                modified_lines.append(line)
                dihe_header_idx = len(modified_lines) - 1
                continue
            if in_dihe_section and stripped and stripped.isupper() and '-' not in stripped:
                in_dihe_section = False
                modified_lines.append(line)
                continue

            if in_dihe_section and stripped:
                if self._matches_full_pattern(line, full_pattern, reversed_full_pattern):
                    continue
                if self._contains_central_pattern(line, central_pattern, reversed_central_pattern):
                    modified_lines.append(self._zero_out_line(line))
                else:
                    modified_lines.append(line)
            else:
                modified_lines.append(line)

        # Generate explicit zero lines for ALL quartets around the central bond
        # that aren't already in the frcmod. This prevents tLEaP from falling back
        # to generic wildcard params (e.g. X-ce-c-X) which would add unwanted
        # torsion energy on top of our optimized parameters.
        extra_zero_lines = []
        if mol2_file and dihedral_indices is not None:
            try:
                all_quartets = self._get_all_central_bond_quartets(mol2_file, dihedral_indices)
                # Collect patterns already handled (zeroed or in optimized lines)
                existing_patterns = set()
                for line in modified_lines:
                    stripped = line.strip()
                    if stripped and '-' in stripped:
                        parts = stripped.split()
                        if parts:
                            p = parts[0]
                            existing_patterns.add(p)
                            # Also add reversed
                            elems = p.split('-')
                            if len(elems) == 4:
                                existing_patterns.add('-'.join(reversed(elems)))
                for line in optimized_lines:
                    parts = line.split()
                    if parts:
                        p = parts[0]
                        existing_patterns.add(p)
                        elems = p.split('-')
                        if len(elems) == 4:
                            existing_patterns.add('-'.join(reversed(elems)))

                for quartet in all_quartets:
                    rev_quartet = '-'.join(reversed(quartet.split('-')))
                    if quartet not in existing_patterns and rev_quartet not in existing_patterns:
                        extra_zero_lines.append(f"{quartet}   1    0.000       0.000           1.0")
                        existing_patterns.add(quartet)
                        existing_patterns.add(rev_quartet)
            except Exception as e:
                logger.warning(f"Could not generate extra zero lines: {e}")

        if dihe_header_idx >= 0:
            # Insert optimized lines first, then extra zeros
            all_new_lines = optimized_lines + extra_zero_lines
            for new_line in reversed(all_new_lines):
                modified_lines.insert(dihe_header_idx + 1, new_line)
        else:
            modified_lines.append('DIHE')
            modified_lines.extend(optimized_lines)
            modified_lines.extend(extra_zero_lines)
            modified_lines.append('')

        with open(output_file, 'w') as f:
            f.write('\n'.join(modified_lines))

        logger.info(
            f"Updated FRCMOD with MOL2-matched patterns and inserted params: {output_file}"
        )
        return output_file

    def _prepare_optimized_param_lines(self, optimized_params: str, full_pattern: str,
                                       reversed_full_pattern: str) -> List[str]:
        """Normalize optimized parameter lines to the reversed MOL2 orientation."""
        new_param_lines = []
        for line in optimized_params.strip().split('\n'):
            stripped = line.strip()
            if not stripped or stripped.upper() == 'DIHE':
                continue
            parts = line.split()
            if not parts:
                continue
            remainder = line[len(parts[0]):]
            type_field = reversed_full_pattern or full_pattern or parts[0]
            new_param_lines.append(f"{type_field}{remainder}")
        return new_param_lines

    def _extract_mol2_atom_types(self, mol2_file: str, atom_indices_1based: List[int]) -> List[str]:
        """Extract atom types for the requested 1-based atom indices from a MOL2 file."""
        index_to_type: Dict[int, str] = {}
        with open(mol2_file, 'r') as f:
            in_atom_block = False
            for line in f:
                if line.startswith('@<TRIPOS>ATOM'):
                    in_atom_block = True
                    continue
                if line.startswith('@<TRIPOS>'):
                    in_atom_block = False
                    continue
                if not in_atom_block:
                    continue
                parts = line.split()
                if len(parts) >= 6 and parts[0].isdigit():
                    atom_index = int(parts[0])
                    if atom_index in atom_indices_1based:
                        index_to_type[atom_index] = parts[5].lower()

        return [index_to_type[idx] for idx in atom_indices_1based if idx in index_to_type]

    def _get_patterns_from_mol2(self, mol2_file: str,
                                dihedral_indices: List[int]) -> Tuple[str, str, str, str]:
        """Build central and full dihedral atom-type patterns from a MOL2 file."""
        if len(dihedral_indices) != 4:
            raise ValueError("dihedral_indices must contain exactly 4 atom indices")

        indices_1based = [int(idx) + 1 for idx in dihedral_indices]
        central_indices = [indices_1based[1], indices_1based[2]]

        central_types = self._extract_mol2_atom_types(mol2_file, central_indices)
        full_types = self._extract_mol2_atom_types(mol2_file, indices_1based)

        if len(central_types) != 2 or len(full_types) != 4:
            raise ValueError(
                f"Could not extract requested atom types from MOL2 file: {mol2_file}"
            )

        central_pattern = '-'.join(central_types)
        reversed_central_pattern = '-'.join(reversed(central_types))
        full_pattern = '-'.join(full_types)
        reversed_full_pattern = '-'.join(reversed(full_types))
        return central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern

    def _get_all_central_bond_quartets(self, mol2_file: str, dihedral_indices: List[int]) -> List[str]:
        """
        Find ALL dihedral quartets around the central bond from the MOL2 file.

        Returns type patterns like 'h4-ce-c-o', 'c2-ce-c-os', etc. for every
        proper torsion that shares the central bond (atoms at indices 1 and 2
        of dihedral_indices). These must all be explicitly zeroed in the FRCMOD
        to prevent generic wildcard parameters (e.g. X-ce-c-X from gaff2.dat)
        from being applied by tLEaP.
        """
        indices_1based = [int(idx) + 1 for idx in dihedral_indices]
        central_atom2 = indices_1based[1]  # atom index of central atom 2 (1-based)
        central_atom3 = indices_1based[2]  # atom index of central atom 3 (1-based)

        # Parse MOL2: get all atom types and bonds
        atom_types_map: Dict[int, str] = {}
        bonds: List[Tuple[int, int]] = []

        with open(mol2_file, 'r') as f:
            section = None
            for line in f:
                if line.startswith('@<TRIPOS>ATOM'):
                    section = 'atom'
                    continue
                elif line.startswith('@<TRIPOS>BOND'):
                    section = 'bond'
                    continue
                elif line.startswith('@<TRIPOS>'):
                    section = None
                    continue

                if section == 'atom':
                    parts = line.split()
                    if len(parts) >= 6 and parts[0].isdigit():
                        atom_idx = int(parts[0])
                        atom_types_map[atom_idx] = parts[5].lower()
                elif section == 'bond':
                    parts = line.split()
                    if len(parts) >= 4 and parts[0].isdigit():
                        a1 = int(parts[1])
                        a2 = int(parts[2])
                        bonds.append((a1, a2))

        # Build adjacency list
        neighbors: Dict[int, List[int]] = {}
        for a1, a2 in bonds:
            neighbors.setdefault(a1, []).append(a2)
            neighbors.setdefault(a2, []).append(a1)

        # Find all atoms bonded to central_atom2 (excluding central_atom3)
        outer_left = [a for a in neighbors.get(central_atom2, []) if a != central_atom3]
        # Find all atoms bonded to central_atom3 (excluding central_atom2)
        outer_right = [a for a in neighbors.get(central_atom3, []) if a != central_atom2]

        # Generate all quartet type patterns: left-central2-central3-right
        type2 = atom_types_map.get(central_atom2, '')
        type3 = atom_types_map.get(central_atom3, '')

        all_quartets = []
        for left_atom in outer_left:
            for right_atom in outer_right:
                type_left = atom_types_map.get(left_atom, '')
                type_right = atom_types_map.get(right_atom, '')
                if type_left and type_right and type2 and type3:
                    pattern = f"{type_left}-{type2}-{type3}-{type_right}"
                    all_quartets.append(pattern)

        return all_quartets

    def _get_patterns_from_params(self, optimized_params: str) -> Tuple[str, str, str, str]:
        """Fallback pattern extraction from optimized parameter lines."""
        for line in optimized_params.strip().split('\n'):
            stripped = line.strip()
            if not stripped or stripped.upper() == 'DIHE':
                continue
            type_field = stripped.split()[0]
            atom_types = type_field.split('-')
            if len(atom_types) == 4:
                full_pattern = '-'.join(atom_types)
                reversed_full_pattern = '-'.join(reversed(atom_types))
                central_pattern = '-'.join(atom_types[1:3])
                reversed_central_pattern = '-'.join(reversed(atom_types[1:3]))
                return full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern
        return '', '', '', ''

    def _contains_central_pattern(self, line: str, pattern: str, rev_pattern: str) -> bool:
        """Check if a DIHE line contains the internal 2-atom pattern."""
        parts = line.split()
        if not parts:
            return False
        type_field = parts[0]
        elements = type_field.split('-')
        if len(elements) < 4:
            return False
        for i in range(1, len(elements) - 1):
            segment = f"{elements[i]}-{elements[i + 1]}"
            if segment == pattern or segment == rev_pattern:
                return True
        return False

    def _matches_full_pattern(self, line: str, pattern4: str, rev_pattern4: str) -> bool:
        """Check if a DIHE line matches the full 4-atom pattern."""
        parts = line.split()
        if not parts:
            return False
        type_field = parts[0]
        return type_field == pattern4 or type_field == rev_pattern4

    def _zero_out_line(self, line: str) -> str:
        """Zero a DIHE line: set PK=0, PHASE=0, PN=1.0 while keeping type and IDIVF."""
        parts = line.split()
        if len(parts) < 5:
            return line
        # parts[0]=type, parts[1]=IDIVF, parts[2]=PK, parts[3]=PHASE, parts[4]=PN
        type_field = parts[0]
        idivf = parts[1]
        # Preserve any trailing comment (e.g., SCEE, SCNB, or # comments)
        suffix = '      ' + '  '.join(parts[5:]) if len(parts) > 5 else ''
        return f"{type_field}   {idivf}    0.000       0.000           1.0{suffix}"

    def _run_tleap(self, tleap_input: str, tleap_file: str):
        """Write and run a tLEaP input file."""
        with open(tleap_file, 'w') as f:
            f.write(tleap_input)
        return subprocess.run(
            f"tleap -f {tleap_file}", shell=True, capture_output=True, text=True
        )

    def _raise_tleap_failure(self, result, expected_output: str):
        """Raise a detailed tLEaP failure with recent stdout/stderr."""
        logger.error("tLEaP failed to generate topology files.")
        logger.error(f"tLEaP stdout: {result.stdout[-500:] if result.stdout else 'empty'}")
        logger.error(f"tLEaP stderr: {result.stderr[-500:] if result.stderr else 'empty'}")
        raise RuntimeError(
            f"tLEaP failed to generate {expected_output}. Check that mol2 and frcmod files are valid.\n"
            f"tLEaP output: {result.stdout[-200:] if result.stdout else 'none'}"
        )

    def _generate_mol2_with_charges(self, mol_file, charge_method,
                                    resp_charges_file=None):
        """
        Generate MOL2 file with charges using antechamber.

        If resp_charges_file is provided, uses antechamber's -c rc -cf <file>
        mode to read pre-computed charges (e.g., from Psi4 RESP calculation)
        instead of computing them internally.
        """
        mol2_output = os.path.join(self.tleap_dir, 'ligand.mol2')
        ext = os.path.splitext(mol_file)[1].lower()
        input_format = {'.mol': 'mdl', '.pdb': 'pdb', '.mol2': 'mol2', '.sdf': 'sdf'}.get(ext, 'pdb')

        if resp_charges_file is not None:
            # Use pre-computed charges (e.g., from Psi4 RESP)
            # antechamber -c rc reads charges from file specified with -cf
            charges_path = os.path.abspath(resp_charges_file)

            # Ensure the charges file is in the format antechamber expects
            # (one charge per line, no header). Our calculate_resp_charges()
            # saves with np.savetxt which adds a # header line — strip it.
            clean_charges_path = os.path.join(self.tleap_dir, 'resp_charges_clean.dat')
            self._prepare_charges_file(charges_path, clean_charges_path)

            cmd = (
                f"antechamber -i {mol_file} -fi {input_format} "
                f"-o {mol2_output} -fo mol2 "
                f"-c rc -cf {clean_charges_path} "
                f"-at {self.force_field} -pf y"
            )
            logger.info(f"Using pre-computed RESP charges from: {resp_charges_file}")
        else:
            charge_flag = {
                'am1bcc': 'bcc', 'bcc': 'bcc',
                'resp': 'resp', 'gasteiger': 'gas'
            }.get(charge_method.lower(), 'bcc')
            cmd = (
                f"antechamber -i {mol_file} -fi {input_format} "
                f"-o {mol2_output} -fo mol2 "
                f"-c {charge_flag} -at {self.force_field} -pf y"
            )

        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if not os.path.exists(mol2_output) or os.path.getsize(mol2_output) == 0:
            error_msg = f"antechamber failed to generate MOL2 file.\nCommand: {cmd}"
            if result.stderr:
                error_msg += f"\nstderr: {result.stderr[-300:]}"
            raise RuntimeError(error_msg)
        return mol2_output

    def _prepare_charges_file(self, input_path: str, output_path: str):
        """
        Prepare a charges file for antechamber -c rc -cf mode.

        antechamber expects one charge per line with no header comments.
        Our calculate_resp_charges() uses np.savetxt which adds a '# ...' header.
        This method strips any comment lines and ensures the format is correct.
        """
        charges = []
        with open(input_path, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                try:
                    charges.append(float(stripped))
                except ValueError:
                    continue

        if not charges:
            raise ValueError(
                f"No valid charges found in {input_path}. "
                "Expected one numeric value per line."
            )

        # Write clean charges file (one per line, no header)
        with open(output_path, 'w') as f:
            for charge in charges:
                f.write(f"{charge:.6f}\n")

        logger.info(f"Prepared {len(charges)} charges for antechamber from {input_path}")

    def _run_parmchk2(self, mol2_file, frcmod_output):
        """Run parmchk2 to generate missing parameters."""
        cmd = f"parmchk2 -i {mol2_file} -f mol2 -o {frcmod_output} -s {self.force_field}"
        subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if not os.path.exists(frcmod_output):
            raise RuntimeError("parmchk2 failed to generate FRCMOD file.")
        return frcmod_output

__init__

__init__(work_dir: str = './work', force_field: str = 'gaff2')
Source code in parametrizani/topology_gen.py
def __init__(self, work_dir: str = './work', force_field: str = 'gaff2'):
    self.work_dir = create_work_dir(work_dir)
    self.force_field = force_field
    self.tleap_dir = os.path.join(self.work_dir, 'tleap_output')
    os.makedirs(self.tleap_dir, exist_ok=True)

generate_amber

generate_amber(mol_file: str, frcmod_file: Optional[str] = None, charge_method: str = 'am1bcc', mol2_file: Optional[str] = None, output_prefix: str = 'SYS', lib_name: Optional[str] = None, use_two_stage: bool = True, resp_charges_file: Optional[str] = None) -> Dict[str, Any]

Generate AMBER topology files using tLEaP.

Parameters:

Name Type Description Default
mol_file str

Path to input MOL/PDB file.

required
frcmod_file str

Path to custom FRCMOD file (e.g., with optimized dihedral parameters).

None
charge_method str

Charge method for antechamber ('am1bcc', 'resp', 'gasteiger'). If resp_charges_file is provided, this is ignored (charges are read from file using antechamber's -c rc mode).

'am1bcc'
mol2_file str

Path to existing MOL2 file with charges. If None, generates or reuses one.

None
output_prefix str

Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.

'SYS'
lib_name str

Library filename to write/read during two-stage tLEaP execution.

None
use_two_stage bool

If True, follow the original notebook workflow with saveoff/loadoff.

True
resp_charges_file str

Path to a file containing pre-computed RESP charges (one charge per line). Typically generated by calculate_resp_charges(). When provided, antechamber uses these charges directly (-c rc -cf ) instead of computing them. This allows using Psi4-computed RESP charges in the AMBER topology.

None
Source code in parametrizani/topology_gen.py
    def generate_amber(self, mol_file: str, frcmod_file: Optional[str] = None,
                       charge_method: str = 'am1bcc', mol2_file: Optional[str] = None,
                       output_prefix: str = 'SYS', lib_name: Optional[str] = None,
                       use_two_stage: bool = True,
                       resp_charges_file: Optional[str] = None) -> Dict[str, Any]:
        """
        Generate AMBER topology files using tLEaP.

        Parameters
        ----------
        mol_file : str
            Path to input MOL/PDB file.
        frcmod_file : str, optional
            Path to custom FRCMOD file (e.g., with optimized dihedral parameters).
        charge_method : str
            Charge method for antechamber ('am1bcc', 'resp', 'gasteiger').
            If resp_charges_file is provided, this is ignored (charges are read
            from file using antechamber's -c rc mode).
        mol2_file : str, optional
            Path to existing MOL2 file with charges. If None, generates or reuses one.
        output_prefix : str
            Prefix for output files (default 'SYS'). Use 'SYS_new' for regeneration.
        lib_name : str, optional
            Library filename to write/read during two-stage tLEaP execution.
        use_two_stage : bool
            If True, follow the original notebook workflow with saveoff/loadoff.
        resp_charges_file : str, optional
            Path to a file containing pre-computed RESP charges (one charge per line).
            Typically generated by calculate_resp_charges(). When provided, antechamber
            uses these charges directly (-c rc -cf <file>) instead of computing them.
            This allows using Psi4-computed RESP charges in the AMBER topology.
        """
        if mol2_file is None:
            mol2_candidates = [
                os.path.join(self.tleap_dir, 'new.mol2'),
                os.path.join(self.tleap_dir, 'ligand.mol2'),
            ]
            for candidate in mol2_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    mol2_file = candidate
                    logger.info(f"Reusing existing MOL2: {mol2_file}")
                    break
            if mol2_file is None:
                mol2_file = self._generate_mol2_with_charges(
                    mol_file, charge_method, resp_charges_file=resp_charges_file
                )

        if frcmod_file is not None:
            frcmod_path = os.path.join(self.tleap_dir, os.path.basename(frcmod_file))
            if os.path.abspath(frcmod_file) != os.path.abspath(frcmod_path):
                shutil.copy(frcmod_file, frcmod_path)
            else:
                frcmod_path = frcmod_file
        else:
            frcmod_candidates = [
                os.path.join(self.tleap_dir, 'ligand_new.frcmod'),
                os.path.join(self.tleap_dir, 'ligand.frcmod'),
            ]
            frcmod_path = None
            for candidate in frcmod_candidates:
                if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                    frcmod_path = candidate
                    break
            if frcmod_path is None:
                frcmod_path = os.path.join(self.tleap_dir, 'ligand.frcmod')
                self._run_parmchk2(mol2_file, frcmod_path)

        lib_default = 'lig_new.lib' if output_prefix.endswith('_new') else 'lig.lib'
        if lib_name is None:
            lib_name = lib_default

        stem = os.path.splitext(os.path.basename(mol2_file))[0] or 'ligand'
        paths = {
            'prmtop': os.path.join(self.tleap_dir, f'{output_prefix}.prmtop'),
            'inpcrd': os.path.join(self.tleap_dir, f'{output_prefix}.crd'),
            'pdb': os.path.join(self.tleap_dir, f'{output_prefix}.pdb'),
            'lib': os.path.join(self.tleap_dir, lib_name),
            'mol2': mol2_file,
            'frcmod': frcmod_path,
            'ligand_pdb': os.path.join(self.tleap_dir, f'{stem}_gaff.pdb'),
            'tleap_in': os.path.join(self.tleap_dir, 'tleap.in'),
            'tleap_stage1': os.path.join(self.tleap_dir, 'tleap_stage1.in'),
            'tleap_stage2': os.path.join(self.tleap_dir, 'tleap_stage2.in'),
        }

        if use_two_stage:
            stage1_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
savepdb LIG {paths['ligand_pdb']}
quit"""
            stage1_result = self._run_tleap(stage1_input, paths['tleap_stage1'])
            if not os.path.exists(paths['lib']) or os.path.getsize(paths['lib']) == 0:
                self._raise_tleap_failure(stage1_result, paths['lib'])

            stage2_input = f"""source leaprc.protein.ff19SB
source leaprc.DNA.OL15
source leaprc.RNA.OL3
source leaprc.GLYCAM_06j-1
source leaprc.{self.force_field}
loadamberparams {paths['frcmod']}
loadoff {paths['lib']}
SYS = loadmol2 {paths['mol2']}
alignaxes SYS
check SYS
charge SYS
set SYS box {{50,50,50}}
saveamberparm SYS {paths['prmtop']} {paths['inpcrd']}
savepdb SYS {paths['pdb']}
quit"""
            result = self._run_tleap(stage2_input, paths['tleap_stage2'])
        else:
            tleap_input = f"""source leaprc.protein.ff19SB
source leaprc.{self.force_field}
LIG = loadmol2 {paths['mol2']}
loadamberparams {paths['frcmod']}
saveoff LIG {paths['lib']}
saveamberparm LIG {paths['prmtop']} {paths['inpcrd']}
savepdb LIG {paths['pdb']}
quit"""
            result = self._run_tleap(tleap_input, paths['tleap_in'])

        if not os.path.exists(paths['prmtop']) or os.path.getsize(paths['prmtop']) == 0:
            self._raise_tleap_failure(result, paths['prmtop'])

        logger.info(f"AMBER topology files generated (prefix={output_prefix})")
        return paths

generate_gromacs

generate_gromacs(prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]

Generate GROMACS topology files from AMBER topology.

Follows the original ParametrizANI notebook approach: 1. Load prmtop/inpcrd with OpenMM 2. Create the OpenMM System (which correctly encodes all parameters) 3. Use parmed.openmm.load_topology() to build the parmed structure 4. Save to GROMACS format (.top, .gro, .pdb)

This ensures all dihedral parameters (including optimized ones) are correctly transferred to the GROMACS topology.

Source code in parametrizani/topology_gen.py
def generate_gromacs(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
    """
    Generate GROMACS topology files from AMBER topology.

    Follows the original ParametrizANI notebook approach:
    1. Load prmtop/inpcrd with OpenMM
    2. Create the OpenMM System (which correctly encodes all parameters)
    3. Use parmed.openmm.load_topology() to build the parmed structure
    4. Save to GROMACS format (.top, .gro, .pdb)

    This ensures all dihedral parameters (including optimized ones) are
    correctly transferred to the GROMACS topology.
    """
    import parmed
    import openmm as mm
    from openmm.app import AmberPrmtopFile, AmberInpcrdFile
    from openmm import unit

    gmx_dir = os.path.join(self.work_dir, 'gromacs')
    os.makedirs(gmx_dir, exist_ok=True)

    # Load and create OpenMM system (matches original notebook approach)
    prmtop = AmberPrmtopFile(prmtop_file)
    inpcrd = AmberInpcrdFile(inpcrd_file)
    omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

    # Create parmed structure from OpenMM system (preserves all parameters)
    parmed_structure = parmed.openmm.load_topology(
        prmtop.topology, omm_system, inpcrd.positions
    )

    paths = {
        'top': os.path.join(gmx_dir, 'system.top'),
        'gro': os.path.join(gmx_dir, 'system.gro'),
        'pdb': os.path.join(gmx_dir, 'system.pdb'),
    }
    parmed_structure.save(paths['top'], overwrite=True)
    parmed_structure.save(paths['gro'], overwrite=True)
    parmed_structure.save(paths['pdb'], overwrite=True)
    logger.info("GROMACS topology files generated")
    return paths

generate_openmm

generate_openmm(prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]

Generate OpenMM XML system file and PDB.

Follows the original ParametrizANI notebook approach: 1. Load prmtop/inpcrd with OpenMM 2. Create the OpenMM System (constraints=None to preserve all parameters) 3. Serialize the system to XML 4. Save PDB via parmed.openmm.load_topology()

This ensures all dihedral parameters (including optimized ones) are correctly encoded in the XML system file.

Source code in parametrizani/topology_gen.py
def generate_openmm(self, prmtop_file: str, inpcrd_file: str) -> Dict[str, Any]:
    """
    Generate OpenMM XML system file and PDB.

    Follows the original ParametrizANI notebook approach:
    1. Load prmtop/inpcrd with OpenMM
    2. Create the OpenMM System (constraints=None to preserve all parameters)
    3. Serialize the system to XML
    4. Save PDB via parmed.openmm.load_topology()

    This ensures all dihedral parameters (including optimized ones) are
    correctly encoded in the XML system file.
    """
    import parmed
    import openmm as mm
    from openmm.app import AmberPrmtopFile, AmberInpcrdFile, PDBFile
    from openmm import unit, XmlSerializer

    omm_dir = os.path.join(self.work_dir, 'openmm')
    os.makedirs(omm_dir, exist_ok=True)

    prmtop = AmberPrmtopFile(prmtop_file)
    inpcrd = AmberInpcrdFile(inpcrd_file)
    omm_system = prmtop.createSystem(nonbondedCutoff=1*unit.nanometer, constraints=None)

    # Serialize the system to XML
    xml_content = XmlSerializer.serialize(omm_system)

    paths = {
        'xml': os.path.join(omm_dir, 'system.xml'),
        'pdb': os.path.join(omm_dir, 'system.pdb'),
    }
    with open(paths['xml'], 'w') as f:
        f.write(xml_content)

    # Save PDB via parmed (matches original notebook)
    parmed_structure = parmed.openmm.load_topology(
        prmtop.topology, omm_system, inpcrd.positions
    )
    parmed_structure.save(paths['pdb'], overwrite=True)

    logger.info("OpenMM system files generated")
    return paths

generate_all

generate_all(mol_file: str, frcmod_file=None, charge_method='am1bcc', formats=None, resp_charges_file=None)

Generate topology files in all requested formats.

Source code in parametrizani/topology_gen.py
def generate_all(self, mol_file: str, frcmod_file=None, charge_method='am1bcc',
                 formats=None, resp_charges_file=None):
    """Generate topology files in all requested formats."""
    if formats is None:
        formats = ['amber', 'gromacs', 'openmm']
    results = {}
    amber_result = self.generate_amber(
        mol_file, frcmod_file, charge_method,
        resp_charges_file=resp_charges_file
    )
    results['amber'] = amber_result
    if 'gromacs' in formats:
        results['gromacs'] = self.generate_gromacs(amber_result['prmtop'], amber_result['inpcrd'])
    if 'openmm' in formats:
        results['openmm'] = self.generate_openmm(amber_result['prmtop'], amber_result['inpcrd'])
    return results

update_frcmod

update_frcmod(original_frcmod: str, optimized_params: str, dihedral_indices: Optional[List[int]] = None, mol2_file: Optional[str] = None, output_file=None)

Update FRCMOD file with optimized dihedral parameters.

Follows the original ParametrizANI logic: 1. Find DIHE lines matching the central 2-atom pattern → set to zero 2. Delete DIHE lines matching the full 4-atom pattern 3. Insert new optimized parameters right after the DIHE header using the reversed orientation derived from the MOL2 atom types.

Source code in parametrizani/topology_gen.py
def update_frcmod(self, original_frcmod: str, optimized_params: str,
                  dihedral_indices: Optional[List[int]] = None,
                  mol2_file: Optional[str] = None, output_file=None):
    """
    Update FRCMOD file with optimized dihedral parameters.

    Follows the original ParametrizANI logic:
    1. Find DIHE lines matching the central 2-atom pattern → set to zero
    2. Delete DIHE lines matching the full 4-atom pattern
    3. Insert new optimized parameters right after the DIHE header using the
       reversed orientation derived from the MOL2 atom types.
    """
    if output_file is None:
        output_file = os.path.join(self.tleap_dir, 'ligand_new.frcmod')
    if os.path.abspath(original_frcmod) != os.path.abspath(output_file):
        shutil.copy(original_frcmod, output_file)

    if mol2_file is None:
        mol2_candidates = [
            os.path.join(self.tleap_dir, 'ligand.mol2'),
            os.path.join(self.tleap_dir, 'new.mol2'),
        ]
        for candidate in mol2_candidates:
            if os.path.exists(candidate) and os.path.getsize(candidate) > 0:
                mol2_file = candidate
                break

    if mol2_file and dihedral_indices is not None:
        central_pattern, reversed_central_pattern, full_pattern, reversed_full_pattern = (
            self._get_patterns_from_mol2(mol2_file, dihedral_indices)
        )
    else:
        full_pattern, reversed_full_pattern, central_pattern, reversed_central_pattern = (
            self._get_patterns_from_params(optimized_params)
        )

    with open(output_file, 'r') as f:
        content = f.read()
    content = content.replace(' -', '-')
    lines = content.split('\n')

    optimized_lines = self._prepare_optimized_param_lines(
        optimized_params,
        full_pattern=full_pattern,
        reversed_full_pattern=reversed_full_pattern,
    )

    modified_lines = []
    in_dihe_section = False
    dihe_header_idx = -1

    for line in lines:
        stripped = line.strip()

        if stripped == 'DIHE':
            in_dihe_section = True
            modified_lines.append(line)
            dihe_header_idx = len(modified_lines) - 1
            continue
        if in_dihe_section and stripped and stripped.isupper() and '-' not in stripped:
            in_dihe_section = False
            modified_lines.append(line)
            continue

        if in_dihe_section and stripped:
            if self._matches_full_pattern(line, full_pattern, reversed_full_pattern):
                continue
            if self._contains_central_pattern(line, central_pattern, reversed_central_pattern):
                modified_lines.append(self._zero_out_line(line))
            else:
                modified_lines.append(line)
        else:
            modified_lines.append(line)

    # Generate explicit zero lines for ALL quartets around the central bond
    # that aren't already in the frcmod. This prevents tLEaP from falling back
    # to generic wildcard params (e.g. X-ce-c-X) which would add unwanted
    # torsion energy on top of our optimized parameters.
    extra_zero_lines = []
    if mol2_file and dihedral_indices is not None:
        try:
            all_quartets = self._get_all_central_bond_quartets(mol2_file, dihedral_indices)
            # Collect patterns already handled (zeroed or in optimized lines)
            existing_patterns = set()
            for line in modified_lines:
                stripped = line.strip()
                if stripped and '-' in stripped:
                    parts = stripped.split()
                    if parts:
                        p = parts[0]
                        existing_patterns.add(p)
                        # Also add reversed
                        elems = p.split('-')
                        if len(elems) == 4:
                            existing_patterns.add('-'.join(reversed(elems)))
            for line in optimized_lines:
                parts = line.split()
                if parts:
                    p = parts[0]
                    existing_patterns.add(p)
                    elems = p.split('-')
                    if len(elems) == 4:
                        existing_patterns.add('-'.join(reversed(elems)))

            for quartet in all_quartets:
                rev_quartet = '-'.join(reversed(quartet.split('-')))
                if quartet not in existing_patterns and rev_quartet not in existing_patterns:
                    extra_zero_lines.append(f"{quartet}   1    0.000       0.000           1.0")
                    existing_patterns.add(quartet)
                    existing_patterns.add(rev_quartet)
        except Exception as e:
            logger.warning(f"Could not generate extra zero lines: {e}")

    if dihe_header_idx >= 0:
        # Insert optimized lines first, then extra zeros
        all_new_lines = optimized_lines + extra_zero_lines
        for new_line in reversed(all_new_lines):
            modified_lines.insert(dihe_header_idx + 1, new_line)
    else:
        modified_lines.append('DIHE')
        modified_lines.extend(optimized_lines)
        modified_lines.extend(extra_zero_lines)
        modified_lines.append('')

    with open(output_file, 'w') as f:
        f.write('\n'.join(modified_lines))

    logger.info(
        f"Updated FRCMOD with MOL2-matched patterns and inserted params: {output_file}"
    )
    return output_file

package_results

package_results(results, output_name='topology_files')

Package all generated files into a ZIP archive.

Source code in parametrizani/topology_gen.py
def package_results(self, results, output_name='topology_files'):
    """Package all generated files into a ZIP archive."""
    zip_path = os.path.join(self.work_dir, f'{output_name}.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for format_name, files in results.items():
            for file_type, file_path in files.items():
                if file_path and os.path.exists(file_path):
                    arcname = f'{format_name}/{os.path.basename(file_path)}'
                    zf.write(file_path, arcname)
    logger.info(f"Results packaged: {zip_path}")
    return zip_path