diff --git a/interfaces/Multiwfn_interface/molden.py b/interfaces/Multiwfn_interface/molden.py new file mode 100644 index 00000000000..7149898e90e --- /dev/null +++ b/interfaces/Multiwfn_interface/molden.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Generate ABACUS Molden files with [Nval] metadata for Multiwfn. + +Keep this script in the Multiwfn_interface directory so it can locate +tools/molden/molden.py by relative path. Users may add this directory to PATH +and run chmod +x on this file to use it as a command. +""" + +from pathlib import Path +import argparse +import importlib.util +import os + +DEFAULT_FOLDER = "$(pwd)" + + +def load_molden_module(): + """Load tools/molden/molden.py without requiring it to be installed.""" + molden_path = Path(__file__).resolve().parents[2] / "tools" / "molden" / "molden.py" + spec = importlib.util.spec_from_file_location("abacus_molden", molden_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def write_molden_nval(kinds, zvals): + """Write Multiwfn-oriented Molden [Nval] values per element.""" + out = "[Nval]\n" + for elem, zval in zip(kinds, zvals): + out += f"{elem} {zval:g}\n" + return out + + +def make_molden_nval_section(molden, folder): + """Build [Nval] from UPF z_valence without adding this logic to tools/molden.""" + folder = os.path.abspath(folder) + kv = molden.read_abacus_input(os.path.join(folder, "INPUT")) + stru = molden.read_abacus_stru(os.path.join(folder, kv.get("stru_file", "STRU"))) + pseudo_dir = kv.get("pseudo_dir", "./") + if not os.path.isabs(pseudo_dir): + pseudo_dir = os.path.join(folder, pseudo_dir) + kinds = [spec["symbol"] for spec in stru["species"]] + fpps = [os.path.abspath(os.path.join(pseudo_dir, spec["pp_file"])) for spec in stru["species"]] + zvals = [molden.read_upf_z_valence(fpp) for fpp in fpps] + return write_molden_nval(kinds, zvals) + + +def insert_molden_nval(text, nval): + """Insert [Nval] after [Atoms] and before [GTO].""" + marker = "[GTO]\n" + index = text.find(marker) + if index < 0: + raise ValueError("Cannot insert [Nval]: [GTO] section was not found") + return text[:index] + nval + text[index:] + + +def moldengen(folder, ndigits=3, ngto=7, rel_r="2", fmolden="ABACUS_Multiwfn.molden", write_cell=True, with_nval=True, with_pseudo=False): + """Call tools/molden/molden.py with Multiwfn-oriented [Nval] metadata.""" + molden = load_molden_module() + pseudo = "pseudo" if with_pseudo else "none" + out = molden.moldengen(folder, ndigits, ngto, rel_r, fmolden, write_cell, pseudo) + if with_nval: + out = insert_molden_nval(out, make_molden_nval_section(molden, folder)) + with open(fmolden, "w") as file: + file.write(out) + return out + + + +def _argparse(): + def str_to_bool(value): + if isinstance(value, bool): + return value + value = value.lower() + if value in ("true", "t", "yes", "y", "1"): + return True + if value in ("false", "f", "no", "n", "0"): + return False + raise argparse.ArgumentTypeError("expected true or false") + + parser = argparse.ArgumentParser( + description="Generate Molden file from ABACUS LCAO calculation for Multiwfn", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-f", "--folder", type=str, default=DEFAULT_FOLDER, help="the folder of the ABACUS calculation") + parser.add_argument("-n", "--ndigits", type=int, default=3, help="the number of digits for the MO coefficients") + parser.add_argument("-g", "--ngto", type=int, default=7, help="the number of GTOs to fit ABACUS NAOs") + parser.add_argument("-r", "--rel_r", type=str, default="2", help="the relative cutoff radius for the GTOs; comma-separated values enable multi-start fitting") + parser.add_argument("--with-cell", type=str_to_bool, default=True, help="whether to write the Molden [Cell] section") + parser.add_argument("--with-Nval", dest="with_nval", type=str_to_bool, default=True, help="whether to write the Molden [Nval] section") + parser.add_argument("--with-pseudo", type=str_to_bool, default=False, help="whether to write the Molden [Pseudo] section") + parser.add_argument("-o", "--output", type=str, default="ABACUS_Multiwfn.molden", help="the output Molden file name") + args = parser.parse_args() + if args.folder == DEFAULT_FOLDER: + args.folder = os.getcwd() + return args + + +if __name__ == "__main__": + args = _argparse() + moldengen(args.folder, args.ndigits, args.ngto, args.rel_r, args.output, args.with_cell, args.with_nval, args.with_pseudo) diff --git a/tools/molden/molden.py b/tools/molden/molden.py index 9908c6e7ce7..c789f201e2d 100644 --- a/tools/molden/molden.py +++ b/tools/molden/molden.py @@ -1,3 +1,10 @@ +#!/usr/bin/env python3 +''' +This script is implemented referring to Molden file standard: +https://www.theochem.ru.nl/molden/molden_format.html +https://zorkzou.github.io/Molden2AIM/readme.html +''' + #################### # Impl. of NAO2GTO # #################### @@ -500,14 +507,14 @@ def write_molden_gto(total_gto: GTORadials, labels: list): out += total_gto.molden(l, iat + 1) # iat starts from 0, molden starts from 1 return out -def read_abacus_lowf(flowf, pat=r'^WFC_NAO_(K\d+|GAMMA(1|2)).txt$'): - """Read the ABACUS WFC_NAO_(K|GAMMA) file, which contains the coefficients of MOs. +def read_abacus_lowf(flowf, pat=r'^(WFC_NAO_(K\d+|GAMMA\d+)(_ION\d+)?|wf((k\d+)?(s\d+)?|(s\d+)?(k\d+)?)(g\d+)?_nao)\.txt$'): + """Read the ABACUS LCAO wavefunction file, which contains the coefficients of MOs. Please note, only Gamma-only calculation is supported. For ABACUS version earlier than 3.7.1, the `flowf` file is something like LOWF_K_. Manually change the file name and run is okay. Args: - flowf (str): the file name of the ABACUS WFC_NAO_(K|GAMMA) file + flowf (str): the file name of the ABACUS LCAO wavefunction file pat (str): the pattern to match the file name Returns: @@ -549,7 +556,8 @@ def read_abacus_lowf(flowf, pat=r'^WFC_NAO_(K\d+|GAMMA(1|2)).txt$'): i += 1 # check if the data we collected has the correct number of elements - if "WFC_NAO_K" in flowf: # multi-k case, the coef will be complex + basename = os.path.basename(flowf) + if "WFC_NAO_K" in basename or re.match(r"^wf(k\d+|s\d+k\d+)", basename): # multi-k case, the coef will be complex data = [complex(data[i], data[i+1]) for i in range(0, len(data), 2)] data = [d.real for d in data] data = np.array(data).reshape(nbands, nlocal) @@ -560,6 +568,106 @@ def read_abacus_lowf(flowf, pat=r'^WFC_NAO_(K\d+|GAMMA(1|2)).txt$'): return nbands, nlocal, occ, band, ener, data +def find_abacus_lowf_files(nspin): + """Find ABACUS LCAO wavefunction files in OUT.*. + + The converter keeps compatibility with older WFC_NAO_* names and newer + wf*_nao.txt names. The nspin=4 spinor/SOC format is not mapped to Molden + because the current writer only emits restricted/Alpha/Beta MOs. K-style + file selection uses the first k point when several k-point files are found. + """ + import glob + import os + import re + + if nspin == 4: + raise NotImplementedError("nspin=4/SOC spinor wavefunctions are not supported by this Molden writer") + if nspin not in (1, 2): + raise NotImplementedError(f"Unsupported nspin={nspin}") + + def newest(pattern): + def key(path): + nums = [int(x) for x in re.findall(r"\d+", os.path.basename(path))] + return nums, path + matches = sorted(glob.glob(pattern), key=key) + return matches[-1] if matches else None + + def old_k_file(spin): + """Select the first-k old WFC_NAO_K* file for a spin channel.""" + files_by_k = {} + for path in glob.glob("WFC_NAO_K*.txt"): + match = re.match(r"^WFC_NAO_K(\d+)(?:_ION(\d+))?\.txt$", os.path.basename(path)) + if match is None: + continue + ik = int(match.group(1)) + ion = int(match.group(2) or 0) + if ik not in files_by_k or (ion, path) > files_by_k[ik][0]: + files_by_k[ik] = ((ion, path), path) + k_indices = sorted(files_by_k) + if not k_indices: + return None + if nspin == 1: + target = k_indices[0] + else: + nk = len(k_indices) // 2 + if nk == 0: + return None + target = k_indices[0] if spin == 1 else k_indices[nk] + return files_by_k.get(target, (None, None))[1] + + if nspin == 1: + pattern_groups = [[ + "WFC_NAO_GAMMA1.txt", + "WFC_NAO_GAMMA1_ION*.txt", + "OLD_WFC_NAO_K_SPIN1", + "wf_nao.txt", + "wfk1_nao.txt", + "WFC/wfg*_nao.txt", + "WFC/wfk1g*_nao.txt", + ]] + else: + pattern_groups = [ + [ + "WFC_NAO_GAMMA1.txt", + "WFC_NAO_GAMMA1_ION*.txt", + "OLD_WFC_NAO_K_SPIN1", + "wfs1_nao.txt", + "wfs1k1_nao.txt", + "wfk1s1_nao.txt", + "WFC/wfs1g*_nao.txt", + "WFC/wfs1k1g*_nao.txt", + "WFC/wfk1s1g*_nao.txt", + ], + [ + "WFC_NAO_GAMMA2.txt", + "WFC_NAO_GAMMA2_ION*.txt", + "OLD_WFC_NAO_K_SPIN2", + "wfs2_nao.txt", + "wfs2k1_nao.txt", + "wfk1s2_nao.txt", + "WFC/wfs2g*_nao.txt", + "WFC/wfs2k1g*_nao.txt", + "WFC/wfk1s2g*_nao.txt", + ], + ] + + files = [] + for patterns in pattern_groups: + found = None + for pattern in patterns: + if pattern == "OLD_WFC_NAO_K_SPIN1": + found = old_k_file(1) + elif pattern == "OLD_WFC_NAO_K_SPIN2": + found = old_k_file(2) + else: + found = newest(pattern) + if found is not None: + break + if found is None: + raise FileNotFoundError(f"Cannot find ABACUS LCAO wavefunction file. Tried: {', '.join(patterns)}") + files.append(found) + return files + def write_molden_mo(coefs, mo_map, occ, ener, spin=0, ndigits=3): """Write Molden [MO] section from the coefficients, occupations and energies. @@ -848,20 +956,24 @@ def read_upf_z_valence(fupf): return int(round(zval)) return zval -def write_molden_nval(kinds, nvals): - """Write Molden [Nval] effective valence charges per element.""" - out = "[Nval]\n" - for elem, nval in zip(kinds, nvals): - out += f"{elem} {nval:g}\n" +def write_molden_pseudo(kinds, labels_kinds_map, zvals): + """Write Molden [Pseudo] effective nuclear charges per atom.""" + out = "[Pseudo]\n" + for i, itype in enumerate(labels_kinds_map): + out += f"{kinds[itype]} {i + 1} {zvals[itype]:g}\n" return out -def write_molden_nval_from_stru(stru, pseudo_dir): - """Build [Nval] from STRU species and their UPF z_valence values.""" +def write_molden_pseudopotential_from_stru(stru, pseudo_dir, labels_kinds_map, pseudo="pseudo"): + """Build optional pseudopotential-related sections from UPF z_valence values.""" import os + if pseudo == "none": + return "" kinds = [spec['symbol'] for spec in stru['species']] fpps = [os.path.abspath(os.path.join(pseudo_dir, spec['pp_file'])) for spec in stru["species"]] - nvals = [read_upf_z_valence(fpp) for fpp in fpps] - return write_molden_nval(kinds, nvals) + zvals = [read_upf_z_valence(fpp) for fpp in fpps] + if pseudo == "pseudo": + return write_molden_pseudo(kinds, labels_kinds_map, zvals) + raise ValueError(f"Unknown Molden pseudopotential mode {pseudo}") def read_abacus_input(finput): """Read the ABACUS input file and return the key-value pairs @@ -961,7 +1073,7 @@ def indexing_mo(total_gto: GTORadials, labels: list): i += 2*l+1 return out -def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden", write_nval=False): +def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden", write_cell=False, pseudo="none"): """Entrance function: generate molden file by reading the outdir of ABACUS, for only LCAO calculation. @@ -976,9 +1088,16 @@ def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden", import os import numpy as np + folder = os.path.abspath(folder) files = os.listdir(folder) assert ("STRU" in files) and ("INPUT" in files) and ("KPT" in files),\ "STRU, INPUT, KPT files are required in the folder." + input_file = os.path.join(folder, "INPUT") + print(f"MOLDEN: Processing ABACUS calculation directory {folder}") + kv = read_abacus_input(input_file) + nspin = int(kv.get("nspin", 1)) + if nspin == 4: + raise NotImplementedError("nspin=4/SOC spinor wavefunctions are not supported by this Molden writer") cwd = os.path.abspath(os.getcwd()) os.chdir(folder) #################### @@ -989,12 +1108,10 @@ def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden", #################### # write the cell # #################### - kv = read_abacus_input("INPUT") _ = read_abacus_kpt(kv.get("kpoint_file", "KPT")) stru = read_abacus_stru(kv.get("stru_file", "STRU")) - out += write_molden_cell(stru['lat']['const'], stru['lat']['vec']) - if write_nval: - out += write_molden_nval_from_stru(stru, kv.get("pseudo_dir", "./")) + if write_cell: + out += write_molden_cell(stru['lat']['const'], stru['lat']['vec']) #################### # write the atoms # @@ -1019,6 +1136,7 @@ def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden", else: raise NotImplementedError(f"Unknown coordinate type {stru['coord_type']}") out += write_molden_atoms(labels, kinds, labels_kinds_map, coords.tolist()) + out += write_molden_pseudopotential_from_stru(stru, kv.get("pseudo_dir", "./"), labels_kinds_map, pseudo) #################### # write the basis # @@ -1039,17 +1157,18 @@ def moldengen(folder: str, ndigits=3, ngto=7, rel_r=2, fmolden="ABACUS.molden", #################### suffix = kv.get("suffix", "ABACUS") out_dir = ".".join(["OUT", suffix]) + print(f"MOLDEN: Parsing ABACUS output directory {os.path.abspath(out_dir)}") os.chdir(out_dir) - nspin = int(kv.get("nspin", 1)) - mo_files = "WFC_NAO_GAMMA1.txt" - mo_files = "WFC_NAO_K1.txt" if mo_files not in os.listdir() else mo_files - mo_files = [mo_files] if nspin == 1 else [mo_files, mo_files.replace("1.txt", "2.txt")] + mo_files = find_abacus_lowf_files(nspin) for ispin, mo_file in enumerate(mo_files): + print(f"MOLDEN: Parsing wavefunction file {os.path.abspath(mo_file)}") nbands, nlocal, occ, band, ener, data_np = read_abacus_lowf(mo_file) coefs = data_np.tolist() out += write_molden_mo(coefs, mo_map, occ, ener, spin=ispin, ndigits=ndigits) os.chdir(cwd) + fmolden = os.path.abspath(fmolden) + print(f"MOLDEN: Writing Molden file {fmolden}") with open(fmolden, "w") as file: file.write(out) return out @@ -1392,7 +1511,8 @@ def _argparse(): -n, --ndigits: the number of digits for the MO coefficients. For MO coefficients smaller than 10^-n, they will be set to 0. -g, --ngto: the number of GTOs to fit ABACUS NAOs. -r, --rel_r: the relative cutoff radius for the GTOs. - --write-nval: write the Molden [Nval] section from UPF z_valence. + --with-cell: write the Molden [Cell] section. + --with-pseudo: write the Molden [Pseudo] section from UPF z_valence. -o, --output: the output Molden file name. """ import argparse @@ -1409,7 +1529,8 @@ def _argparse(): parser.add_argument("-n", "--ndigits", type=int, default=3, help="the number of digits for the MO coefficients") parser.add_argument("-g", "--ngto", type=int, default=7, help="the number of GTOs to fit ABACUS NAOs") parser.add_argument("-r", "--rel_r", type=str, default="2", help="the relative cutoff radius for the GTOs; comma-separated values enable multi-start fitting") - parser.add_argument("--write-nval", action="store_true", help="write the Molden [Nval] section from UPF z_valence") + parser.add_argument("--with-cell", action="store_true", help="write the Molden [Cell] section") + parser.add_argument("--with-pseudo", action="store_true", help="write the Molden [Pseudo] section from UPF z_valence") parser.add_argument("-o", "--output", type=str, default="ABACUS.molden", help="the output Molden file name") args = parser.parse_args() return args @@ -1417,7 +1538,8 @@ def _argparse(): if __name__ == "__main__": #unittest.main(exit=False) args = _argparse() - moldengen(args.folder, args.ndigits, args.ngto, args.rel_r, args.output, args.write_nval) + pseudo = "pseudo" if args.with_pseudo else "none" + moldengen(args.folder, args.ndigits, args.ngto, args.rel_r, args.output, args.with_cell, pseudo) print(" ".join("*"*10).center(80, " ")) print(f"""MOLDEN: Generated Molden file {args.output} from ABACUS calculation in folder {args.folder}. WARNING: use at your own risk because the NAO2GTO will not always conserve the shape of radial function, therefore