From e651d903bb2b30e9d17aa6b0b468b1ff20dfaf32 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Wed, 11 Sep 2024 13:23:33 +0800 Subject: [PATCH 1/8] docstring tweak and support Path --- src/pymatgen/io/vasp/outputs.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 6b7f9f5272b..e0a3030a80c 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -4262,29 +4262,30 @@ class VaspParseError(ParseError): def get_band_structure_from_vasp_multiple_branches( - dir_name: str, + dir_name: PathLike, efermi: float | None = None, projections: bool = False, ) -> BandStructureSymmLine | BandStructure | None: """Get band structure info from a VASP directory. - It takes into account that a run can be divided in several branches named - "branch_x". If the run has not been divided in branches the method will - turn to parsing vasprun.xml directly. + It takes into account that a run can be divided in several branches, + each inside a directory named "branch_x". If the run has not been + divided in branches the function will turn to parse vasprun.xml + directly from the selected directory. Args: - dir_name: Directory containing all bandstructure runs. - efermi: Efermi for bandstructure. - projections: True if you want to get the data on site projections if - any. Note that this is sometimes very large + dir_name (PathLike): Parent directory containing all bandstructure runs. + efermi (float): Fermi level for bandstructure. + projections (bool): True if you want to get the data on site + projections if any. Note that this is sometimes very large Returns: - A BandStructure Object. + A BandStructure/BandStructureSymmLine Object. None is there's a parsing error. """ - # TODO: Add better error handling!!! + # TODO: Add better error handling if os.path.isfile(f"{dir_name}/branch_0"): - # Get all branch dir names + # Get all branch directory names branch_dir_names = [os.path.abspath(d) for d in glob(f"{dir_name}/branch_*") if os.path.isdir(d)] # Sort by the directory name (e.g, branch_10) @@ -4299,7 +4300,7 @@ def get_band_structure_from_vasp_multiple_branches( branches.append(run.get_band_structure(efermi=efermi)) else: # TODO: It might be better to throw an exception - warnings.warn(f"Skipping {dname}. Unable to find {xml_file}") + warnings.warn(f"Skipping {dname}. Unable to find {xml_file}", stacklevel=2) return get_reconstructed_band_structure(branches, efermi) From e524dfd29692f72a699971fdf6d81309d0d3180d Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Wed, 11 Sep 2024 13:24:54 +0800 Subject: [PATCH 2/8] isfile -> isdir fix --- src/pymatgen/io/vasp/outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index e0a3030a80c..cc7a1ff1b67 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -4284,7 +4284,7 @@ def get_band_structure_from_vasp_multiple_branches( None is there's a parsing error. """ # TODO: Add better error handling - if os.path.isfile(f"{dir_name}/branch_0"): + if os.path.isdir(f"{dir_name}/branch_0"): # Get all branch directory names branch_dir_names = [os.path.abspath(d) for d in glob(f"{dir_name}/branch_*") if os.path.isdir(d)] From 0a03a914982969c265cac5ac1862f8ceb5a6163d Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Wed, 11 Sep 2024 13:35:56 +0800 Subject: [PATCH 3/8] raise error if vasprun.xml missing in any branch dir --- src/pymatgen/io/vasp/outputs.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index cc7a1ff1b67..6aa5b47df16 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -4281,31 +4281,27 @@ def get_band_structure_from_vasp_multiple_branches( Returns: A BandStructure/BandStructureSymmLine Object. - None is there's a parsing error. + None if there is a parsing error. """ - # TODO: Add better error handling if os.path.isdir(f"{dir_name}/branch_0"): - # Get all branch directory names + # Get and sort all branch directories branch_dir_names = [os.path.abspath(d) for d in glob(f"{dir_name}/branch_*") if os.path.isdir(d)] - - # Sort by the directory name (e.g, branch_10) sorted_branch_dir_names = sorted(branch_dir_names, key=lambda x: int(x.split("_")[-1])) - # Populate branches with Bandstructure instances - branches = [] - for dname in sorted_branch_dir_names: - xml_file = f"{dname}/vasprun.xml" - if os.path.isfile(xml_file): - run = Vasprun(xml_file, parse_projected_eigen=projections) - branches.append(run.get_band_structure(efermi=efermi)) - else: - # TODO: It might be better to throw an exception - warnings.warn(f"Skipping {dname}. Unable to find {xml_file}", stacklevel=2) + # Collect BandStructure from all branches + bs_branches: list[BandStructure | BandStructureSymmLine] = [] + for directory in sorted_branch_dir_names: + xml_file = f"{directory}/vasprun.xml" + if not os.path.isfile(xml_file): + raise FileNotFoundError(f"cannot find vasprun.xml in {directory=}") + + run = Vasprun(xml_file, parse_projected_eigen=projections) + bs_branches.append(run.get_band_structure(efermi=efermi)) - return get_reconstructed_band_structure(branches, efermi) + return get_reconstructed_band_structure(bs_branches, efermi) + # Read vasprun.xml directly if no branch head (branch_0) is found xml_file = f"{dir_name}/vasprun.xml" - # Better handling of Errors if os.path.isfile(xml_file): return Vasprun(xml_file, parse_projected_eigen=projections).get_band_structure( kpoints_filename=None, efermi=efermi From c134361c10bd3eb3e49452349dfe8f2d1cf67eb7 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Wed, 11 Sep 2024 17:47:03 +0800 Subject: [PATCH 4/8] sort pymatgen core imports --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index adcb6cd125d..e1f2b45ba4c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -178,7 +178,7 @@ perform further structure manipulation or analyses. Here are some quick examples of the core capabilities and objects: ```python -from pymatgen.core import Element, Composition, Lattice, Structure, Molecule +from pymatgen.core import Composition, Element, Lattice, Molecule, Structure # Integrated symmetry analysis tools from spglib from pymatgen.symmetry.analyzer import SpacegroupAnalyzer From ad7fac32b78617328a2222e554240e5b259d6069 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Wed, 11 Sep 2024 18:19:44 +0800 Subject: [PATCH 5/8] add some unit test --- src/pymatgen/io/vasp/outputs.py | 16 +++++++++------- tests/io/vasp/test_outputs.py | 34 ++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 6aa5b47df16..84173cac25a 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -4281,7 +4281,7 @@ def get_band_structure_from_vasp_multiple_branches( Returns: A BandStructure/BandStructureSymmLine Object. - None if there is a parsing error. + None if no vasprun.xml found in given directory and branch directory. """ if os.path.isdir(f"{dir_name}/branch_0"): # Get and sort all branch directories @@ -4291,22 +4291,24 @@ def get_band_structure_from_vasp_multiple_branches( # Collect BandStructure from all branches bs_branches: list[BandStructure | BandStructureSymmLine] = [] for directory in sorted_branch_dir_names: - xml_file = f"{directory}/vasprun.xml" - if not os.path.isfile(xml_file): + vasprun_file = f"{directory}/vasprun.xml" + if not os.path.isfile(vasprun_file): raise FileNotFoundError(f"cannot find vasprun.xml in {directory=}") - run = Vasprun(xml_file, parse_projected_eigen=projections) + run = Vasprun(vasprun_file, parse_projected_eigen=projections) bs_branches.append(run.get_band_structure(efermi=efermi)) return get_reconstructed_band_structure(bs_branches, efermi) # Read vasprun.xml directly if no branch head (branch_0) is found - xml_file = f"{dir_name}/vasprun.xml" - if os.path.isfile(xml_file): - return Vasprun(xml_file, parse_projected_eigen=projections).get_band_structure( + vasprun_file = f"{dir_name}/vasprun.xml" + if os.path.isfile(vasprun_file): + warnings.warn(f"no branch found, reading directly from {dir_name=}", stacklevel=2) + return Vasprun(vasprun_file, parse_projected_eigen=projections).get_band_structure( kpoints_filename=None, efermi=efermi ) + warnings.warn(f"failed to find any vasprun.xml file in selected {dir_name=}", stacklevel=2) return None diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index dd6decacab2..317023be0d9 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -12,13 +12,15 @@ import numpy as np import pytest from monty.io import zopen +from monty.shutil import decompress_file +from monty.tempfile import ScratchDir from numpy.testing import assert_allclose from pytest import approx from pymatgen.core import Element from pymatgen.core.lattice import Lattice from pymatgen.core.structure import Structure -from pymatgen.electronic_structure.bandstructure import BandStructureSymmLine +from pymatgen.electronic_structure.bandstructure import BandStructure, BandStructureSymmLine from pymatgen.electronic_structure.core import Magmom, Orbital, OrbitalType, Spin from pymatgen.entries.compatibility import MaterialsProjectCompatibility from pymatgen.io.vasp.inputs import Incar, Kpoints, Poscar, Potcar @@ -39,6 +41,7 @@ Wavecar, Waveder, Xdatcar, + get_band_structure_from_vasp_multiple_branches, ) from pymatgen.io.wannier90 import Unk from pymatgen.util.testing import FAKE_POTCAR_DIR, TEST_FILES_DIR, VASP_IN_DIR, VASP_OUT_DIR, PymatgenTest @@ -1429,13 +1432,38 @@ def test_init(self): assert len(oszicar.electronic_steps) == len(oszicar.ionic_steps) assert len(oszicar.all_energies) == 60 assert oszicar.final_energy == approx(-526.63928) - assert set(oszicar.ionic_steps[-1]) == set({"F", "E0", "dE", "mag"}) + assert set(oszicar.ionic_steps[-1]) == {"F", "E0", "dE", "mag"} def test_static(self): fpath = f"{TEST_DIR}/fixtures/static_silicon/OSZICAR" oszicar = Oszicar(fpath) assert oszicar.final_energy == approx(-10.645278) - assert set(oszicar.ionic_steps[-1]) == set({"F", "E0", "dE", "mag"}) + assert set(oszicar.ionic_steps[-1]) == {"F", "E0", "dE", "mag"} + + +class TestGetBandStructureFromVaspMultipleBranches: + def test_read_multi_branches(self): + pass + + def test_missing_vasprun_in_branch_dir(self): + """Test vasprun.xml missing from branch_*.""" + + def test_no_branch_head(self): + """Test branch_0 is missing and read dir_name/vasprun.xml directly.""" + with ScratchDir("."): + copyfile(f"{VASP_OUT_DIR}/vasprun.force_hybrid_like_calc.xml.gz", "./vasprun.xml.gz") + decompress_file("./vasprun.xml.gz") + + with pytest.warns(match="no branch found, reading directly from"): + bs = get_band_structure_from_vasp_multiple_branches(".") + assert isinstance(bs, BandStructure) + + def test_cannot_read_anything(self): + """Test no branch_0/, no dir_name/vasprun.xml, nothing.""" + with pytest.warns(match="failed to find any vasprun.xml file in selected"): + with ScratchDir("."): + bs = get_band_structure_from_vasp_multiple_branches(".") + assert bs is None class TestLocpot(PymatgenTest): From f8ab30e361388df3a5c7e0be36d9cbffd6dd3f28 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Thu, 12 Sep 2024 19:12:36 +0800 Subject: [PATCH 6/8] raise error if no vasprun.xml file found at all --- src/pymatgen/io/vasp/outputs.py | 3 +-- tests/io/vasp/test_outputs.py | 8 +++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 84173cac25a..eeeea927b00 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -4308,8 +4308,7 @@ def get_band_structure_from_vasp_multiple_branches( kpoints_filename=None, efermi=efermi ) - warnings.warn(f"failed to find any vasprun.xml file in selected {dir_name=}", stacklevel=2) - return None + raise FileNotFoundError(f"failed to find any vasprun.xml in selected {dir_name=}") class Xdatcar: diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index 317023be0d9..7ee660c21f1 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -1459,11 +1459,9 @@ def test_no_branch_head(self): assert isinstance(bs, BandStructure) def test_cannot_read_anything(self): - """Test no branch_0/, no dir_name/vasprun.xml, nothing.""" - with pytest.warns(match="failed to find any vasprun.xml file in selected"): - with ScratchDir("."): - bs = get_band_structure_from_vasp_multiple_branches(".") - assert bs is None + """Test no branch_0/, no dir_name/vasprun.xml, no vasprun.xml at all.""" + with pytest.raises(FileNotFoundError, match="failed to find any vasprun.xml in selected"), ScratchDir("."): + get_band_structure_from_vasp_multiple_branches(".") class TestLocpot(PymatgenTest): From 5dfd2d3a6e8df37ae0199d6cc31fb9b17c04b3c2 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 14 Sep 2024 16:29:36 +0800 Subject: [PATCH 7/8] deprecation warning for fall back branch --- src/pymatgen/io/vasp/outputs.py | 11 ++++++++++- tests/io/vasp/test_outputs.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index eeeea927b00..ef1b4a43e61 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -4301,9 +4301,18 @@ def get_band_structure_from_vasp_multiple_branches( return get_reconstructed_band_structure(bs_branches, efermi) # Read vasprun.xml directly if no branch head (branch_0) is found + # TODO: remove this branch and raise error directly after 2025-09-14 vasprun_file = f"{dir_name}/vasprun.xml" if os.path.isfile(vasprun_file): - warnings.warn(f"no branch found, reading directly from {dir_name=}", stacklevel=2) + warnings.warn( + ( + f"no branch dir found, reading directly from {dir_name=}\n" + "this fallback branch would be removed after 2025-09-14\n" + "please check your data dir or use Vasprun.get_band_structure directly" + ), + DeprecationWarning, + stacklevel=2, + ) return Vasprun(vasprun_file, parse_projected_eigen=projections).get_band_structure( kpoints_filename=None, efermi=efermi ) diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index 7ee660c21f1..e971b307ae3 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -1454,7 +1454,7 @@ def test_no_branch_head(self): copyfile(f"{VASP_OUT_DIR}/vasprun.force_hybrid_like_calc.xml.gz", "./vasprun.xml.gz") decompress_file("./vasprun.xml.gz") - with pytest.warns(match="no branch found, reading directly from"): + with pytest.warns(DeprecationWarning, match="no branch dir found, reading directly from"): bs = get_band_structure_from_vasp_multiple_branches(".") assert isinstance(bs, BandStructure) From fb74f612f70e2e15dc596f977427d1a98db6a6af Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Wed, 18 Sep 2024 21:40:44 +0800 Subject: [PATCH 8/8] remove unused ignore tag --- src/pymatgen/electronic_structure/bandstructure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/electronic_structure/bandstructure.py b/src/pymatgen/electronic_structure/bandstructure.py index 7eafa9cbc84..4d5fbc0b42d 100644 --- a/src/pymatgen/electronic_structure/bandstructure.py +++ b/src/pymatgen/electronic_structure/bandstructure.py @@ -1071,7 +1071,7 @@ def get_projections_on_elements_and_orbitals( @overload -def get_reconstructed_band_structure( # type: ignore[overload-overlap] +def get_reconstructed_band_structure( list_bs: list[BandStructure], efermi: float | None = None, ) -> BandStructure: