Skip to content

Commit

Permalink
Merge pull request #44 from robinzyb/devel
Browse files Browse the repository at this point in the history
enhance for parsing restart md files
  • Loading branch information
robinzyb committed Apr 12, 2024
2 parents 1120602 + 54d93c0 commit b8c241b
Show file tree
Hide file tree
Showing 18 changed files with 30,076 additions and 29 deletions.
2 changes: 2 additions & 0 deletions cp2kdata/block_parser/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def parse_all_md_cells(output_file: List[str],
# convert bohr to angstrom
cell[:3] = cell[:3] * au2A
# make sure cell length are in angstrom and cell angles are in degree before sent to cellpar_to_cell
#TODO: replace this cellpar_to_cell with more accurate functions in the future
cell = cellpar_to_cell(cell)
all_md_cells.append(cell)
else:
Expand All @@ -130,6 +131,7 @@ def parse_all_md_cells(output_file: List[str],
# convert bohr to angstrom
cell[:3] = cell[:3] * au2A
# make sure cell length are in angstrom and cell angles are in degree before sent to cellpar_to_cell
#TODO: replace this cellpar_to_cell with more accurate functions in the future
cell = cellpar_to_cell(cell)
all_md_cells.append(cell)

Expand Down
6 changes: 4 additions & 2 deletions cp2kdata/dpdata_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def from_labeled_system(self, file_name, **kwargs):
@Format.register("cp2k/aimd_output")
@Format.register("cp2kdata/md")
class CP2KMDFormat(Format):
def from_labeled_system(self, file_name, **kwargs):
def from_labeled_system(self, file_name, restart: bool=None, **kwargs):

# -- Set Basic Parameters --
path_prefix = file_name # in cp2k md, file_name is directory name.
Expand All @@ -84,7 +84,9 @@ def from_labeled_system(self, file_name, **kwargs):
print(WRAPPER)

cp2kmd = Cp2kOutput(output_file=cp2k_output_name,
run_type="MD", path_prefix=path_prefix)
run_type="MD",
path_prefix=path_prefix,
restart=restart)

num_frames = cp2kmd.get_num_frames()

Expand Down
49 changes: 26 additions & 23 deletions cp2kdata/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,21 +440,21 @@ def parse_md(self):
"------------------\n"
)

WARNING_MSG = "cp2kdata obtains more than one initial cell from the output file, \
please check if your output file has duplicated header information."

cell_file_list = glob.glob(os.path.join(self.path_prefix, "*.cell"))
if (self.md_info.ensemble_type == "NVT") or \
(self.md_info.ensemble_type == "NVE") or \
(self.md_info.ensemble_type == "REFTRAJ"):
(self.md_info.ensemble_type == "REFTRAJ"): # not ture REFTRAJ also contrains different cell?
if cell_file_list:
self.all_cells = parse_md_cell(cell_file_list[0])
elif self.filename:
format_logger(info="Cells", filename=self.filename)
print(WARNING_MSG_PARSE_CELL_FROM_OUTPUT)

#self.organize_md_cell()
# parse the first cell
first_cell = parse_all_cells(self.output_file)
assert first_cell.shape == (1, 3, 3), WARNING_MSG
assert first_cell.shape == (1, 3, 3)
self.all_cells = first_cell
self.all_cells = np.repeat(
self.all_cells, repeats=self.num_frames, axis=0)
Expand All @@ -466,37 +466,40 @@ def parse_md(self):
elif self.filename:
format_logger(info="Cells", filename=self.filename)
print(WARNING_MSG_PARSE_CELL_FROM_OUTPUT)
# only parse the first cell
first_cell = parse_all_cells(self.output_file)
assert first_cell.shape == (1, 3, 3), WARNING_MSG
# parse the rest of the cells
self.all_cells = parse_all_md_cells(self.output_file,
cp2k_info=self.cp2k_info)
# prepend the first cell
self.all_cells = np.insert(
self.all_cells, 0, first_cell[0], axis=0)

self.organize_md_cell()

elif (self.md_info.ensemble_type == "NPT_I"):
if cell_file_list:
self.all_cells = parse_md_cell(cell_file_list[0])
elif self.filename:
format_logger(info="Cells", filename=self.filename)
print(WARNING_MSG_PARSE_CELL_FROM_OUTPUT)
# only parse the first cell
first_cell = parse_all_cells(self.output_file)
assert first_cell.shape == (1, 3, 3), WARNING_MSG
# parse the rest of the cells
self.all_cells = parse_all_md_cells(self.output_file,
cp2k_info=self.cp2k_info,
init_cell_info=first_cell[0])
# prepend the first cell
self.all_cells = np.insert(
self.all_cells, 0, first_cell[0], axis=0)

self.organize_md_cell()

self.init_atomic_coordinates, self.atom_kind_list, self.chemical_symbols = parse_init_atomic_coordinates(
self.output_file)
self.atomic_kind = parse_atomic_kinds(self.output_file)

def organize_md_cell(self):
    """Collect the MD cells from the output file into ``self.all_cells``.

    The initial cell is read with ``parse_all_cells`` and the cells of the
    MD steps with ``parse_all_md_cells`` (which receives the initial cell
    as ``init_cell_info``). The initial cell is then prepended to the
    per-step cells unless ``self.cp2k_info.restart`` is ``True``.

    Raises:
        AssertionError: if the parsed initial cell does not have shape
            ``(1, 3, 3)``, i.e. not exactly one initial cell was found.
    """
    # whether to keep the first cell is determined by the restart flag
    WARNING_MSG = (
        "cp2kdata obtains more than one initial cell from the output file, "
        "please check if your output file has duplicated header information."
    )

    # only parse the first cell
    first_cell = parse_all_cells(self.output_file)
    # Explicit raise instead of `assert`: assert statements are removed
    # under `python -O`, which would silently skip this validation.
    # AssertionError is kept so existing callers see the same exception.
    if first_cell.shape != (1, 3, 3):
        raise AssertionError(WARNING_MSG)

    # parse the rest of the cells
    self.all_cells = parse_all_md_cells(self.output_file,
                                        cp2k_info=self.cp2k_info,
                                        init_cell_info=first_cell[0])

    # prepend the first cell, except when the run is flagged as a restart
    if self.cp2k_info.restart is not True:
        self.all_cells = np.insert(
            self.all_cells, 0, first_cell[0], axis=0)

@staticmethod
def get_global_info(run_type=None, filename=None):
if filename:
Expand Down
45 changes: 44 additions & 1 deletion docs/dpdata_plugin.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

For instructions on how to use `dpdata`, please refer to the official repository: https://github.com/deepmodeling/dpdata.

In the following, we provide two examples that demonstrate how to use `Cp2kData` with `dpdata` to parse data from CP2K simulations in specified formats.

Currently, `CP2KData` supports two formats for use with `dpdata`:

1. `cp2kdata/e_f` format for parsing `ENERGY_FORCE` outputs.
Expand Down Expand Up @@ -93,4 +95,45 @@ Currently, `CP2KData` supports two formats for use with `dpdata`:
&END MOTION
```

These examples demonstrate how to use `Cp2kData` with `dpdata` to parse and work with data from CP2K simulations in the specified formats.

In some cases, CP2K MD simulations are restarted from a `-1.restart` file, in which case the initial structure is not evaluated again.
Therefore, the initial cell information should not be parsed again; otherwise, the number of frames for the cells is inconsistent with that for `poses`, `forces`, and `energies`.
Cp2kdata can automatically detect whether a simulation was restarted from the following header information in the output:
```
*******************************************************************************
* RESTART INFORMATION *
*******************************************************************************
* *
* RESTART FILE NAME: bivo4-water-1.restart *
* *
* RESTARTED QUANTITIES: *
* CELL *
* COORDINATES *
* RANDOM NUMBER GENERATOR *
* VELOCITIES *
* MD COUNTERS *
* MD AVERAGES *
* PARTICLE THERMOSTAT *
* REAL TIME PROPAGATION *
* PINT BEAD POSITIONS *
* PINT BEAD VELOCITIES *
* PINT NOSE THERMOSTAT *
* PINT GLE THERMOSTAT *
* HELIUM BEAD POSITIONS *
* HELIUM PERMUTATION STATE *
* HELIUM FORCES ON SOLUTE *
* HELIUM RNG STATE *
*******************************************************************************
```
if the simulations are restarted using:
```cp2k
&EXT_RESTART
RESTART_FILE_NAME Li-LiFSI-DME-1-2-1.restart
&END EXT_RESTART
```
In case your restarted output doesn't have the above header, you can explicitly tell cp2kdata/dpdata that the run is a restart by setting `restart=True`:
```python
# set restart=True in case the output doesn't contain the restart header
dp = dpdata.LabeledSystem(cp2kmd_dir, cp2k_output_name=cp2kmd_output_name, fmt="cp2kdata/md", restart=True)
```

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "Cp2kData"
version = "0.6.6"
version = "0.6.7"
description = "A Small Package to Postprocess Cp2k Output"
authors = [
{name = "Yongbin Zhuang", email = "[email protected]"}
Expand Down
5 changes: 3 additions & 2 deletions tests/test_dpdata/test_labeledsys.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"tests/test_dpdata/v2022.1/aimd",
"tests/test_dpdata/v2022.2/aimd_npt_i",
"tests/test_dpdata/v2023.1/aimd_nvt",
"tests/test_dpdata/v2023.1/aimd_npt_f"
"tests/test_dpdata/v2023.1/aimd_npt_f",
"tests/test_dpdata/v2024.1/aimd_npt_i_restart"
]

e_f_dpdata_list = [
Expand Down Expand Up @@ -97,7 +98,7 @@ def test_cell(self, cp2k_and_ref):
if not cp2k_and_ref[0].nopbc and not cp2k_and_ref[1].nopbc:
np.testing.assert_almost_equal(cp2k_and_ref[0].data['cells'],
cp2k_and_ref[1].data['cells'],
decimal = 6,
decimal = 4,
err_msg = 'cell failed')

def test_coord(self, cp2k_and_ref):
Expand Down
Binary file modified tests/test_dpdata/v2023.1/aimd_npt_f/deepmd/set.000/box.npy
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Step Nr. Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons Qty[a.u.] UsedTime[s]
6 6.000000 3.618561712 709.941006762 -8580.312466062 -8576.705940797 1856.756846905
7 7.000000 3.774142332 740.465030228 -8580.480831882 -8576.708048669 160.384975910
8 8.000000 3.861779396 757.658918417 -8580.592783729 -8576.709931201 119.853107214
9 9.000000 3.897660921 764.698667254 -8580.627119159 -8576.708212095 120.193082809
10 10.000000 3.863355283 757.968098316 -8580.607578460 -8576.707023605 120.599007130
Loading

0 comments on commit b8c241b

Please sign in to comment.