Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 6 additions & 82 deletions configs/config_calculator.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,9 @@
trainer: matdeeplearn.trainers.PropertyTrainer

task:
run_mode: train
identifier: my_train_job
parallel: False
# If seed is not set, then it will be random every time
seed: 12345678
# Defaults to run directory if not specified
save_dir:
# continue from a previous job
continue_job: False
# specify if the training state is loaded: epochs, learning rate, etc
load_training_state: False
# Path to the checkpoint.pt file. The model used in the calculator will load parameters from this file.
checkpoint_path: results/2023-09-20-16-22-38-738-my_train_job/checkpoint/best_checkpoint.pt
# E.g. ["train", "val", "test"]
write_output: [train, val, test]
# Specify if labels are provided for the predict task
# labels: True
use_amp: True
checkpoint_path: ./checkpoints/cgcnn_checkpoint.pt

model:
# Model used by the calculator
name: CGCNN
# model attributes
dim1: 100
Expand All @@ -39,62 +22,12 @@ model:
# Compute edge attributes on the fly in the model forward
otf_edge_attr: True
# Compute node attributes on the fly in the model forward
otf_node_attr: True
otf_node_attr: False
model_ensemble: 1
# compute gradients w.r.t to positions and cell, requires otf_edge_attr=True
gradient: True

optim:
max_epochs: 40
max_checkpoint_epochs: 0
lr: 0.002
# Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
loss:
loss_type: TorchLossWrapper
loss_args: {loss_fn: l1_loss}
# gradient clipping value
clip_grad_norm: 10
batch_size: 100
optimizer:
optimizer_type: AdamW
optimizer_args: {}
scheduler:
scheduler_type: ReduceLROnPlateau
scheduler_args: {mode: min, factor: 0.8, patience: 10, min_lr: 0.00001, threshold: 0.0002}
#Training print out frequency (print per n number of epochs)
verbosity: 5
# tqdm progress bar per batch in the epoch
batch_tqdm: False

dataset:
name: test_data
# Whether the data has already been processed and a data.pt file is present from a previous run
processed: False
# Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path}
src: data/force_data/data.json
# Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file
# Example: target_path: "data/raw_graph_scalar/targets.csv"
target_path:
# Path to save processed data.pt file
pt_path: data/force_data/
# Either "node" or "graph" level
prediction_level: graph

transforms:
- name: GetY
args:
# index specifies the index of a target vector to predict, which is useful when there are multiple property labels for a single dataset
# For example, an index: 0 (default) will use the first entry in the target vector
# if all values are to be predicted simultaneously, then specify index: -1
index: -1
otf_transform: True # Optional parameter, default is True
# Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html)
data_format: json
# specify if additional attributes to be loaded into the dataset from the .json file; e.g. additional_attributes: [forces, stress]
additional_attributes:
# Print out processing info
verbose: True
# Index of target column in targets.csv
# graph specific settings
dataset:
preprocess_params:
# one of mdl (minimum image convention), ocp (all neighbors included)
edge_calc_method: ocp
Expand All @@ -118,13 +51,4 @@ dataset:
self_loop: True
# Method of obtaining atom dictionary: available: (onehot)
node_representation: onehot
all_neighbors: True

# Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html
num_workers: 0
# Where the dataset is loaded; either "cpu" or "cuda"
dataset_device: cpu
# Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data)
train_ratio: 0.9
val_ratio: 0.05
test_ratio: 0.05
all_neighbors: True
130 changes: 96 additions & 34 deletions matdeeplearn/common/ase_utils.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,75 @@
import torch
from typing import List
import logging

import numpy as np
import torch
import yaml
from ase import Atoms
from ase.geometry import Cell
from ase.calculators.calculator import Calculator
from matdeeplearn.preprocessor.helpers import generate_node_features
from torch_geometric.data.data import Data
from torch_geometric.loader import DataLoader
import logging
from typing import List

from matdeeplearn.common.registry import registry
from matdeeplearn.models.base_model import BaseModel
from matdeeplearn.preprocessor.helpers import generate_node_features


logging.basicConfig(level=logging.INFO)


class MDLCalculator(Calculator):
"""
A neural networked based Calculator that calculates the energy, forces and stress of a crystal structure.
"""
implemented_properties = ["energy", "forces", "stress"]

def __init__(self, config):
def __init__(self, config, rank='cuda:0'):
"""
Initialize the MDLCalculator instance.

Args:
config (str or dict): Configuration settings for the MDLCalculator.
- config (str or dict): Configuration settings for the MDLCalculator.
- rank (str): Rank of device the calculator calculates properties. Defaults to 'cuda:0'

Raises:
AssertionError: If the trainer name is not in the correct format or if the trainer class is not found.
- AssertionError: If the trainer name is not in the correct format or if the trainer class is not found.
"""
Calculator.__init__(self)

if isinstance(config, str):
logging.info(f'MDLCalculator instantiated from config: {config}')
with open(config, "r") as yaml_file:
config = yaml.safe_load(yaml_file)
elif isinstance(config, dict):
logging.info('MDLCalculator instantiated from a dictionary.')
else:
raise NotImplementedError('Unsupported config type.')

gradient = config["model"].get("gradient", False)
otf_edge_index = config["model"].get("otf_edge_index", False)
otf_edge_attr = config["model"].get("otf_edge_attr", False)
self.otf_node_attr = config["model"].get("otf_node_attr", False)
assert otf_edge_index and otf_edge_attr and gradient, "To use this calculator to calculate forces and stress, you should set otf_edge_index, oft_edge_attr and gradient to True."

trainer_name = config.get("trainer", "matdeeplearn.trainers.PropertyTrainer")
assert trainer_name.count(".") >= 1, "Trainer name should be in format {module}.{trainer_name}, like matdeeplearn.trainers.PropertyTrainer"

trainer_cls = registry.get_trainer_class(trainer_name)
load_state = config['task'].get('checkpoint_path', None)
assert trainer_cls is not None, "Trainer not found"
self.trainer = trainer_cls.from_config(config)

try:
self.trainer.load_checkpoint()
except ValueError:
logging.warning("No checkpoint.pt file is found, and an untrained model is used for prediction.")

self.device = rank if torch.cuda.is_available() else 'cpu'
self.models = MDLCalculator._load_model(config, self.device)
self.n_neighbors = config['dataset']['preprocess_params'].get('n_neighbors', 250)
self.device = 'cpu'

def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None):
def calculate(self, atoms: Atoms, properties=implemented_properties, system_changes=None) -> None:
"""
Calculate energy, forces, and stress for a given ase.Atoms object.

Args:
atoms (ase.Atoms): The atomic structure for which calculations are to be performed.
properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress'].
system_changes: Not supported in the current implementation.
- atoms (ase.Atoms): The atomic structure for which calculations are to be performed.
- properties (list): List of properties to calculate. Defaults to ['energy', 'forces', 'stress'].
- system_changes: Not supported in the current implementation.

Returns:
None: The results are stored in the instance variable 'self.results'.
- None: The results are stored in the instance variable 'self.results'.

Note:
This method performs energy, forces, and stress calculations using a neural network-based calculator.
- This method performs energy, forces, and stress calculations using a neural network-based calculator.
The results are stored in the instance variable 'self.results' as 'energy', 'forces', and 'stress'.
"""
Calculator.calculate(self, atoms, properties, system_changes)
Expand All @@ -87,11 +88,20 @@ def calculate(self, atoms: Atoms, properties=implemented_properties, system_chan

data_list = [data]
loader = DataLoader(data_list, batch_size=1)
loader_iter = iter(loader)
batch = next(loader_iter).to(self.device)

out_list = []
for model in self.models:
out_list.append(model(batch))

out = self.trainer.predict_by_calculator(loader)
self.results['energy'] = out['energy']
self.results['forces'] = out['forces']
self.results['stress'] = out['stress']
energy = torch.stack([entry["output"] for entry in out_list]).mean(dim=0)
forces = torch.stack([entry["pos_grad"] for entry in out_list]).mean(dim=0)
stresses = torch.stack([entry["cell_grad"] for entry in out_list]).mean(dim=0)

self.results['energy'] = energy.detach().cpu().numpy()
self.results['forces'] = forces.detach().cpu().numpy()
self.results['stress'] = stresses.squeeze().detach().cpu().numpy()

@staticmethod
def data_to_atoms_list(data: Data) -> List[Atoms]:
Expand All @@ -101,11 +111,11 @@ def data_to_atoms_list(data: Data) -> List[Atoms]:
with its associated properties such as positions and cell.

Args:
data (Data): A data object containing information about atomic structures.
- data (Data): A data object containing information about atomic structures.

Returns:
List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure
with positions and associated properties.
- List[Atoms]: A list of 'ase.Atoms' objects, each representing an atomic structure
with positions and associated properties.
"""
cells = data.cell.numpy()

Expand All @@ -120,3 +130,55 @@ def data_to_atoms_list(data: Data) -> List[Atoms]:
for i in range(len(data.structure_id)):
atoms_list[i].structure_id = data.structure_id[i][0]
return atoms_list

@staticmethod
def _load_model(config: dict, rank: str) -> List[BaseModel]:
"""
This static method loads a model based on the provided configuration.

Parameters:
- config (dict): Configuration dictionary containing model and dataset parameters.
- rank: Rank information for distributed training.

Returns:
- model_list: A list of loaded models.
"""

graph_config = config['dataset']['preprocess_params']
model_config = config['model']

model_list = []
model_name = 'matdeeplearn.models.' + model_config["name"]
logging.info(f'MDLCalculator: setting up {model_name} for calculation')
# Obtain node, edge, and output dimensions for model initialization
for _ in range(model_config["model_ensemble"]):
node_dim = graph_config["node_dim"]
edge_dim = graph_config["edge_dim"]

model_cls = registry.get_model_class(model_name)
model = model_cls(
node_dim=node_dim,
edge_dim=edge_dim,
output_dim=1,
cutoff_radius=graph_config["cutoff_radius"],
n_neighbors=graph_config["n_neighbors"],
graph_method=graph_config["edge_calc_method"],
num_offsets=graph_config["num_offsets"],
**model_config
)
model = model.to(rank)
model_list.append(model)

checkpoints = config['task']["checkpoint_path"].split(',')
if len(checkpoints) == 0:
logging.warning("MDLCalculator: No checkpoint.pt file is found, and untrained models are used for prediction.")
else:
for i in range(len(checkpoints)):
try:
checkpoint = torch.load(checkpoints[i])
model_list[i].load_state_dict(checkpoint["state_dict"])
logging.info(f'MDLCalculator: weights for model No.{i+1} loaded from {checkpoints[i]}')
except ValueError:
logging.warning(f"MDLCalculator: No checkpoint.pt file is found for model No.{i+1}, and an untrained model is used for prediction.")

return model_list