Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
core.python.*

# C extensions
*.so
Expand Down Expand Up @@ -163,18 +164,23 @@ dmypy.json
# mac
.DS_Store

# data
data/**

# config
./config/*

# results
results/**
**/*/results/

server/

main.py

# tests
testing/*
test*.py
test*.ipynb

checkpoints/

# misc
.flake8
.pylintrc
**/wandb/
*.out
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
```
conda-merge env.common.yaml env.gpu.yaml > env.yaml
conda env create -f env.yaml
conda activate matdeeplearn
```

2. CPU-only machines:

1. M1 Macs (see https://github.com/pyg-team/pytorch_geometric/issues/4549):
Expand All @@ -27,6 +28,7 @@
```
conda-merge env.common.yaml env.cpu.yaml > env.yaml
conda env create -f env.yaml
conda activate matdeeplearn
```

3. Install package:
Expand Down
50 changes: 22 additions & 28 deletions configs/config.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,28 @@

trainer: property

task:
# run_mode: train
identifier: "my_train_job"

reprocess: False


parallel: True
# seed=0 means random initialization
seed: 0
#seed=0 means random initialization


# Defaults to run directory if not specified
# save_dir: "."
# checkpoint_dir: "."
write_output: True
parallel: True
#Training print out frequency (print per n number of epochs)
verbosity: 5



model:
name: CGCNN
load_model: False
save_model: True
model_path: "my_model.pth"
edge_steps: 50
self_loop: True
#model attributes
# model attributes
dim1: 100
dim2: 150
pre_fc_count: 1
Expand All @@ -42,8 +37,9 @@ model:

optim:
max_epochs: 250
max_checkpoint_epochs: 0
lr: 0.002
#Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
# Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
loss:
loss_type: "TorchLossWrapper"
loss_args: {"loss_fn": "l1_loss"}
Expand All @@ -57,33 +53,31 @@ optim:
scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}

dataset:
processed: False # if False, need to preprocess the data and generate .pt file
# Whether to use "inmemory" or "large" format for pytorch-geometric dataset. Recommend inmemory unless the dataset is too large
# dataset_type: "inmemory"
#Path to data files
processed: False
# Path to data files
src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_npj/raw/"
#Path to target file within data_path
# Path to target file within data_path
target_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_npj/targets.csv"
#Path to save processed data.pt file
# Path to save processed data.pt file
pt_path: "/global/homes/s/shuyijia/datasets/MP_data_npj/"
#Format of data files (limit to those supported by ASE)
transforms:
- name: GetY
args:
index: 0
otf: False # Optional parameter, default is False
# Format of data files (limit to those supported by ASE)
data_format: "json"
#Method of obtaining atom dictionary: available:(onehot)
# Method of obtaining atom dictionary: available:(onehot)
node_representation: "onehot"
additional_attributes: []
#Print out processing info
# Print out processing info
verbose: True

#Loading dataset params
#Index of target column in targets.csv
target_index: 0

#graph specific settings
# Index of target column in targets.csv
# graph specific settings
cutoff_radius : 8.0
n_neighbors : 12
edge_steps : 50

#Ratios for train/val/test split out of a total of 1
# Ratios for train/val/test split out of a total of 1
train_ratio: 0.8
val_ratio: 0.05
test_ratio: 0.15
92 changes: 92 additions & 0 deletions configs/examples/config_alignn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
trainer: property

task:
  identifier: "alignn_train_100"
  reprocess: False
  # NOTE(review): "parallel" appeared twice in this mapping with the same
  # value; duplicate keys are invalid YAML (most parsers silently keep the
  # last one), so the redundant occurrence was removed.
  parallel: True
  # seed=0 means random initialization
  seed: 0
  save_dir: "."
  checkpoint_dir: "."
  write_output: True
  # Training print out frequency (print per n number of epochs)
  verbosity: 1


model:
  name: ALIGNN
  load_model: False
  save_model: True
  model_path: "alignn_model.pth"
  alignn_layers: 4
  gcn_layers: 4
  atom_input_features: 114
  edge_input_features: 50
  triplet_input_features: 40
  embedding_features: 64
  hidden_features: 256
  output_features: 1
  min_edge_distance: 0.0
  max_edge_distance: 8.0
  link: "identity"

optim:
  max_epochs: 100
  lr: 0.001
  # Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
  loss:
    loss_type: "TorchLossWrapper"
    loss_args: {"loss_fn": "mse_loss"}

  batch_size: 64

  optimizer:
    optimizer_type: "AdamW"
    optimizer_args: {"weight_decay": 0.00001}
  scheduler:
    scheduler_type: "OneCycleLR"
    # Look further into steps per epoch, for now hardcoded calculation from paper
    scheduler_args: {"max_lr": 0.001, "epochs": 300, "steps_per_epoch": 1}

dataset:
  processed: False
  # Path to data files
  # src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/raw/"
  src: "/storage/home/hhive1/sbaskaran31/scratch/MP_data_69K/raw/"
  # Path to target file within data_path
  # target_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/targets.csv"
  target_path: "/storage/home/hhive1/sbaskaran31/scratch/MP_data_69K/targets.csv"
  # Path to save processed data.pt file (a directory path not filepath)
  # pt_path: "/global/cfs/projectdirs/m3641/Sidharth/datasets/MP_data_69K/"
  pt_path: "/storage/home/hhive1/sbaskaran31/scratch/MP_data_69K/"
  transforms:
    - name: GetY
      args:
        index: 0
      otf: False
    - name: NumNodeTransform
      args:
      otf: False
    - name: LineGraphMod
      args:
      otf: False
    - name: ToFloat
      args:
      otf: False
  # Format of data files (limit to those supported by ASE)
  data_format: "json"
  # Method of obtaining atom dictionary; available: (onehot)
  node_representation: "onehot"
  additional_attributes: []
  # Print out processing info
  verbose: True
  # Loading dataset params
  # Index of target column in targets.csv
  # graph specific settings
  cutoff_radius: 8.0
  n_neighbors: 12
  edge_steps: 50
  # Ratios for train/val/test split out of a total of 1
  train_ratio: 0.8
  val_ratio: 0.05
  test_ratio: 0.15
98 changes: 98 additions & 0 deletions configs/examples/config_graphite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@

trainer: property

task:
# run_mode: train
identifier: "alignn_train_100"

reprocess: False


parallel: True
seed: 0
#seed=0 means random initalization


write_output: True
parallel: True
#Training print out frequency (print per n number of epochs)
verbosity: 1



model:
name: ALIGNN_GRAPHITE
load_model: False
save_model: True
model_path: "alignn_graphite_model.pth"
num_interactions: 4
num_species: 3
cutoff: 3.0
dim: 64
# min_angle: float = 0.0,
# max_angle: float = torch.acos(torch.zeros(1)).item() * 2,
link: "identity"

optim:
max_epochs: 103
lr: 0.001
#Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
loss:
loss_type: "TorchLossWrapper"
loss_args: {"loss_fn": "mse_loss"}

batch_size: 64

optimizer:
optimizer_type: "AdamW"
optimizer_args: {"weight_decay": 0.00001}
scheduler:
scheduler_type: "OneCycleLR"
# Look further into steps per epoch, for now hardcoded calculation from paper
scheduler_args: {"max_lr": 0.001, "epochs": 300, "steps_per_epoch": 1}

dataset:
processed: True # if False, need to preprocessor data and generate .pt file
# Whether to use "inmemory" or "large" format for pytorch-geometric dataset. Reccomend inmemory unless the dataset is too large
# dataset_type: "inmemory"
#Path to data files
src: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/raw/"
#Path to target file within data_path
target_path: "/global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_69K/targets.csv"
#Path to save processed data.pt file (a directory path not filepath)
pt_path: "/global/cfs/projectdirs/m3641/Sidharth/datasets/MP_data_69K/"
transforms:
- name: GetY
args:
index: 0
otf: False
- name: NumNodeTransform
args:
otf: False
- name: LineGraphMod
args:
otf: False
- name: ToFloat
args:
otf: False
#Format of data files (limit to those supported by ASE)
data_format: "json"
#Method of obtaining atom idctionary: available:(onehot)
node_representation: "onehot"
additional_attributes: []
#Print out processing info
verbose: True

#Loading dataset params
#Index of target column in targets.csv
target_index: 0

#graph specific settings
cutoff_radius : 8.0
n_neighbors : 12
edge_steps : 50

#Ratios for train/val/test split out of a total of 1
train_ratio: 0.8
val_ratio: 0.05
test_ratio: 0.15
2 changes: 1 addition & 1 deletion env.common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ dependencies:
- pre-commit
- numpy
- scipy
- ase=3.21.*
- ase==3.21.*
- black
- pandas
6 changes: 3 additions & 3 deletions matdeeplearn/common/config/build_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def create_dict_from_args(args: list, sep: str = "."):

def build_config(args, args_override):
# Open provided config file
assert os.path.exists(args.config_path), (
"Config file not found in " + args.config_path
)
assert os.path.exists(
args.config_path
), f"Config file not found in {str(args.config_path)}"
with open(args.config_path, "r") as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.FullLoader)

Expand Down
Loading