utterwqlnut · utterwqlnut · Oct 1, 2023 · Sep 5, 2023 · Sep 6, 2023 · Sep 11, 2023
diff --git a/configs/config.yml b/configs/config.yml
@@ -14,10 +14,15 @@ task:
   load_training_state: False
   # Path to the checkpoint.pt file
   checkpoint_path: 
-  # E.g. ["train", "val", "test"]
+  # Whether to write predictions to csv file. E.g. ["train", "val", "test"]
   write_output: [train, val, test]
+  # Frequency of writing to file; 0 denotes writing only at the end, 1 denotes writing every time
+  output_frequency: 0
+  # Frequency of saving model .pt file; 0 denotes saving only at the end, 1 denotes saving every time, -1 denotes never saving; this controls both checkpoint and best_checkpoint
+  model_save_frequency: 0
   # Specify if labels are provided for the predict task
   # labels: True
+  # Use amp mixed precision
   use_amp: True  
 
 model:
@@ -34,9 +39,13 @@ model:
   batch_track_stats: True
   act: relu
   dropout_rate: 0.0
-  # Compute edge features on the fly
-  otf_edge: False 
-  # compute gradients w.r.t to positions and cell, requires otf_edge=True  
+  # Compute edge indices on the fly in the model forward
+  otf_edge_index: False 
+  # Compute edge attributes on the fly in the model forward
+  otf_edge_attr: False  
+  # Compute node attributes on the fly in the model forward
+  otf_node_attr: False
+  # compute gradients w.r.t. positions and cell, requires otf_edge_attr=True
   gradient: False
 
 optim:
@@ -47,8 +56,8 @@ optim:
   loss:
     loss_type: TorchLossWrapper
     loss_args: {loss_fn: l1_loss}
-  clip_grad_norm: 10
-
+  # gradient clipping value
+  clip_grad_norm: 10       
   batch_size: 100
   optimizer:
     optimizer_type: AdamW
@@ -63,6 +72,7 @@ optim:
 
 dataset:
   name: test_data
+  # Whether the data has already been processed and a data.pt file is present from a previous run
   processed: False
   # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path}
   src: data/test_data/data_graph_scalar.json
@@ -71,7 +81,7 @@ dataset:
   target_path: 
   # Path to save processed data.pt file
   pt_path: data/
-  # Either "node" or "graph"
+  # Either "node" or "graph" level
   prediction_level: graph
 
   transforms:
@@ -81,12 +91,11 @@ dataset:
         # For example, an index: 0 (default) will use the first entry in the target vector 
         # if all values are to be predicted simultaneously, then specify index: -1
         index: -1
-      otf: True # Optional parameter, default is False
+      otf_transform: True # Optional parameter, default is True
   # Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html)
   data_format: json
-  # E.g. additional_attributes: [forces, stress]
+  # specify any additional attributes to be loaded into the dataset from the .json file; e.g. additional_attributes: [forces, stress]
   additional_attributes: 
-  #additional_attributes:
   # Print out processing info
   verbose: True
   # Index of target column in targets.csv
@@ -99,16 +108,26 @@ dataset:
     # determine if edge attributes are computed during processing, if false, then they need to be computed on the fly   
     preprocess_edge_features: True
     # determine if node attributes are computed during processing, if false, then they need to be computed on the fly   
-    preprocess_nodes: True
+    preprocess_node_features: True
+    # distance cutoff to determine if two atoms are connected by an edge
     cutoff_radius : 8.0
+    # maximum number of neighbors to consider (usually an arbitrarily high number to consider all neighbors)
     n_neighbors : 250
+    # number of pbc offsets to consider when determining neighbors (usually not changed)
     num_offsets: 2
-    edge_steps : 50
+    # dimension of node attributes
+    node_dim : 100
+    # dimension of edge attributes
+    edge_dim : 50
+    # whether or not to add self-loops
     self_loop: True
     # Method of obtaining atom dictionary: available: (onehot)
     node_representation: onehot    
-    all_neighbors: True
-  # Ratios for train/val/test split out of a total of less than 1
+  # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html
+  num_workers: 0
+  # Where the dataset is loaded; either "cpu" or "cuda"
+  dataset_device: cpu
+  # Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data)
   train_ratio: 0.8
   val_ratio: 0.05
-  test_ratio: 0.15
+  test_ratio: 0.15
diff --git a/configs/config_calculator.yml b/configs/config_calculator.yml
@@ -0,0 +1,130 @@
+trainer: matdeeplearn.trainers.PropertyTrainer
+
+task:
+  run_mode: train
+  identifier: my_train_job
+  parallel: False
+  # If seed is not set, then it will be random every time
+  seed: 12345678
+  # Defaults to run directory if not specified
+  save_dir: 
+  # continue from a previous job
+  continue_job: False
+  # specify if the training state is loaded: epochs, learning rate, etc
+  load_training_state: False
+  # Path to the checkpoint.pt file. The model used in the calculator will load parameters from this file.
+  checkpoint_path: results/2023-09-20-16-22-38-738-my_train_job/checkpoint/best_checkpoint.pt
+  # E.g. ["train", "val", "test"]
+  write_output: [train, val, test]
+  # Specify if labels are provided for the predict task
+  # labels: True
+  use_amp: True  
+
+model:
+  name: CGCNN    
+  # model attributes
+  dim1: 100
+  dim2: 150
+  pre_fc_count: 1
+  gc_count: 4
+  post_fc_count: 3
+  pool: global_add_pool
+  pool_order: early
+  batch_norm: False
+  batch_track_stats: True
+  act: silu
+  dropout_rate: 0.0
+  # Compute edge indices on the fly in the model forward
+  otf_edge_index: True 
+  # Compute edge attributes on the fly in the model forward
+  otf_edge_attr: True  
+  # Compute node attributes on the fly in the model forward
+  otf_node_attr: True
+  # compute gradients w.r.t. positions and cell, requires otf_edge_attr=True
+  gradient: True
+
+optim:
+  max_epochs: 40
+  max_checkpoint_epochs: 0
+  lr: 0.002
+  # Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
+  loss:
+    loss_type: TorchLossWrapper
+    loss_args: {loss_fn: l1_loss}
+  # gradient clipping value
+  clip_grad_norm: 10       
+  batch_size: 100
+  optimizer:
+    optimizer_type: AdamW
+    optimizer_args: {}
+  scheduler:
+    scheduler_type: ReduceLROnPlateau
+    scheduler_args: {mode: min, factor: 0.8, patience: 10, min_lr: 0.00001, threshold: 0.0002}
+  # Training print-out frequency (print once every n epochs)
+  verbosity: 5
+  # tqdm progress bar per batch in the epoch
+  batch_tqdm: False
+
+dataset:
+  name: test_data
+  # Whether the data has already been processed and a data.pt file is present from a previous run
+  processed: False
+  # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path}
+  src: data/force_data/data.json
+  # Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file
+  # Example: target_path: "data/raw_graph_scalar/targets.csv"
+  target_path: 
+  # Path to save processed data.pt file
+  pt_path: data/force_data/
+  # Either "node" or "graph" level
+  prediction_level: graph
+
+  transforms:
+    - name: GetY
+      args:
+        # index specifies the index of a target vector to predict, which is useful when there are multiple property labels for a single dataset
+        # For example, an index: 0 (default) will use the first entry in the target vector 
+        # if all values are to be predicted simultaneously, then specify index: -1
+        index: -1
+      otf_transform: True # Optional parameter, default is True
+  # Format of data files (limit to those supported by ASE: https://wiki.fysik.dtu.dk/ase/ase/io/io.html)
+  data_format: json
+  # specify any additional attributes to be loaded into the dataset from the .json file; e.g. additional_attributes: [forces, stress]
+  additional_attributes: 
+  # Print out processing info
+  verbose: True
+  # Index of target column in targets.csv
+  # graph specific settings
+  preprocess_params:
+    # one of mdl (minimum image convention), ocp (all neighbors included)
+    edge_calc_method: ocp 
+    # determine if edges are computed, if false, then they need to be computed on the fly   
+    preprocess_edges: False
+    # determine if edge attributes are computed during processing, if false, then they need to be computed on the fly   
+    preprocess_edge_features: False
+    # determine if node attributes are computed during processing, if false, then they need to be computed on the fly   
+    preprocess_node_features: False
+    # distance cutoff to determine if two atoms are connected by an edge
+    cutoff_radius : 8.0
+    # maximum number of neighbors to consider (usually an arbitrarily high number to consider all neighbors)
+    n_neighbors : 250
+    # number of pbc offsets to consider when determining neighbors (usually not changed)
+    num_offsets: 2
+    # dimension of node attributes
+    node_dim : 100
+    # dimension of edge attributes
+    edge_dim : 50
+    # whether or not to add self-loops
+    self_loop: True
+    # Method of obtaining atom dictionary: available: (onehot)
+    node_representation: onehot    
+    all_neighbors: True 
+
+  # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html
+  num_workers: 0
+  # Where the dataset is loaded; either "cpu" or "cuda"
+  dataset_device: cpu
+  # Ratios for train/val/test split out of a total of less than 1 (0.8 corresponds to 80% of the data)
+  train_ratio: 0.9
+  val_ratio: 0.05
+  test_ratio: 0.05
diff --git a/configs/config_forces.yml b/configs/config_forces.yml
@@ -16,6 +16,10 @@ task:
   checkpoint_path: 
   # E.g. [train, val, test]
   write_output: [val, test]
+  # Frequency of writing to file; 0 denotes writing only at the end, 1 denotes writing every time
+  output_frequency: 1
+  # Frequency of saving model .pt file; 0 denotes saving only at the end, 1 denotes saving every time, -1 denotes never saving; this controls both checkpoint and best_checkpoint
+  model_save_frequency: 1
   # Specify if labels are provided for the predict task
   # labels: True
   use_amp: False  
@@ -34,15 +38,19 @@ model:
   batch_track_stats: True
   act: silu
   dropout_rate: 0.0
-  # Compute edge features on the fly
-  otf_edge: True 
-  # compute gradients w.r.t to positions and cell, requires otf_edge=True  
+  # Compute edge indices on the fly in the model forward
+  otf_edge_index: True 
+  # Compute edge attributes on the fly in the model forward
+  otf_edge_attr: True 
+  # Compute node attributes on the fly in the model forward
+  otf_node_attr: False
+  # compute gradients w.r.t. positions and cell, requires otf_edge_attr=True
   gradient: True
 
 optim:
-  max_epochs: 40
+  max_epochs: 400
   max_checkpoint_epochs: 0
-  lr: 0.002
+  lr: 0.001
   # Either custom or from torch.nn.functional library. If from torch, loss_type is TorchLossWrapper
   loss:
     #loss_type: "TorchLossWrapper"
@@ -69,12 +77,12 @@ dataset:
   name: test_data
   processed: False
   # Path to data files - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path, predict: predict_path}
-  src: /global/cfs/projectdirs/m3641/Shared/Materials_datasets/MP_data_forces/raw/data.json
+  src: data/force_data/data.json
   # Path to target file within data_path - this can either be in the form of a string denoting a single path or a dictionary of {train: train_path, val: val_path, test: test_path} or left blank when the dataset is a single json file
   # Example: target_path: "data/test_data/raw_graph_scalar/targets.csv"
   target_path: 
   # Path to save processed data.pt file
-  pt_path: data/
+  pt_path: data/force_data/
   # Either "node" or "graph"
   prediction_level: graph
 
@@ -103,18 +111,24 @@ dataset:
     # determine if edge attributes are computed during processing, if false, then they need to be computed on the fly   
     preprocess_edge_features: False
     # determine if node attributes are computed during processing, if false, then they need to be computed on the fly   
-    preprocess_nodes: True
+    preprocess_node_features: True
     cutoff_radius : 8.0
     n_neighbors : 250
     num_offsets: 2
-    edge_steps : 50
+    # dimension of node attributes
+    node_dim : 100
+    # dimension of edge attributes
+    edge_dim : 50
     self_loop: True
     # Method of obtaining atom dictionary: available: (onehot)
     node_representation: onehot    
     all_neighbors: True
+
+  # Number of workers for dataloader, see https://pytorch.org/docs/stable/data.html
+  num_workers: 0
+  # Where the dataset is loaded; either "cpu" or "cuda"
+  dataset_device: cpu
   # Ratios for train/val/test split out of a total of less than 1
-  train_ratio: 0.8
+  train_ratio: 0.9
   val_ratio: 0.05
-  test_ratio: 0.015
-
-
+  test_ratio: 0.05