claws-lab
diff --git a/‎config/Ant.json
Lines changed: 10 additions & 0 deletions b/‎config/Ant.json
Lines changed: 10 additions & 0 deletions
diff --git a/‎config/Chickenpox.json
Lines changed: 14 additions & 1 deletion b/‎config/Chickenpox.json
Lines changed: 14 additions & 1 deletion
diff --git a/‎dygetviz/arguments.py
Lines changed: 53 additions & 38 deletions b/‎dygetviz/arguments.py
Lines changed: 53 additions & 38 deletions
diff --git a/‎dygetviz/const.py
Lines changed: 5 additions & 0 deletions b/‎dygetviz/const.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎dygetviz/data/chickenpox.py
Lines changed: 106 additions & 0 deletions b/‎dygetviz/data/chickenpox.py
Lines changed: 106 additions & 0 deletions
diff --git a/‎dygetviz/data/dataloader.py
Lines changed: 28 additions & 1 deletion b/‎dygetviz/data/dataloader.py
Lines changed: 28 additions & 1 deletion
@@ -0,0 +1,10 @@
+{
+  "embedding_dim": 128,
+  "idx_reference_snapshot": 7,
+  "interpolation": 0.2,
+  "model_name": "GConvGRU",
+  "num_nearest_neighbors": [20],
+  "perplexity": 2,
+  "projected_nodes": "projected_nodes.json",
+  "reference_nodes": "reference_nodes.json"
+}
@@ -10,5 +10,18 @@
   "perplexity": 2,
   "reference_nodes": ["BACS", "BARANYA", "BEKES", "BORSOD", "BUDAPEST", "CSONGRAD",
        "FEJER", "GYOR", "HAJDU", "HEVES", "JASZ", "KOMAROM", "NOGRAD",
-       "PEST", "SOMOGY", "SZABOLCS", "TOLNA", "VAS", "VESZPREM", "ZALA"]
+       "PEST", "SOMOGY", "SZABOLCS", "TOLNA", "VAS", "VESZPREM", "ZALA"],
+
+
+  "do_node_classification": false,
+  "do_node_regression": true,
+  "do_edge_classification": false,
+  "do_edge_regression": false,
+  "num_classes_nodes": 0,
+  "num_classes_edges": 0,
+  "do_link_prediction": true,
+
+  "tasks": ["node_regression","link_pred"]
+
+
 }
@@ -1,32 +1,30 @@
 import argparse
 import os
 import os.path as osp
-import uuid
-from pprint import pprint
 
 import const
 from const import *
 
-if platform.system() == "Windows":
-    DEVICE = "cuda:0"
+# Choose the default value for the `--device` argument from the host OS.
+if platform.system() in ["Windows", "Linux"]:
+    # torch is imported only on this branch, to probe whether CUDA is usable.
+    import torch
 
-elif platform.system() == "Linux":
-    DEVICE = "cuda:0"
+    # Fall back to the CPU when no CUDA device is available.
+    if torch.cuda.is_available():
+        DEFAULT_DEVICE = "cuda:0"
+    else:
+        DEFAULT_DEVICE = "cpu"
 
 
 elif platform.system() == "Darwin":
-    DEVICE = "mps:0"
+    # NOTE(review): MPS is assumed to be usable on macOS; unlike the CUDA
+    # branch above, no availability check is performed here.
+    DEFAULT_DEVICE = "mps:0"
 
 
 else:
     raise NotImplementedError("Unknown System")
 
-print(f"Your system: {platform.system()}. Default device: {DEVICE}")
+print(f"Your system: {platform.system()}. Default device: {DEFAULT_DEVICE}")
 
-
-
-
-parser = argparse.ArgumentParser(description="Dynamic Graph Embedding Trajectory.")
+parser = argparse.ArgumentParser(
+    description="Dynamic Graph Embedding Trajectory.")
 # Parameters for Analysis
 parser.add_argument('--do_visual', action='store_true',
                     help="Whether to do visualization")
@@ -43,22 +41,25 @@
                     help="Comment for each run. Useful for identifying each run on Tensorboard")
 parser.add_argument('--data_dir', type=str, default="data",
                     help="Location to store all the data.")
-parser.add_argument('--dataset_name', type=str, default='Chickenpox', help="Name of dataset.")
-parser.add_argument('--device', type=str, default=DEVICE, help="Device to use. When using multi-gpu, this is the 'master' device where all operations are performed.")
-parser.add_argument('--device2', type=str, default='cpu',
-                    help="For Multi-GPU training")
+parser.add_argument('--dataset_name', type=str, default='Chickenpox',
+                    help="Name of dataset.")
+parser.add_argument('--device', type=str, default=DEFAULT_DEVICE,
+                    help="Device to use. When using multi-gpu, this is the 'master' device where all operations are performed.")
 
 parser.add_argument('--do_test', action='store_true')
 parser.add_argument('--do_val', action='store_true')
-parser.add_argument('--do_weighted', action='store_true', help="Construct weighted graph instead of multigraph for each graph snapshot")
+parser.add_argument('--do_weighted', action='store_true',
+                    help="Construct weighted graph instead of multigraph for each graph snapshot")
 
 parser.add_argument('--dropout', type=float, default=0.1,
                     help="Dropout rate (1 - keep probability).")
 
-
-parser.add_argument('--embedding_dim', type=int, default=64, help="the embedding size of model")
-parser.add_argument('--embedding_dim_user', type=int, default=32, help="The embedding size for the users")
-parser.add_argument('--embedding_dim_resource', type=int, default=32, help="The embedding size for the resource (e.g. video)")
+parser.add_argument('--embedding_dim', type=int, default=64,
+                    help="the embedding size of model")
+parser.add_argument('--embedding_dim_user', type=int, default=32,
+                    help="The embedding size for the users")
+parser.add_argument('--embedding_dim_resource', type=int, default=32,
+                    help="The embedding size for the resource (e.g. video)")
 
 parser.add_argument('--epochs', type=int, default=50,
                     help="Number of epochs to train.")
@@ -85,24 +86,30 @@
 
 parser.add_argument('--i_end', type=int, default=None,
                     help="Index of the end dataset.")
-
+# Width of the raw input features; resolved to embedding_dim after parsing
+# when left unset.
+parser.add_argument('--in_channels', type=int, default=None,
+                    help="Number of input feature channels. Defaults to embedding_dim when not set.")
 
 parser.add_argument('--lr', type=float, default=1e-3, help="Learning rate")
 parser.add_argument('--max_seq_length', type=int, default=128,
                     help="Maximum sequence length")
 
-parser.add_argument('--model', type=str, default=None, help="Model Name")
+parser.add_argument('--model', type=str, default=None, help="Model name")
 
-parser.add_argument('--node_types', type=str, choices=["v_subreddit", "author_subreddit", "author_resource"], default="v_subreddit",
+parser.add_argument('--node_types', type=str,
+                    choices=["v_subreddit", "author_subreddit",
+                             "author_resource"], default="v_subreddit",
                     help="What types of node to include in the GCN bipartite graph?")
 
 parser.add_argument('--num_negative_candidates', type=int, default=1000,
                     help="How many negative examples to sample for each video during the initial sampling?")
-parser.add_argument('--num_neighbors', type=int, default=10, help="Number of neighboring nodes in GNN")
+parser.add_argument('--num_neighbors', type=int, default=10,
+                    help="Number of neighboring nodes in GNN")
 parser.add_argument('--num_resource_prototypes', type=int, default=-1, help="")
 
-parser.add_argument('--num_workers', type=int, default=1, help="Number of workers for multiprocessing")
-parser.add_argument('--perplexity', type=int, default=20, help="Perplexity of the generated t-SNE plot")
+parser.add_argument('--num_workers', type=int, default=1,
+                    help="Number of workers for multiprocessing")
+parser.add_argument('--perplexity', type=int, default=20,
+                    help="Perplexity of the generated t-SNE plot")
 parser.add_argument('--pretrained_embeddings_epoch', type=int, default=195,
                     help="Which epoch of the pretrained embeddings (Node2Vec, GCN ...) to use")
 parser.add_argument('--output_dir', type=str, default="outputs")
@@ -122,19 +129,11 @@
 parser.add_argument('--num_sample_author', type=int, default=-1,
                     help="Number of resource to sample in our dataset. Set to -1 if we do not want to sample")
 
-
-
-
 parser.add_argument('--port', type=int, default=8050)
 
-
 parser.add_argument('--save_embed_every', type=int, default=10,
                     help="How many epochs to save embeddings for visualization?")
 
-parser.add_argument('--save_resource_embed', action='store_true',
-                    help="Whether to save the embeddings for resources (videos, URLs, Misinformative URLs)?")
-
-
 parser.add_argument('--save_model_every', type=int, default=-1,
                     help="How many epochs to save the model weights?")
 parser.add_argument('--seed', type=int, default=42, help="Random seed.")
@@ -154,17 +153,33 @@
 parser.add_argument('--snapshot_interval', type=int, default=1,
                     help="Time interval (in days) between each snapshot. Default: 1 month. Interactions happening within this time interval will be grouped into one snapshot.")
 
+parser.add_argument('--transform_input', action='store_true',
+                    help="Whether to transform the input to a new embedding space. This field is automatically set to True if in_channels does not equal to embedding_dim")
+
 parser.add_argument('--suffix', type=str, default="",
                     help="Suffix to append to the end of the log file name")
 
-parser.add_argument('--visualization_dim', type=int, choices=[2, 3], default=2, help="Dimension of the generated visualization. Can be 2- or 3-dimensional.")
+parser.add_argument('--tasks', type=str,
+                    default="['node_classification','link_pred']",
+                    help="Tasks to run, passed as a list of strings")
+
+parser.add_argument('--visualization_dim', type=int, choices=[2, 3], default=2,
+                    help="Dimension of the generated visualization. Can be 2- or 3-dimensional.")
 
-parser.add_argument('--visualization_model', type=str, choices=[const.TSNE, const.UMAP, const.PCA, const.ISOMAP, const.MDS], default=const.TSNE,
+parser.add_argument('--visualization_model', type=str,
+                    choices=[const.TSNE, const.UMAP, const.PCA, const.ISOMAP,
+                             const.MDS], default=const.TSNE,
                     help="Visualization model to use")
 
 args = parser.parse_args()
 
+# Imported here because it is only needed to parse list-valued arguments.
+import ast
+
+# Fall back to the embedding size when no explicit input width was given.
+if args.in_channels is None:
+    args.in_channels = args.embedding_dim
+# NOTE(review): the eval below runs arbitrary code taken from the command
+# line; if this option only ever carries a Python literal, switch it to
+# ast.literal_eval as done for --tasks further down.
 args.num_nearest_neighbors = eval(args.num_nearest_neighbors)
 
 args.visual_dir = osp.join(args.output_dir, "visual", args.dataset_name)
-os.makedirs(args.visual_dir, exist_ok=True)
+os.makedirs(args.visual_dir, exist_ok=True)
+
+# Keep an explicit --transform_input, and force it on when the input width
+# differs from the embedding size (previously the flag was always overwritten).
+args.transform_input = args.transform_input or (
+    args.in_channels != args.embedding_dim)
+# --tasks arrives as the string form of a list, e.g.
+# "['node_classification','link_pred']"; literal_eval parses it without
+# executing arbitrary code.
+args.tasks = ast.literal_eval(args.tasks)
+print(args.tasks)
@@ -1,6 +1,11 @@
 import platform
 
 
+# Maps a dataset name to a file ID string.
+# NOTE(review): presumably the remote (e.g. Google Drive) ID used when the
+# dataset is downloaded -- confirm against the download code.
+DATASET2FILEID = {
+    "Chickenpox": "1oAO5S1ikjxbbgPzBhZJf7Xf9bodbwwCE",
+
+}
+
 # Features in Subreddit
 AUTHOR = 'author'
 AUTHOR_FULLNAME = 'author_fullname'
 
@@ -0,0 +1,106 @@
+import os.path as osp
+import pickle
+import zipfile
+
+import torch
+import pandas as pd
+import numpy as np
+from typing import List, Union
+from torch_geometric.data import Data
+from torch_geometric_temporal import DynamicGraphStaticSignal
+
+from dygetviz.data.dygetviz_dataset import DyGETVizDataset
+from dygetviz.data.static_graph_static_signal import StaticGraphStaticSignal
+
+Edge_Index = Union[np.ndarray, None]
+Edge_Weight = Union[np.ndarray, None]
+Node_Features = List[Union[np.ndarray, None]]
+Targets = List[Union[np.ndarray, None]]
+Additional_Features = List[np.ndarray]
+
+class ChickenpoxDataset(StaticGraphStaticSignal, DyGETVizDataset):
+    """Weekly Hungarian chickenpox cases on a fixed county graph.
+
+    The processed data is cached as a pickle at ``self.dataset_path``. When
+    that file is missing, the raw files are downloaded and processed first.
+    Node features are randomly initialized; targets are ``log2(cases + 1)``.
+    """
+
+    def __init__(self, args, **kwargs: Additional_Features):
+        """Load (or build) the processed data and initialize both bases.
+
+        Args:
+            args: Parsed arguments; only ``embedding_dim`` is read here.
+            **kwargs: Forwarded unchanged to both base-class initializers.
+        """
+        self.args = args
+
+        self.dataset_name = "Chickenpox"
+
+        # NOTE(review): presumably this sets self.dataset_path and
+        # self.cache_dir, which are read below -- confirm in DyGETVizDataset.
+        DyGETVizDataset.__init__(self, self.dataset_name, **kwargs)
+
+        if osp.exists(self.dataset_path):
+            # NOTE(review): pickle can execute code on load; only point
+            # dataset_path at cache files this project wrote itself.
+            with open(self.dataset_path, "rb") as f:
+                d = pickle.load(f)
+        else:
+            self.download()
+            d = self.process()
+
+        node2idx = d["node2idx"]
+        targets = d["targets"]
+        node_presence = d["node_presence"]
+        edge_index = d["edge_index"]
+        edge_weight = d["edge_weight"]
+
+        self.num_nodes = len(node2idx)
+
+        # Random uniform node features bounded by
+        # sqrt(6 / (num_nodes + embedding_dim)).
+        limit = np.sqrt(6 / (self.num_nodes + args.embedding_dim))
+        features = np.random.uniform(-limit, limit, size=(
+            self.num_nodes, args.embedding_dim))
+
+        StaticGraphStaticSignal.__init__(self,
+            edge_index=edge_index,
+            edge_weight=edge_weight,
+            features=features,
+            targets=targets,
+            node_masks=node_presence,
+            **kwargs
+        )
+
+    def process(self):
+        """Build the processed dataset from the raw files and cache it.
+
+        Reads ``idx2county.xlsx``, ``hungary_edges.csv`` and
+        ``hungary_weekly_chickenpox_cases.csv`` from ``<cache_dir>/raw_data``
+        and pickles the result to ``self.dataset_path``.
+
+        Returns:
+            dict with keys ``targets``, ``node_presence``, ``node2idx``,
+            ``idx2node``, ``edge_index`` and ``edge_weight``.
+        """
+        raw_dir = osp.join(self.cache_dir, "raw_data")
+
+        mapping = pd.read_excel(osp.join(raw_dir, "idx2county.xlsx"))
+
+        self.nodes = mapping["county"].values
+
+        # county name -> integer id, plus the inverse lookup. These two were
+        # previously built the wrong way round; a pickle cached before this
+        # fix still holds the swapped mappings, so delete it to regenerate.
+        node2idx = dict(zip(mapping["county"], mapping["id"]))
+        idx2node = {v: k for k, v in node2idx.items()}
+
+        edges = pd.read_csv(osp.join(raw_dir, "hungary_edges.csv"))
+
+        # We use the actual #weekly cases as the ground-truth
+        weekly_cases = pd.read_csv(
+            osp.join(raw_dir, "hungary_weekly_chickenpox_cases.csv"))
+
+        # One row per week, one column per county (ordered like self.nodes).
+        cases = weekly_cases.loc[:, self.nodes].values
+        # Derived from the data rather than hard-coded (522 weeks in the
+        # published files).
+        num_snapshots = cases.shape[0]
+
+        # The graph is static: every snapshot shares the same edges, each
+        # with unit weight.
+        edge_index = [edges[["id_1", "id_2"]].values.T
+                      for _ in range(num_snapshots)]
+        edge_weight = [np.ones(edges.shape[0]) for _ in range(num_snapshots)]
+
+        # We predict the log2 of the weekly cases
+        targets = [np.log2(week + 1) for week in cases]
+
+        # Every county is marked present in every snapshot.
+        node_presence = np.ones((num_snapshots, len(self.nodes)))
+
+        d = {
+            "targets": targets,
+            "node_presence": node_presence,
+            "node2idx": node2idx,
+            "idx2node": idx2node,
+            "edge_index": edge_index,
+            "edge_weight": edge_weight
+        }
+
+        with open(self.dataset_path, "wb") as f:
+            pickle.dump(d, f)
+
+        return d
@@ -6,9 +6,10 @@
 import pandas as pd
 from numba import NumbaDeprecationWarning
 
-import const
 from arguments import args
 
+from dygetviz.data.chickenpox import ChickenpoxDataset
+
 warnings.simplefilter(action='ignore', category=NumbaDeprecationWarning)
 
 
@@ -21,6 +22,8 @@ def load_data(dataset_name=args.dataset_name) -> dict:
         node_presence: np.ndarray of shape (num_nodes, num_timesteps): 1 if node is present at timestep, 0 otherwise
     """
 
+
+
     config = json.load(
         open(osp.join("config", f"{dataset_name}.json"), 'r',
              encoding='utf-8'))
@@ -191,3 +194,27 @@ def load_data(dataset_name=args.dataset_name) -> dict:
         "z": z,
 
     }
+
+
+
+def load_data_dtdg(dataset_name: str):
+    """
+    Load data for embedding training on Discrete-Time Dynamic-Graph (DTDG) models.
+
+    Args:
+        dataset_name: Name of the dataset; "UNComtrade" and "Chickenpox" are
+            handled.
+
+    Returns:
+        Tuple ``(train_dataset, test_dataset, full_dataset)``.
+
+    Raises:
+        NotImplementedError: If ``dataset_name`` is not one of the handled
+            names.
+    """
+    from torch_geometric_temporal.signal import temporal_signal_split
+
+    if dataset_name == "UNComtrade":
+        # NOTE(review): UNComtradeDataset is not imported in the visible part
+        # of this module -- confirm it is in scope before using this branch.
+        full_dataset = UNComtradeDataset(args)
+
+    elif dataset_name == "Chickenpox":
+        full_dataset = ChickenpoxDataset(args)
+
+    else:
+        raise NotImplementedError(
+            f"Unsupported DTDG dataset: {dataset_name}")
+
+    # NOTE(review): with train_ratio=1. all snapshots presumably land in the
+    # training split, leaving test_dataset empty -- confirm this is intended.
+    train_dataset, test_dataset = temporal_signal_split(full_dataset,
+                                                        train_ratio=1.)
+
+    return train_dataset, test_dataset, full_dataset