teto
diff --git a/mptcpanalyzer/__init__.py
+6-10 b/mptcpanalyzer/__init__.py
+6-10
diff --git a/mptcpanalyzer/cli.py
+39-24 b/mptcpanalyzer/cli.py
+39-24
diff --git a/mptcpanalyzer/data.py
+65-21 b/mptcpanalyzer/data.py
+65-21
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-
 import logging
-from enum import Enum, IntEnum
+from enum import Enum, IntEnum, Flag, auto
 from .config import MpTcpAnalyzerConfig
 from .cache import Cache
 import collections
@@ -55,7 +54,7 @@ def get_config() -> MpTcpAnalyzerConfig:
 # METADATA_ROWS = 2
 
 
-class TcpFlags(Enum):
+class TcpFlags(Flag):
     NONE = 0
     FIN = 1
     SYN = 2
@@ -68,23 +67,20 @@ class TcpFlags(Enum):
 
 
 # hopefully mypy will work with IntEnum's too
-class ConnectionRoles(Enum):
+class ConnectionRoles(IntEnum):
     """
     Used to filter datasets and keep packets flowing in only one direction !
     Parser should accept --destination Client --destination Server if you want both.
 
     TODO: convert back to enum, that was done for bad reasons
     """
-    # Client = "client"
-    # Server = "server"
-    Client = 0
-    Server = 1
+    Client = auto()
+    Server = auto()
 
     def __str__(self):
         # Note that defining __str__ is required to get ArgumentParser's help output to include the human readable (values) of Color
         return self.name
-    # def __getitem__(cls, name):
-    #     return cls._member_map_[name]
+
     @staticmethod
     def from_string(s):
         try:
 
@@ -41,6 +41,9 @@
 from typing import List, Any, Tuple, Dict, Callable, Set
 import cmd2
 import math
+from cmd2 import with_argparser, with_argparser_and_unknown_args, with_category
+from enum import Enum, auto
+
 
 from stevedore import extension
 
@@ -56,16 +59,24 @@
 # log.setLevel(logging.DEBUG)
 # handler = logging.FileHandler("mptcpanalyzer.log", delay=False)
 
-# def format_tcp_mapping(main: TcpConnection, mapped: TcpMapping):
-#     )
 
 histfile_size = 1000
 
-CAT_REINJECTIONS = "Reinjections"
 
 # workaround to get
 DestinationChoice = mp.CustomConnectionRolesChoices([e.name for e in mp.ConnectionRoles])
 
+
+# class Categories(Enum):
+    # CAT_TCP = auto()
+    # CAT_MPTCP = auto()
+    # CAT_GENERAL = auto()
+
+CAT_TCP = "TCP related"
+CAT_MPTCP = "MPTCP related"
+CAT_GENERAL = "Tool"
+# CAT_REINJECTIONS = "Reinjections"
+
 def is_loaded(f):
     """
     Decorator checking that dataset has correct columns
@@ -503,28 +514,28 @@ def do_summary(self, line):
             self.poutput('tcpstream %d transferred %d out of %d, accounting for %f%%' % (
                 tcpstream, sf_bytes, mptcp_transferred, subflow_load*100))
 
+
+    # TODO check for reinjections etc...
+    parser = argparse.ArgumentParser(
+        description="Export connection(s) to CSV"
+    )
+    parser.add_argument("output", action="store", help="Output filename")
+    # parser.add_argument("--stream", action="store", )
+    # )
+
+    group = parser.add_mutually_exclusive_group(required=False)
+    group.add_argument('--tcpstream', action= 'store', type=int)
+    group.add_argument('--mptcpstream', action= 'store', type=int)
+    # parser.add_argument("protocol", action="store", choices=["mptcp", "tcp"], help="tcp.stream id visible in wireshark")
+    parser.add_argument("--destination", action="store", choices=DestinationChoice, help="tcp.stream id visible in wireshark")
+    parser.add_argument("--drop-syn", action="store_true", default=False,
+            help="Helper just for my very own specific usecase")
     @is_loaded
-    def do_tocsv(self, line):
+    @with_argparser(parser)
+    def do_tocsv(self, args):
         """
         Selects tcp/mptcp/udp connection and exports it to csv
         """
-        # TODO check for reinjections etc...
-        parser = argparse.ArgumentParser(
-            description="Export connection(s) to CSV"
-        )
-        parser.add_argument("output", action="store", help="Output filename")
-        # parser.add_argument("--stream", action="store", )
-        # )
-
-        group = parser.add_mutually_exclusive_group(required=False)
-        group.add_argument('--tcpstream', action= 'store', type=int)
-        group.add_argument('--mptcpstream', action= 'store', type=int)
-        # parser.add_argument("protocol", action="store", choices=["mptcp", "tcp"], help="tcp.stream id visible in wireshark")
-        parser.add_argument("--destination", action="store", choices=DestinationChoice, help="tcp.stream id visible in wireshark")
-        parser.add_argument("--drop-syn", action="store_true", default=False,
-                help="Helper just for my very own specific usecase")
-
-        args = parser.parse_args(shlex.split(line))
 
         df = self.data
         if args.tcpstream:
@@ -588,8 +599,6 @@ def do_summary_extended(self, line):
             self.tshark_config
         )
 
-
-
         success, ret = stats.mptcp_compute_throughput_extended(
                 # self.data, args.mptcpstream, args.destination
                 df,
@@ -615,6 +624,7 @@ def do_list_connections(self, *args):
         List mptcp connections via their ids (mptcp.stream)
         """
         streams = self.data.groupby("mptcpstream")
+        # TODO use ppaged instead ?
         self.poutput('%d mptcp connection(s)' % len(streams))
         for mptcpstream, group in streams:
             self.list_subflows(mptcpstream)
@@ -778,6 +788,7 @@ def _print_reinjection_comparison(original_packet, reinj):
 
 
 
+    @with_category(CAT_TCP)
     @custom_tshark
     @is_loaded
     def do_list_reinjections(self, line):
@@ -922,13 +933,17 @@ def register_plots(ext, subparsers):
         # Allocate plot object
         plotter = self.plot_mgr[args.plot_type].obj
 
-        dargs = vars(args)  # 'converts' the namespace to a dict
+        dargs = vars(args)  # 'converts' the namespace to a dict
+
+        # print(dargs)
 
+        dargs.update(destinations= dargs.get("destinations") or mp.ConnectionRoles)
         dataframes = plotter.preprocess(**dargs)
         assert dataframes is not None, "Preprocess must return a list"
         result = plotter.run(dataframes, **dargs)
         plotter.postprocess(result, **dargs)
 
+    @with_category(CAT_GENERAL)
     def do_clean_cache(self, line):
         """
         mptcpanalyzer saves pcap to csv converted files in a cache folder, (most likely
 
@@ -2,7 +2,7 @@
 import os
 import pandas as pd
 import numpy as np
-from mptcpanalyzer.tshark import TsharkConfig
+from mptcpanalyzer.tshark import TsharkConfig, Field
 from mptcpanalyzer.connection import MpTcpSubflow, MpTcpConnection, TcpConnection, MpTcpMapping, TcpMapping
 import mptcpanalyzer as mp
 from mptcpanalyzer import RECEIVER_SUFFIX, SENDER_SUFFIX, _receiver, _sender, suffix_fields
@@ -12,7 +12,7 @@
 import tempfile
 import pprint
 import functools
-from enum import Enum
+from enum import Enum, auto
 
 log = logging.getLogger(__name__)
 slog = logging.getLogger(__name__)
@@ -35,6 +35,12 @@
 MPTCP_DEBUG_FIELDS=TCP_DEBUG_FIELDS + [ 'mptcpdest']
 
 
+def _convert_role(x):
+    """
+    Map a non-empty role name to its ConnectionRoles member, else NaN (workaround for https://github.com/pandas-dev/pandas/pull/20826)
+    """
+    return ConnectionRoles[x] if x else np.nan
+
 
 def ignore(f1, f2):
     return 0
@@ -133,6 +139,16 @@ def _convert_list2str(serie):
     "tcplen"
 ]
 
+
+"""
+On top of Tshark fields, we also describe fields generated by mptcpanalyzer
+"""
+artificial_fields = [
+    # TODO use dtype_role as type
+    Field("mptcpdest", "mptcpdest", dtype_role, "MPTCP destination"),
+    Field("tcpdest", "tcpdest", dtype_role, "TCP destination")
+]
+
 class PacketMappingMode(Enum):
     """
     How to map packets from one stream to another
@@ -142,9 +158,8 @@ class PacketMappingMode(Enum):
 
     The hash based is more straightforward
     """
-    HASH = 1
-    SCORE = 2
-
+    HASH = auto()
+    SCORE = auto()
 
 
 def load_merged_streams_into_pandas(
@@ -276,11 +291,6 @@ def _load_list(x, field="set field to debug"):
                 res = ast.literal_eval(x) if (x is not None and x != '') else np.nan
                 return res
 
-            def _convert_role(x):
-                """
-                Workaround https://github.com/pandas-dev/pandas/pull/20826
-                """
-                return ConnectionRoles[x] if x else np.nan
 
             with open(cachename) as fd:
                 import ast
@@ -370,10 +380,20 @@ def load_into_pandas(
     filename = getrealpath(input_file)
     cache = mp.get_cache()
 
+    fields = config.get_fields("fullname", "type")
+    tshark_dtypes = {k: v for k, v in fields.items() if v is not None or k not in ["tcpflags"]}
+
+    artifical_dtypes = { field.fullname: field.type for field in artificial_fields }
+    print("artifical_dtypes", artifical_dtypes)
+    dtypes = dict(tshark_dtypes, **artifical_dtypes)
+
+
+    # TODO add artificial_fields hash
+    pseudohash = hash(config) + hash(frozenset(dtypes.items()))
     uid = cache.cacheuid(
         '',  # prefix (might want to shorten it a bit)
         [ filename ], # dependencies
-        str(config.hash())  + '.csv'
+        str(pseudohash)  + '.csv'
     )
 
     is_cache_valid, csv_filename = cache.get(uid)
@@ -395,8 +415,9 @@ def load_into_pandas(
             else:
                 raise Exception(stderr)
 
-    temp = config.get_fields("fullname", "type")
-    dtypes = {k: v for k, v in temp.items() if v is not None or k not in ["tcpflags"]}
+    print("ARTIFICAL_DTYPES:", artifical_dtypes)
+ 
+
     log.debug("Loading a csv file %s" % csv_filename)
 
     try:
@@ -409,12 +430,15 @@ def load_into_pandas(
                 # having both a converter and a dtype for a field generates warnings
                 # so we pop tcp.flags
                 # dtype=dtypes.pop("tcp.flags"),
-                dtype=dtypes, # poping still generates
+                dtype=dtypes,
                 converters={
                     "tcp.flags": _convert_flags,
                     # reinjections, converts to list of integers
                     "mptcp.reinjection_of": functools.partial(_convert_to_list, field="reinjectionOf"),
                     "mptcp.reinjected_in": functools.partial(_convert_to_list, field="reinjectedIn"),
+
+                    "mptcpdest": _convert_role,
+                    "tcpdest": _convert_role,
                 },
                 # nrows=10, # useful for debugging purpose
             )
@@ -444,6 +468,32 @@ def load_into_pandas(
         raise e
 
     log.info("Finished loading dataframe for %s. Size=%d" % (input_file, len(data)))
+    
+    names = set([ field.name for field in artificial_fields ])
+    print("NAMES", names)
+    column_names = set(data.columns)
+    print("column_names", column_names)
+
+
+    # TODO here I should assign the type
+    new = pd.DataFrame(dtype= {
+        "tcpdest": dtype_role
+        })
+    data = pd.concat([ data, new ],
+            # ignore_index=False,
+           # copy=False,
+           )
+
+    # for missing_field in names - column_names:
+    #     print("missing field", missing_field)
+    #     data[missing_field] = np.nan
+
+    # data.astype({ })
+    # data.assign( { missing_field: np.nan for missing_field in (names - column_names) } )
+
+    print("FINAL_DTYPES")
+    print(data.dtypes)
+    print(data.tcpdest.head(10))
     return data
 
 
@@ -519,6 +569,7 @@ def tcpdest_from_connections(df, con: TcpConnection):
         log.debug("Looking at destination %s" % dest)
         q = con.generate_direction_query(dest)
         df_dest = df.query(q)
+        print("tcpdest %r" % dest)
         df.loc[df_dest.index, 'tcpdest'] = dest
 
     # print("df", 
@@ -604,13 +655,6 @@ def merge_tcp_dataframes_known_streams(
 
     # TODO move elsewhere, to outer function
     # total = total.reindex(columns=firstcols + list(filter(lambda x: x not in firstcols, total.columns.tolist())))
-    # total.to_csv(
-    #     cachename, # output
-    #     # columns=self.columns,
-    #     index=False,
-    #     header=True,
-    #     # sep=main.config["DEFAULT"]["delimiter"],
-    # )
     log.info("Resulting merged tcp dataframe of size {} (to compare with {} and {})".format(
         len(total), len(h1_df), len(h2_df)
     ))