Skip to content

Commit d91091e

Browse files
committed
1 parent daab642 commit d91091e

File tree

7 files changed

+199
-99
lines changed

7 files changed

+199
-99
lines changed

mptcpanalyzer/__init__.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# -*- coding: utf-8 -*-
2-
32
import logging
4-
from enum import Enum, IntEnum
3+
from enum import Enum, IntEnum, Flag, auto
54
from .config import MpTcpAnalyzerConfig
65
from .cache import Cache
76
import collections
@@ -55,7 +54,7 @@ def get_config() -> MpTcpAnalyzerConfig:
5554
# METADATA_ROWS = 2
5655

5756

58-
class TcpFlags(Enum):
57+
class TcpFlags(Flag):
5958
NONE = 0
6059
FIN = 1
6160
SYN = 2
@@ -68,23 +67,20 @@ class TcpFlags(Enum):
6867

6968

7069
# hopefully mypy will work with IntEnum's too
71-
class ConnectionRoles(Enum):
70+
class ConnectionRoles(IntEnum):
7271
"""
7372
Used to filter datasets and keep packets flowing in only one direction !
7473
Parser should accept --destination Client --destination Server if you want both.
7574
7675
TODO: convert back to enum, that was done for bad reasons
7776
"""
78-
# Client = "client"
79-
# Server = "server"
80-
Client = 0
81-
Server = 1
77+
Client = auto()
78+
Server = auto()
8279

8380
def __str__(self):
8481
# Note that defining __str__ is required to get ArgumentParser's help output to include the human-readable names of the ConnectionRoles members
8582
return self.name
86-
# def __getitem__(cls, name):
87-
# return cls._member_map_[name]
83+
8884
@staticmethod
8985
def from_string(s):
9086
try:

mptcpanalyzer/cli.py

+39-24
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
from typing import List, Any, Tuple, Dict, Callable, Set
4242
import cmd2
4343
import math
44+
from cmd2 import with_argparser, with_argparser_and_unknown_args, with_category
45+
from enum import Enum, auto
46+
4447

4548
from stevedore import extension
4649

@@ -56,16 +59,24 @@
5659
# log.setLevel(logging.DEBUG)
5760
# handler = logging.FileHandler("mptcpanalyzer.log", delay=False)
5861

59-
# def format_tcp_mapping(main: TcpConnection, mapped: TcpMapping):
60-
# )
6162

6263
histfile_size = 1000
6364

64-
CAT_REINJECTIONS = "Reinjections"
6565

6666
# workaround to get an argparse `choices` container built from the ConnectionRoles member names
6767
DestinationChoice = mp.CustomConnectionRolesChoices([e.name for e in mp.ConnectionRoles])
6868

69+
70+
# class Categories(Enum):
71+
# CAT_TCP = auto()
72+
# CAT_MPTCP = auto()
73+
# CAT_GENERAL = auto()
74+
75+
CAT_TCP = "TCP related"
76+
CAT_MPTCP = "MPTCP related"
77+
CAT_GENERAL = "Tool"
78+
# CAT_REINJECTIONS = "Reinjections"
79+
6980
def is_loaded(f):
7081
"""
7182
Decorator checking that dataset has correct columns
@@ -503,28 +514,28 @@ def do_summary(self, line):
503514
self.poutput('tcpstream %d transferred %d out of %d, accounting for %f%%' % (
504515
tcpstream, sf_bytes, mptcp_transferred, subflow_load*100))
505516

517+
518+
# TODO check for reinjections etc...
519+
parser = argparse.ArgumentParser(
520+
description="Export connection(s) to CSV"
521+
)
522+
parser.add_argument("output", action="store", help="Output filename")
523+
# parser.add_argument("--stream", action="store", )
524+
# )
525+
526+
group = parser.add_mutually_exclusive_group(required=False)
527+
group.add_argument('--tcpstream', action= 'store', type=int)
528+
group.add_argument('--mptcpstream', action= 'store', type=int)
529+
# parser.add_argument("protocol", action="store", choices=["mptcp", "tcp"], help="tcp.stream id visible in wireshark")
530+
parser.add_argument("--destination", action="store", choices=DestinationChoice, help="tcp.stream id visible in wireshark")
531+
parser.add_argument("--drop-syn", action="store_true", default=False,
532+
help="Helper just for my very own specific usecase")
506533
@is_loaded
507-
def do_tocsv(self, line):
534+
@with_argparser(parser)
535+
def do_tocsv(self, args):
508536
"""
509537
Selects tcp/mptcp/udp connection and exports it to csv
510538
"""
511-
# TODO check for reinjections etc...
512-
parser = argparse.ArgumentParser(
513-
description="Export connection(s) to CSV"
514-
)
515-
parser.add_argument("output", action="store", help="Output filename")
516-
# parser.add_argument("--stream", action="store", )
517-
# )
518-
519-
group = parser.add_mutually_exclusive_group(required=False)
520-
group.add_argument('--tcpstream', action= 'store', type=int)
521-
group.add_argument('--mptcpstream', action= 'store', type=int)
522-
# parser.add_argument("protocol", action="store", choices=["mptcp", "tcp"], help="tcp.stream id visible in wireshark")
523-
parser.add_argument("--destination", action="store", choices=DestinationChoice, help="tcp.stream id visible in wireshark")
524-
parser.add_argument("--drop-syn", action="store_true", default=False,
525-
help="Helper just for my very own specific usecase")
526-
527-
args = parser.parse_args(shlex.split(line))
528539

529540
df = self.data
530541
if args.tcpstream:
@@ -588,8 +599,6 @@ def do_summary_extended(self, line):
588599
self.tshark_config
589600
)
590601

591-
592-
593602
success, ret = stats.mptcp_compute_throughput_extended(
594603
# self.data, args.mptcpstream, args.destination
595604
df,
@@ -615,6 +624,7 @@ def do_list_connections(self, *args):
615624
List mptcp connections via their ids (mptcp.stream)
616625
"""
617626
streams = self.data.groupby("mptcpstream")
627+
# TODO use ppaged instead ?
618628
self.poutput('%d mptcp connection(s)' % len(streams))
619629
for mptcpstream, group in streams:
620630
self.list_subflows(mptcpstream)
@@ -778,6 +788,7 @@ def _print_reinjection_comparison(original_packet, reinj):
778788

779789

780790

791+
@with_category(CAT_TCP)
781792
@custom_tshark
782793
@is_loaded
783794
def do_list_reinjections(self, line):
@@ -922,13 +933,17 @@ def register_plots(ext, subparsers):
922933
# Allocate plot object
923934
plotter = self.plot_mgr[args.plot_type].obj
924935

925-
dargs = vars(args) # 'converts' the namespace to a dict
936+
dargs = vars(args) # 'converts' the namespace to a dict
937+
938+
# print(dargs)
926939

940+
dargs.update(destinations= dargs.get("destinations") or mp.ConnectionRoles)
927941
dataframes = plotter.preprocess(**dargs)
928942
assert dataframes is not None, "Preprocess must return a list"
929943
result = plotter.run(dataframes, **dargs)
930944
plotter.postprocess(result, **dargs)
931945

946+
@with_category(CAT_GENERAL)
932947
def do_clean_cache(self, line):
933948
"""
934949
mptcpanalyzer saves pcap to csv converted files in a cache folder, (most likely

mptcpanalyzer/data.py

+65-21
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33
import pandas as pd
44
import numpy as np
5-
from mptcpanalyzer.tshark import TsharkConfig
5+
from mptcpanalyzer.tshark import TsharkConfig, Field
66
from mptcpanalyzer.connection import MpTcpSubflow, MpTcpConnection, TcpConnection, MpTcpMapping, TcpMapping
77
import mptcpanalyzer as mp
88
from mptcpanalyzer import RECEIVER_SUFFIX, SENDER_SUFFIX, _receiver, _sender, suffix_fields
@@ -12,7 +12,7 @@
1212
import tempfile
1313
import pprint
1414
import functools
15-
from enum import Enum
15+
from enum import Enum, auto
1616

1717
log = logging.getLogger(__name__)
1818
slog = logging.getLogger(__name__)
@@ -35,6 +35,12 @@
3535
MPTCP_DEBUG_FIELDS=TCP_DEBUG_FIELDS + [ 'mptcpdest']
3636

3737

38+
def _convert_role(x):
39+
"""
40+
Workaround https://github.com/pandas-dev/pandas/pull/20826
41+
"""
42+
return ConnectionRoles[x] if x else np.nan
43+
3844

3945
def ignore(f1, f2):
4046
return 0
@@ -133,6 +139,16 @@ def _convert_list2str(serie):
133139
"tcplen"
134140
]
135141

142+
143+
"""
144+
On top of Tshark fields, we also describe fields generated by mptcpanalyzer
145+
"""
146+
artificial_fields = [
147+
# TODO use dtype_role as type
148+
Field("mptcpdest", "mptcpdest", dtype_role, "MPTCP destination"),
149+
Field("tcpdest", "tcpdest", dtype_role, "TCP destination")
150+
]
151+
136152
class PacketMappingMode(Enum):
137153
"""
138154
How to map packets from one stream to another
@@ -142,9 +158,8 @@ class PacketMappingMode(Enum):
142158
143159
The hash based is more straightforward
144160
"""
145-
HASH = 1
146-
SCORE = 2
147-
161+
HASH = auto()
162+
SCORE = auto()
148163

149164

150165
def load_merged_streams_into_pandas(
@@ -276,11 +291,6 @@ def _load_list(x, field="set field to debug"):
276291
res = ast.literal_eval(x) if (x is not None and x != '') else np.nan
277292
return res
278293

279-
def _convert_role(x):
280-
"""
281-
Workaround https://github.com/pandas-dev/pandas/pull/20826
282-
"""
283-
return ConnectionRoles[x] if x else np.nan
284294

285295
with open(cachename) as fd:
286296
import ast
@@ -370,10 +380,20 @@ def load_into_pandas(
370380
filename = getrealpath(input_file)
371381
cache = mp.get_cache()
372382

383+
fields = config.get_fields("fullname", "type")
384+
tshark_dtypes = {k: v for k, v in fields.items() if v is not None or k not in ["tcpflags"]}
385+
386+
artifical_dtypes = { field.fullname: field.type for field in artificial_fields }
387+
print("artifical_dtypes", artifical_dtypes)
388+
dtypes = dict(tshark_dtypes, **artifical_dtypes)
389+
390+
391+
# TODO add artificial_fields hash
392+
pseudohash = hash(config) + hash(frozenset(dtypes.items()))
373393
uid = cache.cacheuid(
374394
'', # prefix (might want to shorten it a bit)
375395
[ filename ], # dependencies
376-
str(config.hash()) + '.csv'
396+
str(pseudohash) + '.csv'
377397
)
378398

379399
is_cache_valid, csv_filename = cache.get(uid)
@@ -395,8 +415,9 @@ def load_into_pandas(
395415
else:
396416
raise Exception(stderr)
397417

398-
temp = config.get_fields("fullname", "type")
399-
dtypes = {k: v for k, v in temp.items() if v is not None or k not in ["tcpflags"]}
418+
print("ARTIFICAL_DTYPES:", artifical_dtypes)
419+
420+
400421
log.debug("Loading a csv file %s" % csv_filename)
401422

402423
try:
@@ -409,12 +430,15 @@ def load_into_pandas(
409430
# having both a converter and a dtype for a field generates warnings
410431
# so we pop tcp.flags
411432
# dtype=dtypes.pop("tcp.flags"),
412-
dtype=dtypes, # poping still generates
433+
dtype=dtypes,
413434
converters={
414435
"tcp.flags": _convert_flags,
415436
# reinjections, converts to list of integers
416437
"mptcp.reinjection_of": functools.partial(_convert_to_list, field="reinjectionOf"),
417438
"mptcp.reinjected_in": functools.partial(_convert_to_list, field="reinjectedIn"),
439+
440+
"mptcpdest": _convert_role,
441+
"tcpdest": _convert_role,
418442
},
419443
# nrows=10, # useful for debugging purpose
420444
)
@@ -444,6 +468,32 @@ def load_into_pandas(
444468
raise e
445469

446470
log.info("Finished loading dataframe for %s. Size=%d" % (input_file, len(data)))
471+
472+
names = set([ field.name for field in artificial_fields ])
473+
print("NAMES", names)
474+
column_names = set(data.columns)
475+
print("column_names", column_names)
476+
477+
478+
# TODO here I should assign the type
479+
new = pd.DataFrame(dtype= {
480+
"tcpdest": dtype_role
481+
})
482+
data = pd.concat([ data, new ],
483+
# ignore_index=False,
484+
# copy=False,
485+
)
486+
487+
# for missing_field in names - column_names:
488+
# print("missing field", missing_field)
489+
# data[missing_field] = np.nan
490+
491+
# data.astype({ })
492+
# data.assign( { missing_field: np.nan for missing_field in (names - column_names) } )
493+
494+
print("FINAL_DTYPES")
495+
print(data.dtypes)
496+
print(data.tcpdest.head(10))
447497
return data
448498

449499

@@ -519,6 +569,7 @@ def tcpdest_from_connections(df, con: TcpConnection):
519569
log.debug("Looking at destination %s" % dest)
520570
q = con.generate_direction_query(dest)
521571
df_dest = df.query(q)
572+
print("tcpdest %r" % dest)
522573
df.loc[df_dest.index, 'tcpdest'] = dest
523574

524575
# print("df",
@@ -604,13 +655,6 @@ def merge_tcp_dataframes_known_streams(
604655

605656
# TODO move elsewhere, to outer function
606657
# total = total.reindex(columns=firstcols + list(filter(lambda x: x not in firstcols, total.columns.tolist())))
607-
# total.to_csv(
608-
# cachename, # output
609-
# # columns=self.columns,
610-
# index=False,
611-
# header=True,
612-
# # sep=main.config["DEFAULT"]["delimiter"],
613-
# )
614658
log.info("Resulting merged tcp dataframe of size {} (to compare with {} and {})".format(
615659
len(total), len(h1_df), len(h2_df)
616660
))

0 commit comments

Comments
 (0)