Skip to content

Commit 8897e7b

Browse files
committed
list_mptcp_connections chose the wrong mptcpdest
because it was comparing values of different types. For now, I encode the failing fields as str instead of UInt64 (dsnraw seems affected as well); see pandas-dev/pandas#25472 for more details
1 parent 7c82b52 commit 8897e7b

File tree

6 files changed

+51
-52
lines changed

6 files changed

+51
-52
lines changed

mptcpanalyzer/cli.py

+6-23
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,8 @@ def list_subflows(self, mptcpstreamid: MpTcpStreamId):
362362

363363
try:
364364
con = MpTcpConnection.build_from_dataframe(self.data, mptcpstreamid)
365-
self.poutput("mptcp.stream %d has %d subflow(s) (client/server): "
366-
% (mptcpstreamid, len(con.subflows())))
365+
msg = "mptcp.stream %d has %d subflow(s) (client/server): "
366+
self.poutput(msg % (mptcpstreamid, len(con.subflows())))
367367
for sf in con.subflows():
368368
self.poutput("\t%s" % sf)
369369
except mp.MpTcpException as e:
@@ -526,7 +526,6 @@ def do_tcp_summary(self, args, unknown):
526526
df = self.data
527527

528528
# args.pcapdestinations ?
529-
# print(args)
530529

531530
for dest in ConnectionRoles:
532531
# TODO do it only when needed
@@ -573,7 +572,7 @@ def do_mptcp_summary(self, args, unknown):
573572
mptcpstream = args.mptcpstream
574573

575574
# args.pcapdestinations ?
576-
print(args)
575+
# print(args)
577576
ret = mptcp_compute_throughput(
578577
self.data, args.mptcpstream, args.destination
579578
)
@@ -670,7 +669,7 @@ def do_summary_extended(self, args, unknown):
670669
For now it is naive, does not look at retransmissions ?
671670
"""
672671

673-
print("%r" % args)
672+
print("Summary extended resume %r" % args)
674673
df_pcap1 = load_into_pandas(args.pcap1, self.tshark_config)
675674

676675
# to abstract things a bit
@@ -779,17 +778,6 @@ def do_list_mptcp_connections(self, *args):
779778
self.list_subflows(mptcpstream)
780779
self.poutput("\n")
781780

782-
# def generate_namespace(self) -> argparse.Namespace:
783-
# myNamespace = Namespace()
784-
# myNamespace.toto = self.data
785-
# parser = argparse_completer.ACArgumentParser(
786-
# description="""
787-
# Mptcpanalyzer filters pcaps to keep only tcp packets.
788-
# This may explain why printed packet ids dont map
789-
# """
790-
# )
791-
792-
793781

794782
parser = MpTcpAnalyzerParser(
795783
description="Export a pcap that can be used with wireshark to debug ids"
@@ -1091,12 +1079,7 @@ def do_load_pcap(self, args):
10911079
"""
10921080
Load the file as the current one
10931081
"""
1094-
print(args)
1095-
# args = shlex.split(args)
10961082
# print(args)
1097-
# parser = self.do_load_pcap.argparser
1098-
# print(parser)
1099-
# args = parser.parse_args(args)
11001083

11011084
self.poutput("Loading %s" % args.input_file)
11021085
self.data = args._dataframes["input_file"]
@@ -1138,15 +1121,15 @@ def do_plot(self, args, unknown):
11381121
# 'converts' the namespace to for the syntax define a dict
11391122
dargs = vars(args)
11401123

1141-
print("%s" % dargs)
1124+
# print("%s" % dargs)
11421125
dataframes = dargs.pop("_dataframes")
11431126
# workaround argparse limitations to set as default both directions
11441127
# TODO replace that with an action ?
11451128
# destinations=dargs.get("destinations", list(mp.ConnectionRoles))
11461129
# dargs.update(destinations=destinations)
11471130
# log.debug("Selecting destinations %s" % (destinations,))
11481131
# dataframes = plotter.preprocess(**dargs)
1149-
print("%s" % args)
1132+
print("DO_PLOT %s" % args)
11501133
# dataframes = args._dataframes.values()
11511134
assert dataframes is not None, "Preprocess must return a list"
11521135
# pass unknown_args too ?

mptcpanalyzer/connection.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -154,19 +154,23 @@ def __str__(self):
154154
# :>5d
155155
# TODO should be converted to int instead, would spare some memory
156156
line = ("tcp.stream {s.tcpstreamid:.0f}: {s.tcpclient_ip}:{s.client_port:0>5.0f} "
157-
" <-> {s.tcpserver_ip}:{s.server_port:0>5.0f} ").format(s=self,
157+
" -> {s.tcpserver_ip}:{s.server_port:0>5.0f} ").format(s=self,
158158
# tcpstreamid=self.tcpstreamid
159159
)
160160
return line
161161

162162

163163
# should it ?
164-
@dataclass
164+
# @dataclass
165165
class MpTcpSubflow(TcpConnection):
166166
"""
167167
168168
"""
169169

170+
""" to which mptcp side belongs the tcp server"""
171+
# mptcpdest: ConnectionRoles
172+
# addrid: int = None
173+
170174
def __init__(self, mptcpdest: ConnectionRoles, addrid=None, **kwargs) -> None:
171175
super().__init__(**kwargs)
172176
self.addrid = addrid
@@ -233,6 +237,7 @@ def __str__(self):
233237

234238

235239
# @dataframe
240+
# @dataclass
236241
class MpTcpConnection:
237242
"""
238243
Holds key characteristics of an MPTCP connection: keys, tokens, subflows
@@ -241,6 +246,8 @@ class MpTcpConnection:
241246
242247
subflows can be any order
243248
"""
249+
# mptcpstreamid: MpTcpStreamId
250+
244251
def __init__(self,
245252
mptcpstreamid: int,
246253
client_key: int, client_token: int, server_key: int,
@@ -294,6 +301,8 @@ def subflows(self, mptcpdest: ConnectionRoles = ConnectionRoles.Server):
294301
def build_from_dataframe(ds: pd.DataFrame, mptcpstreamid: MpTcpStreamId) -> 'MpTcpConnection':
295302
"""
296303
Instantiates a class that describes an MPTCP connection
304+
305+
Look for the first 2 packets containing "sendkey"
297306
"""
298307

299308
def get_index_of_non_null_values(serie):
@@ -345,17 +354,23 @@ def get_index_of_non_null_values(serie):
345354
receiver_token = subflow_ds["recvtok"].iloc[row]
346355

347356
# if we see the token
357+
log.debug("receiver_token %r to compare with server_token %r" % (receiver_token, server_token))
358+
log.debug("Test %s" % (receiver_token == server_token))
359+
mptcpdest = ConnectionRoles.Server if receiver_token == server_token \
360+
else ConnectionRoles.Client
361+
348362
subflow = MpTcpSubflow.create_subflow(
349-
mptcpdest = ConnectionRoles.Server if receiver_token == server_token \
350-
else ConnectionRoles.Client,
363+
mptcpdest = mptcpdest,
351364
tcpstreamid =tcpstreamid,
352365
tcpclient_ip=subflow_ds['ipsrc'].iloc[row],
353366
tcpserver_ip=subflow_ds['ipdst'].iloc[row],
354367
client_port =subflow_ds['sport'].iloc[row],
355368
server_port =subflow_ds['dport'].iloc[row],
356369
addrid =None,
357370
# rcv_token =receiver_token,
358-
)
371+
)
372+
373+
log.debug("Created subflow %s" % subflow)
359374

360375
subflows.append(subflow)
361376

mptcpanalyzer/data.py

+8-12
Original file line numberDiff line numberDiff line change
@@ -430,21 +430,22 @@ def load_into_pandas(
430430

431431
converters = {f.fullname: f.converter for _, f in config.fields.items() if f.converter}
432432
converters.update({name: f.converter for name, f in per_pcap_artificial_fields.items() if f.converter})
433-
# print("converters\n", converters)
434433

435434
dtypes = {field.fullname: field.type for _, field in config.fields.items() if field.converter is None}
436-
log.debug("Dtypes before load: %s" % dtypes)
437-
log.debug("Converters before load: %s" % converters)
435+
log.debug("Dtypes before load: %s" % (pp.pformat(dtypes)))
436+
log.debug("Converters before load: %s" % (pp.pformat(converters)))
438437

439438
from .pdutils import read_csv_debug
440-
fields = [f.fullname for _, f in config.fields.items()]
439+
# fields = [f.fullname for _, f in config.fields.items()]
440+
# fields =[ "tcp.options.mptcp.sendkey" ]
441441
# data = read_csv_debug(fields,
442442
data = pd.read_csv(
443443
fd,
444444
comment='#',
445445
sep=config.delimiter,
446446
dtype=dtypes,
447-
# usecols = [config.fields["ipsrc"].fullname ],
447+
# config.fields["ipsrc"].fullname
448+
# usecols = [ "tcp.options.mptcp.sendkey" ],
448449
# seems like for now we can't change the default representation apart from converting the column to
449450
# a string !!!
450451
# https://stackoverflow.com/questions/46930201/pandas-to-datetime-is-not-formatting-the-datetime-value-in-the-desired-format
@@ -456,7 +457,7 @@ def load_into_pandas(
456457
converters=converters,
457458
# float_precision="high", # might be necessary
458459
# nrows=13, # useful for debugging purpose
459-
# chunksize=5, # useful for debugging purpose
460+
# chunksize=1, # useful for debugging purpose
460461
)
461462

462463
log.debug("Finished loading CSV file")
@@ -472,7 +473,7 @@ def load_into_pandas(
472473
# we want packetid column to survive merges/dataframe transformation so keepit as a column
473474
# TODO remove ? let other functions do it ?
474475
data.set_index("packetid", drop=False, inplace=True)
475-
log.debug("Column names: %s" % data.columns)
476+
# log.debug("Column names: %s" % data.columns)
476477

477478
hashing_fields = [name for name, field in config.fields.items() if field.hash]
478479
log.debug("Hashing over fields %s" % hashing_fields)
@@ -488,14 +489,9 @@ def load_into_pandas(
488489
except Exception as e:
489490
logging.error("You may need to filter more your pcap to keep only mptcp packets")
490491
raise e
491-
# finally:
492-
# print (data)
493492

494493
log.info("Finished loading dataframe for %s. Size=%d" % (input_file, len(data)))
495494

496-
# print("FINAL_DTYPES")
497-
# log.debug(data.dtypes)
498-
# print(data.head(5))
499495
return data
500496

501497

mptcpanalyzer/pdutils.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def debug_dataframe(
5454
def read_csv_debug(fields, fd, *args, first_try=True, **kwargs):
5555
"""
5656
Help debugging dataframe loading errors (with dtypes/converters)
57-
chunksize: bool,
57+
chunksize: bool,
5858
"""
5959

6060
chunksize = kwargs.get("chunksize")
@@ -63,22 +63,23 @@ def read_csv_debug(fields, fd, *args, first_try=True, **kwargs):
6363
kwargs.pop("chunksize", None)
6464

6565
for field in fields:
66-
print("TESTING field ", field)
66+
print("TESTING field %s (first_try ? %s ) " % (field, first_try))
67+
print(kwargs.get("dtype")[field])
6768
try:
6869
res = pd.read_csv(
69-
fd,
70-
*args,
71-
usecols=[ field],
72-
**kwargs
73-
)
70+
fd,
71+
*args,
72+
usecols=[ field],
73+
**kwargs
74+
)
7475
if chunksize is not None:
7576
for i, chunk in enumerate(res):
7677
# print("chunk %d" % i)
7778
print(chunk)
7879
except TypeError as e:
7980
# TODO retry with chunksize
8081
if first_try:
81-
kwargs.update({"chunksize":chunksize or 40})
82+
kwargs.update({"chunksize": chunksize or 40})
8283
fd.seek(0)
8384
read_csv_debug([field], fd, *args, first_try=False, **kwargs)
8485
else:

mptcpanalyzer/statistics.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,10 @@ def mptcp_compute_throughput(
164164
subflow_stats: List[TcpUnidirectionalStats] = []
165165
for tcpstream, subdf in d:
166166
# subdf.iloc[0, subdf.columns.get_loc(_second('abstime'))]
167-
debug_dataframe(subdf, "subdf for stream %d" % tcpstream)
167+
# debug_dataframe(subdf, "subdf for stream %d" % tcpstream)
168168
dest = subdf.iloc[0, subdf.columns.get_loc(_sender('tcpdest'))]
169169
sf_stats = tcp_get_stats(subdf, tcpstream,
170+
# work around pandas issue
170171
ConnectionRoles(dest),
171172
True)
172173

mptcpanalyzer/tshark.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,12 @@ def add_mptcp_fields(self, advanced=True):
198198
# remove this one ?
199199
self.add_field("mptcp.expected_token", "expected_token", str, False, False)
200200
self.add_field("mptcp.stream", "mptcpstream", 'UInt64', False, False)
201-
self.add_field("tcp.options.mptcp.sendkey", "sendkey", np.float64, False, True)
202-
self.add_field("tcp.options.mptcp.recvkey", "recvkey", np.float64, False, True)
203-
self.add_field("tcp.options.mptcp.recvtok", "recvtok", np.float64, False, True)
201+
202+
# TODO convert to 'UInt64'
203+
self.add_field("tcp.options.mptcp.sendkey", "sendkey", str, False, True)
204+
self.add_field("tcp.options.mptcp.recvkey", "recvkey", str, False, True)
205+
self.add_field("tcp.options.mptcp.recvtok", "recvtok", str, False, True)
206+
204207
self.add_field("tcp.options.mptcp.datafin.flag", "datafin", 'Int64', False, True)
205208
# this is a list really; can contain "2,4"
206209
self.add_field("tcp.options.mptcp.subtype", "subtype", str, False, True)

0 commit comments

Comments
 (0)