Skip to content

Commit 6d17a64

Browse files
committed
saving before the move
1 parent f88314e commit 6d17a64

File tree

5 files changed

+44
-112
lines changed

5 files changed

+44
-112
lines changed

mptcpanalyzer/cli.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ def __init__(self, cfg: MpTcpAnalyzerConfig, stdin=sys.stdin, **kwargs) -> None:
188188
self.debug = True # for now
189189
self.set_posix_shlex = True # need cmd2 >= 0.8
190190

191+
# Pandas specific initialization
192+
# for as long as https://github.com/pydata/numexpr/issues/331 is a problem
193+
pd.set_option('compute.use_numexpr', False)
194+
print("use numexpr?", pd.get_option('compute.use_numexpr', False))
195+
191196
# Load Plots
192197
######################
193198
# you can list available plots under the namespace
@@ -502,6 +507,7 @@ def _print_subflow(x):
502507
# TODO update the stream id autocompletion dynamically ?
503508
# setattr(action_stream, argparse_completer.ACTION_ARG_CHOICES, range(0, 10))
504509

510+
# TODO use filter_dest instead
505511
summary_parser.add_argument(
506512
'destination',
507513
# mp.DestinationChoice,
@@ -533,16 +539,12 @@ def do_summary(self, args, unknown):
533539
ret = mptcp_compute_throughput(
534540
self.data, args.mptcpstream, args.destination
535541
)
536-
# if success is not True:
537-
# self.perror("Throughput computation failed:")
538-
# self.perror(ret)
539-
# return
540542

541543
if args.json:
542544
import json
543545
# TODO use self.poutput
544546
# or use a stream, it must just be testable
545-
val = json.dumps(ret, ensure_ascii=False)
547+
val = json.dumps(dataclasses.asdict(ret), ensure_ascii=False)
546548
self.poutput(val)
547549
return
548550

mptcpanalyzer/data.py

+1-33
Original file line numberDiff line numberDiff line change
@@ -403,16 +403,7 @@ def load_into_pandas(
403403
dtypes = {field.fullname: field.type for _, field in config.fields.items() if field.converter is None}
404404
log.debug("Dtypes before load: %s" % dtypes)
405405
log.debug("Converters before load: %s" % converters)
406-
# test = pd.read_csv(
407-
# fd,
408-
# comment='#',
409-
# sep=config.delimiter,
410-
# nrows=1, # useful for debugging purpose
411-
# )
412-
# log.debug("Dtypes after load:%s\n" % dict(test.dtypes))
413-
414-
# https://stackoverflow.com/questions/52686559/read-csv-get-the-line-where-exception-occured
415-
# print(test.columns)
406+
416407
from .pdutils import read_csv_debug
417408
fields = [f.fullname for _, f in config.fields.items()]
418409
# data = read_csv_debug(fields,
@@ -529,29 +520,6 @@ def mptcpdest_from_connections(df, con: MpTcpConnection) -> pd.DataFrame:
529520

530521
return df
531522

532-
# for tcpdest in ConnectionRoles:
533-
534-
# log.debug("Looking at tcpdestination %s" % tcpdest)
535-
536-
# # pandas trick to avoid losing dtype
537-
# # see https://github.com/pandas-dev/pandas/issues/22361#issuecomment-413147667
538-
# # no need to set _second (as they are just opposite)
539-
# # TODO this should be done somewhere else
540-
# # else summary won't work
541-
# res[_first('tcpdest')][:] = tcpdest
542-
# res[_second('tcpdest')][:] = tcpdest
543-
544-
# # generate_mptcp_direction_query
545-
# if isinstance(main_connection, MpTcpSubflow):
546-
547-
# print("THIS IS A SUBFLOW")
548-
# mptcpdest = main_connection.mptcp_dest_from_tcpdest(tcpdest)
549-
# res[_first('mptcpdest')][:] = mptcpdest
550-
# res[_second('mptcpdest')][:] = mptcpdest
551-
552-
# print("Setting mptcpdest to %s", mptcpdest)
553-
# # if tcpdest == main_connection.mptcpdest
554-
555523

556524
def tcpdest_from_connections(df, con: TcpConnection) -> pd.DataFrame:
557525

mptcpanalyzer/parser.py

+32-63
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
tcpdest_from_connections, mptcpdest_from_connections)
1010
from mptcpanalyzer import (PreprocessingActions, ConnectionRoles, DestinationChoice,
1111
CustomConnectionRolesChoices, TcpStreamId, MpTcpStreamId)
12+
import mptcpanalyzer as mp
1213
from functools import partial
1314
from mptcpanalyzer.connection import MpTcpConnection, TcpConnection
1415

@@ -40,6 +41,9 @@ def _add_dataframe(namespace, dest, df):
4041

4142

4243
class DataframeAction(argparse.Action):
44+
'''
45+
If you need the action to act on a specific dataframe
46+
'''
4347

4448
def __init__(self, df_name: str, **kwargs) -> None:
4549
argparse.Action.__init__(self, **kwargs)
@@ -52,10 +56,6 @@ def add_dataframe(self, namespace, df):
5256

5357

5458

55-
# class StreamId(x):
56-
# return int(x)
57-
58-
5959
class LoadSinglePcap(DataframeAction):
6060
'''
6161
Test action !!
@@ -75,33 +75,6 @@ def __call__(self, parser, namespace, values, option_string=None):
7575

7676
self.add_dataframe (namespace, df)
7777

78-
# def with_argparser_test(argparser: argparse.ArgumentParser,
79-
# preserve_quotes: bool=False) -> Callable[[argparse.Namespace], Optional[bool]]:
80-
# import functools
81-
82-
# # noinspection PyProtectedMember
83-
# def arg_decorator(func: Callable[[cmd2.Statement], Optional[bool]]):
84-
# @functools.wraps(func)
85-
# def cmd_wrapper(instance, cmdline):
86-
# lexed_arglist = cmd2.cmd2.parse_quoted_string(cmdline, preserve_quotes)
87-
# return func(instance, argparser, lexed_arglist)
88-
89-
# # argparser defaults the program name to sys.argv[0]
90-
# # we want it to be the name of our command
91-
# # argparser.prog = func.__name__[len(COMMAND_FUNC_PREFIX):]
92-
93-
# # If the description has not been set, then use the method docstring if one exists
94-
# if argparser.description is None and func.__doc__:
95-
# argparser.description = func.__doc__
96-
97-
# # Set the command's help text as argparser.description (which can be None)
98-
# # cmd_wrapper.__doc__ = argparser.description
99-
100-
# # Mark this function as having an argparse ArgumentParser
101-
# setattr(cmd_wrapper, 'argparser', argparser)
102-
103-
# return cmd_wrapper
104-
10578
# return arg_decorator
10679
# def with_argparser(argparser: argparse.ArgumentParser,
10780
# preserve_quotes: bool=False) -> Callable[[argparse.Namespace], Optional[bool]]:
@@ -153,6 +126,10 @@ def with_argparser_test(
153126
preserve_quotes: bool=False,
154127
preload_pcap: bool=False,
155128
) -> Callable[[argparse.Namespace, List], Optional[bool]]:
129+
"""
130+
Arguments:
131+
preload_pcap: Use the preloaded pcap as a dataframe
132+
"""
156133
import functools
157134

158135
# noinspection PyProtectedMember
@@ -203,7 +180,6 @@ class AppendDestination(DataframeAction):
203180
assume convention on naming
204181
"""
205182

206-
# query
207183
def __init__(self, *args, **kwargs) -> None:
208184
self.already_called = False
209185
# self.destinations = list(ConnectionRoles)
@@ -254,7 +230,8 @@ class MergePcaps(DataframeAction):
254230
"""
255231
assume convention on naming
256232
"""
257-
def __init__(self,
233+
def __init__(
234+
self,
258235
name: str,
259236
protocol: str, # mptcp or tcp ?
260237
loader = TsharkConfig(),
@@ -327,18 +304,13 @@ def __call__(self, parser, namespace, values, option_string=None):
327304
# def __
328305

329306
# don't need the Mptcp flag anymore
330-
def exclude_stream(df_name, mptcp: bool = False):
331-
query = "tcpstream"
332-
if mptcp:
333-
query = "mp" + query
334-
query = query + "!={streamid}"
307+
def exclude_stream(df_name):
308+
query = "{field}!={streamid}"
335309
return partial(FilterStream, query, df_name)
336310

337-
def retain_stream(df_name, mptcp: bool = False):
338-
query = "tcpstream"
339-
if mptcp:
340-
query = "mp" + query
341-
query = query + "=={streamid}"
311+
# TODO va dependre du type en fait
312+
def retain_stream(df_name):
313+
query = "{field}=={streamid}"
342314
return partial(FilterStream, query, df_name)
343315

344316

@@ -424,7 +396,6 @@ class FilterStream(DataframeAction):
424396
def __init__(self, query: str, df_name: str, **kwargs) -> None:
425397
# self.df_name = df_name
426398
self.query_tpl = query
427-
# self.mptcp = mptcp
428399
super().__init__(df_name, **kwargs)
429400

430401
def __call__(self, parser, namespace, values, option_string=None):
@@ -437,31 +408,32 @@ def __call__(self, parser, namespace, values, option_string=None):
437408
# make sure result
438409
df = namespace._dataframes[self.df_name]
439410

440-
# streamid = values
441-
442411
log.debug("Filtering stream %s" % (values))
443412

444413
# if type(values) != list:
445414
# streamids = list(values)
415+
print("received values %r" % values)
446416

447-
# TODO build a query
448-
mptcp = False
449417
field = "tcpstream"
450-
if isinstance(values, TcpStreamId):
451-
pass
452-
453-
elif isinstance(values, MpTcpStreamId):
454-
mptcp = True
418+
if isinstance(values, MpTcpStreamId):
455419
field = "mptcpstream"
420+
print("mptcp instance type ")
421+
elif isinstance(values, TcpStreamId):
422+
pass
456423
else:
457-
parser.error("Unsupported type %s" % type(values))
424+
parser.error("Unsupported 'type' %s. Set it to TcpStreamId or MpTcpStreamId" % type(values))
458425

459426
# super(argparse.Action).__call__(parser, namespace, values, option_string)
460427
setattr(namespace, self.dest, values)
461-
query = self.query_tpl.format(streamid=values)
428+
query = self.query_tpl.format(field=field, streamid=values)
462429

463-
log.debug("Applying query %s" % query)
464-
df.query(query, inplace=True)
430+
log.log(mp.TRACE, "Applying query [%s]" % query)
431+
print(df.head(5))
432+
print(df.dtypes)
433+
434+
import pandas as pd
435+
print("use numexpr?", pd.get_option('compute.use_numexpr', False))
436+
df.query(query, inplace=True, )
465437

466438

467439
def gen_bicap_parser(protocol, dest=False):
@@ -550,12 +522,11 @@ def _pcap(name, pcapAction="store", filterAction="store"):
550522
# help=argparse.SUPPRESS)
551523
# merge_pcap.default = "TEST"
552524
else:
553-
# print("PreprocessingActions.Merge:")
554-
# TODO pas forcement
555525
filterClass = FilterStream
556526
_pcap(df_name, pcapAction=LoadSinglePcap,
557527
filterAction=retain_stream(df_name,
558-
mptcp = bool(bitfield & PreprocessingActions.FilterMpTcpStream))
528+
# mptcp = bool(bitfield & PreprocessingActions.FilterMpTcpStream)
529+
)
559530
)
560531

561532
if bitfield & PreprocessingActions.FilterDestination or direction :
@@ -582,7 +553,7 @@ def _pcap(name, pcapAction="store", filterAction="store"):
582553
if skip_subflows:
583554
parser.add_argument(
584555
'--skip', dest=df_name + "skipped_subflows", type=TcpStreamId,
585-
action=exclude_stream(df_name, mptcp=False),
556+
action=exclude_stream(df_name,),
586557
default=[],
587558
help=("You can type here the tcp.stream of a subflow "
588559
"not to take into account (because"
@@ -601,8 +572,6 @@ class MpTcpAnalyzerParser(argparse_completer.ACArgumentParser):
601572
602573
'''
603574

604-
# def __init__():
605-
606575
# def _parse_known_args(self, arg_strings, namespace):
607576
def parse_known_args(self, args=None, namespace=None):
608577
"""

mptcpanalyzer/pdutils.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ def connection(self, streamid):
1616
return TcpConnection.build_from_dataframe(self._obj, streamid)
1717

1818

19+
20+
# https://stackoverflow.com/questions/52686559/read-csv-get-the-line-where-exception-occured
1921
def read_csv_debug(fields, fd, *args, first_try=True, **kwargs):
2022
"""
2123
Help debugging dataframe loading errors (with dtypes/converters)
@@ -51,17 +53,8 @@ def read_csv_debug(fields, fd, *args, first_try=True, **kwargs):
5153
raise e
5254

5355
finally:
54-
5556
fd.seek(0)
56-
# else:
57-
# data = pd.read_csv(
58-
# fd,
59-
# *args,
60-
# usecols=[ field],
61-
# **kwargs
62-
# )
63-
64-
# return data
57+
6558

6659

6760
def filter_dataframe(

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def run(self):
104104
],
105105
# test_suite="tests",
106106
cmdclass={
107-
"test": RunTests,
107+
"test": RunTests,
108108
},
109109
zip_safe=False,
110110
)

0 commit comments

Comments
 (0)