26
26
27
27
# TODO might need a converter when saving/loading
28
28
# TODO pandas.api.types.register_extension_dtype()
29
- dtype_role = pd .api .types .CategoricalDtype (categories = list ( ConnectionRoles ) , ordered = True )
29
+ dtype_role = pd .api .types .CategoricalDtype (categories = ConnectionRoles , ordered = True )
30
30
31
31
TCP_DEBUG_FIELDS = ['hash' , 'packetid' , "reltime" , "abstime" ]
32
32
MPTCP_DEBUG_FIELDS = TCP_DEBUG_FIELDS + ['mptcpdest' ]
33
33
34
34
35
- # def _convert_role(x):
36
- # """
37
- # Workaround https://github.com/pandas-dev/pandas/pull/20826
38
- # """
39
- # log.log(mp.TRACE, "converting [%r] into role" % x)
40
- # return ConnectionRoles.from_string (x)
35
+ def _convert_role (x ):
36
+ """
37
+ Workaround https://github.com/pandas-dev/pandas/pull/20826
38
+ """
39
+ log .log (mp .TRACE , "converting [%r] into role" % x )
40
+ return ConnectionRoles (x )
41
41
42
42
def ignore (f1 , f2 ):
43
43
return 0
@@ -96,6 +96,10 @@ def getrealpath(input_file):
96
96
per_pcap_artificial_fields = {
97
97
"mptcpdest" : Field ("mptcpdest" , dtype_role , "MPTCP destination" , False , None ),
98
98
"tcpdest" : Field ("tcpdest" , dtype_role , "TCP destination" , False , None ),
99
+
100
+ # "mptcpdest": Field("mptcpdest", None, "MPTCP destination", False, _convert_role),
101
+ # "tcpdest": Field("tcpdest", None, "TCP destination", False, _convert_role),
102
+
99
103
# TODO use int? as type
100
104
"hash" : Field ("hash" , str , "Hash of fields" , False , None ),
101
105
@@ -287,7 +291,7 @@ def _gen_converters() -> Dict[str, Callable]:
287
291
converters = converters ,
288
292
)
289
293
# at this stage, destinations are nan
290
- debug_dataframe (merged_df , "Merged dataframe" , )
294
+ # debug_dataframe(merged_df, "Merged dataframe", )
291
295
292
296
# log.debug("Column names after loading from cache: %s", merged_df.columns)
293
297
@@ -316,8 +320,8 @@ def _gen_converters() -> Dict[str, Callable]:
316
320
# don't do it here else we might repeat it
317
321
# data["abstime"] += clock_offset
318
322
319
- debug_dataframe (res , "checking merge" , usecols = ["merge_status" ])
320
- print ("%d nan values" % len (res [res .merge_status == np .nan ]))
323
+ # debug_dataframe(res, "checking merge", usecols=["merge_status"])
324
+ # print("%d nan values" % len(res[res.merge_status == np.nan]))
321
325
322
326
# log.debug("Column names: %s", res.columns)
323
327
# log.debug("Dtypes after load:%s\n" % dict(res.dtypes))
@@ -610,13 +614,14 @@ def _rename_column(col_name, suffixes) -> str:
610
614
log .log (mp .TRACE , "renaming inplace" )
611
615
612
616
tdf .rename (columns = rename_func , inplace = True )
613
- debug_dataframe (tdf , "temporary dataframe" )
617
+ # debug_dataframe(tdf, "temporary dataframe")
614
618
total = pd .concat ([total , tdf ], ignore_index = True , sort = False , )
615
619
print ("total df size = %d" % len (total ))
616
620
617
621
# subdf[ _first("tcpdest") == ConnectionRole.Client] .rename(columns=_rename_cols, inplace=True)
618
622
# print(subdf.columns)
619
623
# print(total.columns)
624
+ debug_dataframe (total , "total" )
620
625
621
626
logging .debug ("Converted to sender/receiver format" )
622
627
return total
@@ -641,14 +646,14 @@ def merge_tcp_dataframes_known_streams(
641
646
2/ identify which dataframe is server's/client's
642
647
2/
643
648
644
- Adds a merge_status column
645
649
646
650
Args:
647
651
con1: Tuple dataframe/tcpstream id
648
652
con2: same
649
653
650
654
Returns:
651
- res
655
+ A dataframe with a "merge_status" column and valid tcp/mptcp destinations
656
+
652
657
To ease debugging, we want to see packets in chronological order
653
658
654
659
"""
@@ -690,14 +695,15 @@ def merge_tcp_dataframes_known_streams(
690
695
# generate_mptcp_direction_query
691
696
if isinstance (main_connection , MpTcpSubflow ):
692
697
693
- print ( "THIS IS A SUBFLOW " )
698
+ log . debug ( "This is a subflow, setting mptcp destinations... " )
694
699
mptcpdest = main_connection .mptcp_dest_from_tcpdest (tcpdest )
695
700
res [_first ('mptcpdest' )][:] = mptcpdest
696
701
res [_second ('mptcpdest' )][:] = mptcpdest
697
702
698
703
log .debug ("Setting mptcpdest to %s" % mptcpdest )
699
704
# if tcpdest == main_connection.mptcpdest
700
705
706
+ debug_dataframe (total , "concanated df" , usecols = ["tcpdest" , "mptcpdest" ])
701
707
# TODO here we should
702
708
total = pd .concat ([res , total ])
703
709
@@ -894,6 +900,7 @@ def map_tcp_packets(
894
900
# con1: TcpConnection, con2: TcpConnection
895
901
) -> pd .DataFrame :
896
902
'''
903
+ Dataframe with format
897
904
'''
898
905
if mode == "hash" :
899
906
res = map_tcp_packets_via_hash (sender_df , receiver_df , explain )
@@ -923,14 +930,14 @@ def map_tcp_packets_via_hash(
923
930
):
924
931
"""
925
932
Merge on hash of different fields
933
+ Resulting dataframe has H1_SUFFIX / H2_SUFFIX
926
934
"""
927
935
log .info ("Merging packets via hash" )
928
936
debug_cols = ["packetid" , "hash" , "reltime" ]
929
937
930
938
from .pdutils import debug_dataframe
931
- debug_dataframe (sender_df , "sender_df" , )
932
- debug_dataframe (receiver_df , "receiver df" )
933
- # print(receiver_df[debug_cols].head(20))
939
+ # debug_dataframe(sender_df, "sender_df", )
940
+ # debug_dataframe(receiver_df, "receiver df")
934
941
# print("sender_df dtype=", sender_df.dtypes.tcpdest)
935
942
# print("receiver_df dtype=", receiver_df.dtypes.tcpdest)
936
943
0 commit comments