from datetime import datetime

from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd


def modify_year_month(year_month: int, **kwargs) -> int:
    '''Shift a YYYYMM integer by a relativedelta period.

    year_month is an int in YYYYMM form (e.g. 202301). It is converted to
    the first day of that month, shifted by ``relativedelta(**kwargs)``
    (e.g. ``months=1``, ``years=-1``; check the relativedelta docs), and the
    resulting year and month are packed back into a YYYYMM int.
    '''
    year, month = divmod(year_month, 100)
    shifted = datetime(year, month, 1) + relativedelta(**kwargs)
    return shifted.year * 100 + shifted.month


def generate_df(size: int, id, start_year_month: int = 202301,
                year_month_dtype: str = 'int') -> pd.DataFrame:
    '''Create a dummy df with id, year_month and a random value.

    The frame has ``size`` rows:
      * ``id``         - the given ``id`` repeated on every row,
      * ``year_month`` - consecutive months starting at ``start_year_month``,
                         cast to ``year_month_dtype``,
      * ``value``      - random integers drawn from ``[0, size)``.

    NOTE: the ``id`` parameter shadows the builtin of the same name; it is
    kept because it is part of the function's call interface.
    '''
    df = pd.DataFrame({
        'id': [id] * size,
        'year_month': [modify_year_month(start_year_month, months=offset)
                       for offset in range(size)],
        'value': np.random.randint(0, size, size),
    })
    # Cast after construction so the key column can carry the dtype under test.
    df['year_month'] = df['year_month'].astype(year_month_dtype)
    return df


def merge_dummies_on_id_year_month(first_dtype, second_dtype, size=2):
    '''Merge two dummy frames whose year_month columns use the given dtypes.

    Both frames share id 1 and the same months; only the dtype of the
    ``year_month`` key differs. They are merged on ``['id', 'year_month']``
    and the value columns come back suffixed ``_a`` and ``_b``.
    '''
    a = generate_df(size, 1, year_month_dtype=first_dtype)
    b = generate_df(size, 1, year_month_dtype=second_dtype)
    return a.merge(b, on=['id', 'year_month'], suffixes=('_a', '_b'))


# Pairs of year_month dtypes to try merging against each other.
dtypes = [
    ['int', 'int'],
    ['int', 'int32[pyarrow]'],
    ['float', 'float'],
    ['float', 'float32[pyarrow]'],
]


def main():
    '''Attempt every dtype pairing in ``dtypes`` and report the outcome.'''
    for first_dtype, second_dtype in dtypes:
        try:
            merge_dummies_on_id_year_month(first_dtype, second_dtype)
        except KeyError as e:
            # Only KeyError is reported (presumably the failure mode being
            # investigated); any other exception still propagates.
            print(f'Failed to merge types: {first_dtype}, {second_dtype}')
            print(f'\tError msg: {e}\n')
        else:
            print(f'Sucessfully merged types: {first_dtype}, {second_dtype}\n')


# Run the demo only when executed as a script, so the helpers above can be
# imported without triggering the merges (and their optional pyarrow dtypes).
if __name__ == '__main__':
    main()