Skip to content

Commit 93b95d7

Browse files
committed
Tweaks for create_scores
1 parent 8520045 commit 93b95d7

File tree

3 files changed

+99
-82
lines changed

3 files changed

+99
-82
lines changed

src/server/api/admin_api.py

+7
Original file line numberDiff line numberDiff line change
@@ -403,3 +403,10 @@ def hit_gdrs():
403403
# d = read_rfm_edges() # read it again
404404
# print("round-trip d is : \n " + str(d) )
405405
# return "OK"
406+
407+
from server.rfm_funcs.create_scores import create_scores
408+
@admin_api.route("/api/admin/test_create_scores", methods=["GET"])
def hit_create_scores():
    '''
    Test endpoint: run create_scores() for a fixed query date and report
    how many scores it processed.

    Returns the same summary line that is written to the application log.
    '''
    current_app.logger.info("Hitting create_scores() ")
    tuple_count = create_scores('2021-07-27')

    # create_scores() returns an int count; it has to be converted explicitly
    # because "str" + int raises TypeError.
    message = "create_scores() processed " + str(tuple_count) + " scores"
    current_app.logger.info(message)

    # A Flask view must return a response; falling off the end (None) makes
    # Flask raise instead of answering the request.
    return message

src/server/rfm_funcs/create_scores.py

+81-71
Original file line numberDiff line numberDiff line change
@@ -1,111 +1,121 @@
1-
def create_scores(connection, query_date):
1+
from config import engine
2+
3+
import pandas as pd
4+
import numpy as np
5+
from datetime import datetime, date
6+
from collections import Counter
7+
8+
def _to_midnight(value):
    '''Return value as a naive datetime at 00:00 of its calendar day.'''
    # datetime (and subclasses of it) are also instances of date, so one
    # check covers both; any time-of-day / tzinfo component is dropped.
    if isinstance(value, date):
        return datetime(value.year, value.month, value.day)
    # Anything else must render as a "%Y-%m-%d" string.
    return datetime.strptime(str(value), "%Y-%m-%d")


def date_difference(my_date, query_date):
    '''
    Return how far my_date lies before query_date, as a datetime.timedelta
    (query_date - my_date), counted in whole calendar days.

    my_date    -- a single donation date (one row of the donations dataframe).
    query_date -- the date on which matching occurs; pipeline matching should
                  provide it.

    Each argument may be a date/datetime object or a value whose str() is in
    "%Y-%m-%d" format.  The result is negative when my_date is after
    query_date.

    Raises ValueError if a non-date argument is not in "%Y-%m-%d" format.
    '''
    return _to_midnight(query_date) - _to_midnight(my_date)
2518

26-
# read in labels and bin edges from table
27-
recency_labels = [5,4,3,2,1]
28-
recency_bins = list(read_rfm_edges('r').values()) #imported from table
2919

30-
frequency_labels = [1,2,3,4,5]
31-
frequency_bins = list(read_rfm_edges('f').values()) #imported from table
20+
def create_scores(query_date):
    '''
    Compute an RFM (recency / frequency / monetary) score for every
    matching_id that has Salesforce donations, and insert the scores into the
    rfm_scores table via insert_rfm_scores() (see src/server/api/admin_api.py).

    query_date -- date that recency is measured against; must be a string in
                  the following format "%Y-%m-%d".

    Returns the number of (matching_id, rfm_score) tuples that were inserted
    (0 when there are no usable donations).

    Raises ValueError if query_date is not in "%Y-%m-%d" format.
    '''
    # Imported here rather than at module level to avoid circular import
    # issues (api.admin_api imports this module).
    from api.admin_api import read_rfm_edges, insert_rfm_scores
    from rfm_functions import rfm_concat, merge_series

    # Validate the query date once, up front, instead of once per donation.
    query_day = pd.Timestamp(datetime.strptime(str(query_date), "%Y-%m-%d"))

    # The connection is only needed for the read; the helpers called below
    # take no connection argument.
    with engine.connect() as connection:
        donations = pd.read_sql(
            """
            select pc.matching_id, s.amount, s.close_date
            from salesforcedonations s
            inner join pdp_contacts pc on pc.source_id = s.contact_id and pc.source_type = 'salesforcecontacts'
            where pc.archived_date is null order by matching_id
            """
            , connection)
    donations = pd.DataFrame(donations, columns=['matching_id', 'amount', 'close_date'])

    # Reduce close_date to a plain calendar day (drops any time-of-day part),
    # stored as datetime64 so it can be both subtracted and grouped by quarter.
    donations['close_date'] = pd.to_datetime(pd.to_datetime(donations['close_date']).dt.date)
    # A donation without a close date cannot be placed in time; keeping it
    # would leak NaN into the recency and frequency scores.
    donations = donations.dropna(subset=['close_date'])
    if donations.empty:
        return 0

    # read in labels and bin edges from table
    rfm_dict = read_rfm_edges()

    def _edges(key):
        # NOTE(review): an earlier revision took .values() of the per-dimension
        # entry, so it is presumably a label -> edge mapping; a plain sequence
        # of edges is accepted as well.  Confirm against read_rfm_edges().
        raw = rfm_dict[key]
        return list(raw.values()) if isinstance(raw, dict) else list(raw)

    recency_labels = [5, 4, 3, 2, 1]
    frequency_labels = [1, 2, 3, 4, 5]
    monetary_labels = [1, 2, 3, 4, 5]

    # Every dimension gets an open-ended top bin.  For recency this labels the
    # data exactly as "append the observed maximum" would, but cannot produce
    # non-increasing bins when that maximum is <= the last stored edge.
    recency_bins = _edges('r') + [np.inf]
    frequency_bins = _edges('f') + [np.inf]
    monetary_bins = _edges('m') + [np.inf]

    ########################## recency #########################################
    # days between the query date and each contact's most recent donation
    donations['days_since'] = (query_day - donations['close_date']).dt.days
    recency = donations.groupby('matching_id', as_index=False)['days_since'].min()
    recency['recency_score'] = pd.cut(recency['days_since'], bins=recency_bins,
                                      labels=recency_labels, include_lowest=True)

    ################################## frequency ###############################
    # highest number of donations a contact made within a single quarter
    per_quarter = donations.groupby(
        ['matching_id', pd.Grouper(key='close_date', freq='Q')]).size()
    frequency = per_quarter.groupby(level=0).max().rename('frequency').reset_index()
    frequency['frequency_score'] = pd.cut(frequency['frequency'], bins=frequency_bins,
                                          labels=frequency_labels, include_lowest=True)

    ################################## amount ##################################
    # largest single donation per contact
    monetary = donations.groupby('matching_id', as_index=False)['amount'].max()
    monetary['amount_score'] = pd.cut(monetary['amount'], bins=monetary_bins,
                                      labels=monetary_labels, include_lowest=True)

    # Concatenate rfm scores: one row per matching_id holding all three scores
    scores = monetary.merge(frequency, on='matching_id').merge(recency, on='matching_id')

    # rfm_concat catenates the three integers as a string and converts the
    # result back to a single integer
    scores['rfm_score'] = rfm_concat(scores['recency_score'],
                                     scores['frequency_score'],
                                     scores['amount_score'])

    score_tuples = merge_series(scores['matching_id'], scores['rfm_score'])
    insert_rfm_scores(score_tuples)

    return len(score_tuples)  # callers only need the count at this point

src/server/rfm_funcs/rfm_functions.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@
22

33
### A number of RFM functions which are called by the main create_scores function.
44

5-
def date_difference(my_date, query_date):
6-
'''
7-
This function takes in a single date from the donations dataframe (per row) and compares the difference between that date and the date in which matching occurs.
8-
I.e. pipeline matching should provide a query_date so that this can work.
9-
'''
10-
from datetime import datetime, date
11-
12-
d1 = datetime.strptime(str(my_date), "%Y-%m-%d")
13-
d2 = datetime.strptime(str(query_date), "%Y-%m-%d")
14-
diff = (d2 - d1)
15-
return diff
5+
# def date_difference(my_date, query_date):
6+
# '''
7+
# This function takes in a single date from the donations dataframe (per row) and compares the difference between that date and the date in which matching occurs.
8+
# I.e. pipeline matching should provide a query_date so that this can work.
9+
# '''
10+
# from datetime import datetime, date
11+
12+
# d1 = datetime.strptime(str(my_date), "%Y-%m-%d")
13+
# d2 = datetime.strptime(str(query_date), "%Y-%m-%d")
14+
# diff = (d2 - d1)
15+
# return diff
1616

1717

1818

0 commit comments

Comments
 (0)