# RFM_Analysis.py
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
# Use the "plotly_white" template for every figure created in this script.
pio.templates.default = "plotly_white"

# Load the transaction data. The code below reads the columns CustomerID,
# OrderID, PurchaseDate and TransactionAmount from it.
data = pd.read_csv("rfm_data.csv")
print(data.head())

from datetime import datetime

# Convert 'PurchaseDate' to datetime
data['PurchaseDate'] = pd.to_datetime(data['PurchaseDate'])

# Calculate Recency: whole calendar days between today and the purchase date.
# Both operands stay pandas datetimes truncated to midnight, so the difference
# is a timedelta64 Series and `.dt.days` is always available. Subtracting
# plain `datetime.date` objects instead yields an object-dtype Series, on
# which the `.dt` accessor is not reliably usable.
purchase_day = data['PurchaseDate']
if purchase_day.dt.tz is not None:
    # Drop the timezone but keep the wall-clock date, as `.dt.date` would.
    purchase_day = purchase_day.dt.tz_localize(None)
today = pd.Timestamp.now().normalize()
data['Recency'] = (today - purchase_day.dt.normalize()).dt.days

# Calculate Frequency: number of orders per customer, merged back so that
# every transaction row carries its customer's order count.
frequency_data = data.groupby('CustomerID')['OrderID'].count().reset_index()
frequency_data.rename(columns={'OrderID': 'Frequency'}, inplace=True)
data = data.merge(frequency_data, on='CustomerID', how='left')

# NOTE: no segment chart is drawn at this point. The 'Value Segment' column
# does not exist until the RFM scores have been computed further down, so
# plotting it here raised a KeyError; the distribution chart is produced
# once, right after the segments are assigned.

# Calculate Monetary Value: total transaction amount per customer, merged
# back onto every transaction row.
monetary_data = data.groupby('CustomerID')['TransactionAmount'].sum().reset_index()
monetary_data.rename(columns={'TransactionAmount': 'MonetaryValue'}, inplace=True)
data = data.merge(monetary_data, on='CustomerID', how='left')
print(data.head())
# Score labels for each RFM metric, listed from the lowest bin to the highest.
recency_scores = [5, 4, 3, 2, 1]    # smaller recency (more recent) -> higher score
frequency_scores = [1, 2, 3, 4, 5]  # larger frequency -> higher score
monetary_scores = [1, 2, 3, 4, 5]   # larger monetary value -> higher score

# Bin every metric into five intervals with pd.cut and store the resulting
# label as a plain integer so the scores can be added together.
for metric_column, score_column, score_labels in (
    ('Recency', 'RecencyScore', recency_scores),
    ('Frequency', 'FrequencyScore', frequency_scores),
    ('MonetaryValue', 'MonetaryScore', monetary_scores),
):
    binned = pd.cut(data[metric_column], bins=5, labels=score_labels)
    data[score_column] = binned.astype(int)

# Combined RFM score: the sum of the three individual scores.
data['RFM_Score'] = (
    data['RecencyScore']
    + data['FrequencyScore']
    + data['MonetaryScore']
)
print(data.head())

# Split the combined score into three quantile-based value segments.
segment_labels = ['Low-Value', 'Mid-Value', 'High-Value']
data['Value Segment'] = pd.qcut(data['RFM_Score'], q=3, labels=segment_labels)
# Count how many rows fall into each value segment and expose the result as
# a two-column frame: 'Value Segment' and 'Count'.
segment_counts = (
    data['Value Segment']
    .value_counts()
    .rename_axis('Value Segment')
    .reset_index(name='Count')
)
pastel_colors = px.colors.qualitative.Pastel

# Bar chart of the segment sizes, one pastel colour per segment.
fig_segment_dist = px.bar(
    segment_counts,
    x='Value Segment',
    y='Count',
    color='Value Segment',
    color_discrete_sequence=pastel_colors,
    title='RFM Value Segment Distribution',
)

# Axis titles; the legend is hidden because the x axis already names
# each segment.
fig_segment_dist.update_layout(
    xaxis_title='RFM Value Segment',
    yaxis_title='Count',
    showlegend=False,
)

fig_segment_dist.show()
# Derive the named customer segment from the combined RFM score.
# The 'RFM Customer Segments' column is read by this treemap and by the
# charts that follow, but nothing in the script created it, so every one of
# those lookups failed with a KeyError.
# NOTE(review): only the 'Champions' label is referenced elsewhere in the
# script; the remaining labels and all score thresholds below are a
# conventional RFM bucketing and should be confirmed against the intended
# business definitions.
#   score <= 3 -> Lost, 4 -> Can't Lose, 5 -> At Risk Customers,
#   6-8 -> Potential Loyalists, >= 9 -> Champions
rfm_segment_edges = [float('-inf'), 3, 4, 5, 8, float('inf')]
rfm_segment_names = ['Lost', "Can't Lose", 'At Risk Customers',
                     'Potential Loyalists', 'Champions']
# Stored as plain strings (not a categorical) so that value_counts() and
# groupby() only report segments that actually occur in the data.
data['RFM Customer Segments'] = pd.cut(
    data['RFM_Score'],
    bins=rfm_segment_edges,
    labels=rfm_segment_names,
).astype(str)

# Number of rows for every (value segment, customer segment) combination,
# largest groups first.
segment_product_counts = (
    data.groupby(['Value Segment', 'RFM Customer Segments'])
    .size()
    .reset_index(name='Count')
)
segment_product_counts = segment_product_counts.sort_values('Count', ascending=False)

# Treemap nesting the customer segments inside their value segment.
fig_treemap_segment_product = px.treemap(
    segment_product_counts,
    path=['Value Segment', 'RFM Customer Segments'],
    values='Count',
    color='Value Segment',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='RFM Customer Segments by Value',
)
fig_treemap_segment_product.show()
# Keep only the rows whose customer segment is 'Champions'.
is_champion = data['RFM Customer Segments'] == 'Champions'
champions_segment = data[is_champion]

# One box per score column, showing how each score is distributed
# within the Champions rows.
fig = go.Figure()
for score_column, trace_name in (
    ('RecencyScore', 'Recency'),
    ('FrequencyScore', 'Frequency'),
    ('MonetaryScore', 'Monetary'),
):
    fig.add_trace(go.Box(y=champions_segment[score_column], name=trace_name))

fig.update_layout(
    title='Distribution of RFM Values within Champions Segment',
    yaxis_title='RFM Value',
    showlegend=True,
)
fig.show()
# Pairwise correlation of the three scores within the Champions rows.
score_columns = ['RecencyScore', 'FrequencyScore', 'MonetaryScore']
correlation_matrix = champions_segment[score_columns].corr()

# Heatmap of the correlation matrix; rows and columns are both labelled
# with the score column names.
heatmap_trace = go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.columns,
    colorscale='RdBu',
    colorbar={'title': 'Correlation'},
)
fig_heatmap = go.Figure(data=heatmap_trace)
fig_heatmap.update_layout(
    title='Correlation Matrix of RFM Values within Champions Segment'
)
fig_heatmap.show()
import plotly.colors

pastel_colors = plotly.colors.qualitative.Pastel

# Number of rows in each customer segment, largest first.
segment_counts = data['RFM Customer Segments'].value_counts()

# One colour per bar: the Champions bar gets its own highlight colour, every
# other bar takes a pastel colour by position. The palette index wraps around
# so that having more segments than palette entries cannot raise IndexError.
champions_color = 'rgb(158, 202, 225)'
bar_colors = [
    champions_color if segment == 'Champions'
    else pastel_colors[i % len(pastel_colors)]
    for i, segment in enumerate(segment_counts.index)
]

# Create a bar chart to compare segment counts
fig = go.Figure(data=[go.Bar(x=segment_counts.index, y=segment_counts.values,
                             marker=dict(color=bar_colors))])

# Outline the bars and make them slightly transparent.
fig.update_traces(marker_line_color='rgb(8, 48, 107)',
                  marker_line_width=1.5, opacity=0.6)

# Update the layout
fig.update_layout(title='Comparison of RFM Segments',
                  xaxis_title='RFM Segments',
                  yaxis_title='Number of Customers',
                  showlegend=False)
fig.show()
# Calculate the average Recency, Frequency, and Monetary scores for each segment.
# The score columns are selected with a list: selecting several columns from
# a GroupBy with a bare tuple (['A', 'B', 'C'] without the inner brackets)
# is rejected by pandas 2.x.
score_columns = ['RecencyScore', 'FrequencyScore', 'MonetaryScore']
segment_scores = (
    data.groupby('RFM Customer Segments')[score_columns]
    .mean()
    .reset_index()
)

# Create a grouped bar chart to compare segment scores: one trace per score,
# each with its own legend label and shade of blue.
fig = go.Figure()
for score_column, trace_name, trace_color in (
    ('RecencyScore', 'Recency Score', 'rgb(158,202,225)'),
    ('FrequencyScore', 'Frequency Score', 'rgb(94,158,217)'),
    ('MonetaryScore', 'Monetary Score', 'rgb(32,102,148)'),
):
    fig.add_trace(go.Bar(
        x=segment_scores['RFM Customer Segments'],
        y=segment_scores[score_column],
        name=trace_name,
        marker_color=trace_color,
    ))

# Update the layout; barmode='group' places the three bars of a segment
# side by side instead of stacking them.
fig.update_layout(
    title='Comparison of RFM Segments based on Recency, Frequency, and Monetary Scores',
    xaxis_title='RFM Segments',
    yaxis_title='Score',
    barmode='group',
    showlegend=True,
)
fig.show()