-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathJSON_version.py
212 lines (185 loc) · 9.16 KB
/
JSON_version.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import openpyxl
import json
def writeOutput(teamstats, filename):
# Make ratings dict for output
ratings = dict()
for t in teamstats:
ratings[t] = teamstats[t]["rating"]
# Sort the teams by their rating
sortedratings = sorted(ratings.items(), key=lambda kv: kv[1], reverse = True)
# Create new sheet for output
wb = openpyxl.load_workbook(filename)
wb.create_sheet('Output')
# For outputting the results into the spreadsheet
out = wb['Output']
for r in range(len(sortedratings) + 1):
# Header row
if r == 0:
out.cell(row = r + 1, column = 1).value = 'Rank'
out.cell(row = r + 1, column = 2).value = 'Team'
out.cell(row = r + 1, column = 3).value = 'Rating'
out.cell(row = r + 1, column = 4).value = 'Win % Rank'
out.cell(row = r + 1, column = 5).value = 'Record'
out.cell(row = r + 1, column = 6).value = 'Win %'
out.cell(row = r + 1, column = 7).value = 'Win Ratio'
out.cell(row = r + 1, column = 8).value = 'SOS Rank'
out.cell(row = r + 1, column = 9).value = 'SOS'
else:
# Nine columns
for col in range(9):
# Col 0 is the rank
if col == 0:
out.cell(row = r + 1, column = col + 1).value = r
# Col 1 is the team name, col 2 is the rating
elif col < 3:
out.cell(row = r + 1, column = col + 1).value = sortedratings[r - 1][col - 1]
# Formula to calculate the rank of winning %
elif col == 3:
out.cell(row = r + 1, column = col + 1).value = "=RANK(F" + str(r + 1) + \
",F2:F" + str(len(sortedratings) + 1) + ")"
# Output record
elif col == 4:
out.cell(row = r + 1, column = col + 1).value = str(teamstats[sortedratings[r - 1][0]]["record"][0]) \
+ "-" + str(teamstats[sortedratings[r - 1][0]]["record"][1])
# Output win % (col 5) and win ratio (col 6)
elif col < 7:
out.cell(row = r + 1, column = col + 1).value = teamstats[sortedratings[r - 1][0]]["info"][col - 5]
# Formula to calculate the SOS rank
elif col == 7:
out.cell(row = r + 1, column = col + 1).value = "=RANK(I" + str(r + 1) + \
",I2:I" + str(len(sortedratings) + 1) + ")"
# Output SOS (col 8)
else:
out.cell(row = r + 1, column = col + 1).value = teamstats[sortedratings[r - 1][0]]["info"][col - 6]
# Save the sheet with the output
wb.save(filename)
def scaleRatings(teamstats):
# Scale the ratings to an average of 100
for i in range(10):
scale_wins = 0
for key in teamstats:
scale_wins += 100 / (100 + teamstats[key]["rating"])
# Divide by half the total number of teams
scale = scale_wins / (len(teamstats) / 2)
# Adjust every team's rating and SOS according to scale
for key in teamstats:
teamstats[key]["rating"] *= scale
teamstats[key]["info"][2] = teamstats[key]["rating"] / teamstats[key]["info"][1]
def calculateRatings(teamstats, games):
# Value to compare every rating to and then do recursion
DELTA = 0.0001
# Main boolean used with while loop for recursion based on flag
done = False
# If all values are within DELTA (i.e. should recursion finish)
flag = True
# Recursion for getting accurate ratings
while not done:
# Initialize the flag to True each time
flag = True
# Clear expected wins each iteration
for t in teamstats:
teamstats[t]["expected"] = 0
# For every game, calculate:
for game in games:
# Weighting factor (1 divided by the sum of the ratings of the 2 teams)
wf = 1 / (teamstats[game[0]]["rating"] + teamstats[game[1]]["rating"])
# Multiply team's rating by weighting factor and add to sum
teamstats[game[0]]["expected"] += (teamstats[game[0]]["rating"] * wf)
# Multiply team's rating by weighting factor and add to sum
teamstats[game[1]]["expected"] += (teamstats[game[1]]["rating"] * wf)
# For every team, calculate:
for key in teamstats:
# New rating for the team equals the team's wins divided by expected wins multiplied by the old rating
teamstats[key]["new_rating"] = (teamstats[key]["record"][0] / teamstats[key]["expected"]) * teamstats[key]["rating"]
# Update the SOS for the team
teamstats[key]["info"][2] = teamstats[key]["new_rating"] / teamstats[key]["info"][1]
# If the difference between old rating and new rating <= DELTA
# If flag is true, that means so far every team's new rating has been within DELTA
# (since flag is initialized to true)
if abs(teamstats[key]["rating"] - teamstats[key]["new_rating"]) <= DELTA \
and abs(teamstats[key]["record"][0] - teamstats[key]["expected"]) <= DELTA and flag:
done = True
# If the difference is greater than DELTA, we must continue the recursion
# If flag is false, one team has already failed the DELTA test and we must continue the recursion
else:
flag = False
done = False
# After going through all teams, update ratings
for t in teamstats:
teamstats[t]["rating"] = teamstats[t]["new_rating"]
def updateInfo(teamstats):
# Update info after every game is read
for key in teamstats:
# If team is undefeated or winless, we'll get a divide by 0 error
# Note: this formula is most effective when there are no undefeated or winless teams
# and a chain of wins (or ties) can be made from every team to any other team
ratio = teamstats[key]["record"][0] / teamstats[key]["record"][1]
# Add the team's win %, win ratio and sos is intialized to 0 (to be summed later)
teaminfo = [teamstats[key]["record"][0] / (teamstats[key]["record"][0] + teamstats[key]["record"][1]), ratio, 0]
# Add the list of team info to the info dict
teamstats[key]["info"] = teaminfo
def readGames(teamstats, teams, data, games, filename):
# Create a Workbook
wb = openpyxl.Workbook()
# Create sheet to output games to so that postseason games can be easily added
out = wb.active
out.title = "Games"
# Row counter
r = 1
# First row
out.cell(row = r, column = 1).value = 'Winning Team'
out.cell(row = r, column = 2).value = 'Winning Score'
out.cell(row = r, column = 3).value = 'Losing Team'
out.cell(row = r, column = 4).value = 'Losing Score'
# Go through all the games and add tuple of (winning team, losing team) to list of games
# In addition output the games to a sheet so that postseason games can be added with ease
for g in data["games"]:
winnerid = g["won"]["tid"]
loserid = g["lost"]["tid"]
games.append((teams[winnerid], teams[loserid]))
# Incrememnt record of each team
teamstats[teams[winnerid]]["record"][0] += 1
teamstats[teams[loserid]]["record"][1] += 1
out.cell(row = r + 1, column = 1).value = teams[winnerid]
out.cell(row = r + 1, column = 2).value = g["won"]["pts"]
out.cell(row = r + 1, column = 3).value = teams[loserid]
out.cell(row = r + 1, column = 4).value = g["lost"]["pts"]
r += 1
# Save the sheet with the output
wb.save(filename)
def extractInfo(teamstats, teams, data):
# Extract information that we need from the data
for t in data["teams"]:
# teams[id] = name
teams[t["tid"]] = t["region"]
# Initialize team dict
teamstats[t["region"]] = dict()
# Initialize record to 0-0
teamstats[t["region"]]["record"] = [0, 0]
# Always start with 100 as every team's rating (using iteration to solve the recursive problem)
teamstats[t["region"]]["rating"] = 100
def main(j, excel):
# Open json file with utf-8 encoding; insert your own json filename
with open(j, encoding = 'utf-8-sig') as json_file:
data = json.load(json_file)
# List of tuples which are the games; each tuple is (winner, loser)
games = []
# Initalize dictionary of teams mapping their id to their name
teams = dict()
# Mega dictionary with all info
# key = team name
# value = dictionary with: rating (int), record (list of [wins, losses]), expected wins (int)
# new rating (int) and info (list of [win %, win ratio, strength of schedule])
teamstats = dict()
extractInfo(teamstats, teams, data)
readGames(teamstats, teams, data, games, excel)
updateInfo(teamstats)
calculateRatings(teamstats, games)
scaleRatings(teamstats)
return teamstats
if __name__ == "__main__":
# Replace with your filenames
j = 'NCBCA 2062 Post-CT.json'
excel = 'Bradley-Terry Spreadsheet JSON NCBCA.xlsx'
teamstats = main(j, excel)
writeOutput(teamstats, excel)