Skip to content

Commit

Permalink
Added NPB Schedule Data
Browse files Browse the repository at this point in the history
  • Loading branch information
armstjc committed Nov 20, 2023
1 parent 0e46997 commit f94c698
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 65 deletions.
19 changes: 15 additions & 4 deletions .github/workflows/get_npb_schedules.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: Update NPB Schedules
name: Update NPB Data
on:
schedule:
- cron: "0 12 * * *"
# push:
# branches: [main]
push:
branches: [main]
# pull_request:
# branches: [main]

Expand Down Expand Up @@ -34,6 +34,7 @@ jobs:
- name: run Python Script
run: |
python get_npb_schedule.py
python get_npb_standings.py
- uses: xresloader/upload-to-github-release@main
env:
Expand All @@ -44,4 +45,14 @@ jobs:
#update_latest_release: true
overwrite: true
verbose: true
default_release_name: "Schedule"
default_release_name: "Schedule"
- uses: xresloader/upload-to-github-release@main
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
file: "standings/*"
branches: "main"
#update_latest_release: true
overwrite: true
verbose: true
default_release_name: "Game_Standings"
134 changes: 77 additions & 57 deletions get_npb_schedule.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from datetime import datetime, timedelta
from datetime import datetime
import time
import pandas as pd
import requests
from tqdm import tqdm


from utls import get_json_from_url
Expand All @@ -13,65 +11,86 @@ def get_npb_schedule(season: int, save_results=False):
"""
sched_df = pd.DataFrame()
row_df = pd.DataFrame()
columns = [
'season',
'ID',
'SeqNo',
'game_id',
'game_kind_id',
'date_jpn',
'time_jpn',
'week_day_jpn',
'stadium_id',
'stadium_name_jpn',
'round',
'DhF',
'game_state',
'game_result',

'home_score',
'home_team_id',
'home_team_short_name',
'home_team_name_en',
'home_team_name_en_short',
'home_team_initial',
'home_section',
'home_text_area',

'away_score',
'away_team_id',
'away_team_short_name',
'away_team_name_en',
'away_team_name_en_short',
'away_team_initial',
'away_section',
'away_text_area',

'stadium_name_short',
'last_updated',
'creation_date'
]

url = f"https://spaia.jp/baseball/npb/api/schedules?Year={season}"
json_data = get_json_from_url(url=url)

for game in tqdm(json_data):
game_id = game['GameID']
row_df = pd.DataFrame(
{"game_id":game_id},
index=[0]
)
row_df['game_kind_id'] = game['Year']

date_str = f"{game['DateJPN']} {game['TimeJPN']}"
game_date = datetime.strptime(date_str, "%Y%m%d %H%M")
del date_str
row_df['season'] = game['Year']
row_df['game_datetime_jpn'] = game_date
row_df['game_datetime_iso'] = game_date - timedelta(hours=9)
row_df['stadium_id'] = game['StadiumID']
row_df['stadium_name_jp'] = game['StadiumName']
row_df['round'] = game['Round']
row_df['dhf'] = game['DhF']
row_df['game_state'] = game['GameState']
row_df['game_result'] = game['GameResult']
row_df['home_score'] = game['HScore']
row_df['away_score'] = game['VScore']

row_df['home_team_id'] = game['HTeamID']
row_df['home_team_short_name'] = game['HTeamNameS']
row_df['home_team_en_name'] = game['HomeTeamNameE']
row_df['home_team_initial'] = game['VisitorTeamInitial']
row_df['home_team_en_initial'] = game['HomeTeamNameES']

row_df['away_team_id'] = game['VTeamID']
row_df['away_team_short_name'] = game['VTeamNameS']
row_df['away_team_en_name'] = game['VisitorTeamNameE']
row_df['away_team_initial'] = game['VisitorTeamInitial']
row_df['away_team_en_initial'] = game['VisitorTeamNameES']

row_df['stadium_short_name'] = game['StadiumNameS'] # ?
sched_df = pd.DataFrame(json_data)

row_df['home_wins'] = game['Win']
row_df['home_losses'] = game['Lose']
row_df['home_draws'] = game['Draw']
row_df['home_batting_avg'] = game['Avg']
row_df['home_pitching_era'] = game['Era']
row_df['home_section'] = game['Home_Section']
row_df['home_jp_description'] = game['Home_TextArea']
sched_df.rename(
columns={
"GameID": "game_id",
"GameKindID": "game_kind_id",
"DateJPN": "date_jpn",
"TimeJPN": "time_jpn",
"WeekDayJPN": "week_day_jpn",
"StadiumID": "stadium_id",
"StadiumName": "stadium_name_jpn",
"Round": "round",
"GameState": "game_state",
"GameResult": "game_result",
"HScore": "home_score",
"VScore": "away_score",
"HTeamID": "home_team_id",
"HTeamNameS": "home_team_short_name",
"VTeamID": "away_team_id",
"VTeamNameS": "away_team_short_name",
"StadiumNameS": "stadium_name_short",
"HomeTeamNameE": "home_team_name_en",
"HomeTeamNameES": "home_team_name_en_short",
"HomeTeamInitial": "home_team_initial",
"VisitorTeamNameE": "away_team_name_en",
"VisitorTeamNameES": "away_team_name_en_short",
"VisitorTeamInitial": "away_team_initial",
"Home_Section": "home_section",
"Home_TextArea": "home_text_area",
"Visitor_Section": "away_section",
"Visitor_TextArea": "away_text_area",
"UpdatedAt": "last_updated",
"Year": "season",
"CreatedAt": "creation_date"
}, inplace=True
)

row_df['away_wins'] = game['Visitor_Win']
row_df['away_losses'] = game['Visitor_Lose']
row_df['away_draws'] = game['Visitor_Draw']
row_df['away_batting_avg'] = game['Visitor_Avg']
row_df['away_pitching_era'] = game['Visitor_Era']
row_df['away_section'] = game['Visitor_Section']
row_df['away_jp_description'] = game['Visitor_TextArea']

row_df['last_updated'] = game['UpdatedAt']
sched_df = pd.concat([sched_df,row_df],ignore_index=True)
sched_df = sched_df[columns]

if save_results == True:
sched_df.to_csv(f"schedules/{season}_npb_schedule.csv", index=False)
Expand All @@ -88,4 +107,5 @@ def get_npb_schedule(season: int, save_results=False):
season=i,
save_results=True
)
print(df)
time.sleep(1)
46 changes: 46 additions & 0 deletions get_npb_standings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from datetime import datetime
import time
import pandas as pd
from tqdm import tqdm


from utls import get_json_from_url


def get_npb_standings_by_game(season: int, save_results=False):
    """
    Download the game-by-game standings history for one NPB season.

    The SPAIA ``official_stats_history`` endpoint is queried once per
    ``GameAssortment`` value (1 = Central League, 2 = Pacific League)
    and both responses are stacked into a single DataFrame.

    Parameters
    ----------
    season : int
        Season (year) sent to the API as the ``Year`` query parameter.
    save_results : bool, optional
        When truthy, the combined table is also written to
        ``standings/{season}_game_standings.csv`` and
        ``standings/{season}_game_standings.parquet``. The ``standings``
        directory is not created here.

    Returns
    -------
    pd.DataFrame
        Central League rows followed by Pacific League rows, re-indexed
        from zero.
    """
    url_template = (
        "https://spaia.jp/baseball/npb/api/official_stats_history"
        "?GameAssortment={league}&Year={season}"
    )

    # Request order matters for the row order of the result:
    # Central League (1) first, then Pacific League (2).
    # NOTE(review): get_json_from_url is assumed to return data that
    # pd.DataFrame() can consume directly -- confirm against utls.
    league_frames = [
        pd.DataFrame(
            get_json_from_url(
                url=url_template.format(league=league, season=season)
            )
        )
        for league in (1, 2)
    ]

    standings_df = pd.concat(league_frames, ignore_index=True)

    if save_results:
        output_stem = f"standings/{season}_game_standings"
        standings_df.to_csv(f"{output_stem}.csv", index=False)
        standings_df.to_parquet(f"{output_stem}.parquet", index=False)

    return standings_df


# Script entry point: fetch and save the standings for the two previous
# seasons and the current one.
if __name__ == "__main__":
now = datetime.now()
# First season to request.
f_year = now.year - 2
# Exclusive upper bound for range(), so the current year is the last season.
c_year = now.year + 1
for i in tqdm(range(f_year,c_year)):
# save_results=True makes the function write the CSV/parquet files.
df =get_npb_standings_by_game(
season=i,
save_results=True
)
print(df)
8 changes: 4 additions & 4 deletions spaia_api_endpoints.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Base: https://spaia.jp/baseball/npb/api/related_players?player_id=1400101
## Same Draft Year
Base: https://spaia.jp/baseball/npb/api/same_draft_year_players?player_id=1400101

# Schedule
# Schedule (done)

## Upcoming Games
Base: https://spaia.jp/baseball/npb/api/game_schedule
Expand Down Expand Up @@ -64,7 +64,7 @@ Base: https://spaia.jp/baseball/npb/api/official_stats_history?GameAssortment=1&
# Teams

## Full Team Rosters
Base: https://spaia.jp/baseball/npb/api/directory?TeamID=2
Base: https://spaia.jp/baseball/npb/api/directory?TeamID=2&year=2023

## Batters
Base: https://spaia.jp/baseball/npb/api/batter_list?team=2&year=2023
Expand All @@ -73,10 +73,10 @@ Base: https://spaia.jp/baseball/npb/api/batter_list?team=2&year=2023
Base: https://spaia.jp/baseball/npb/api/pitcher_list?team=2&year=2023

## Coaching Staff
Base: https://spaia.jp/baseball/npb/api/staff_list?team=2
Base: https://spaia.jp/baseball/npb/api/staff_list?team=2&year=2023

## Full System:
Base: https://spaia.jp/baseball/npb/api/players_by_team?team_id=2
Base: https://spaia.jp/baseball/npb/api/players_by_team?team_id=2&year=2023

# NPB Draft

Expand Down
Empty file added standings/ignore.me
Empty file.

0 comments on commit f94c698

Please sign in to comment.