import os
import sqlite3

import pandas as pd


class Games:
    """An object which contains a record of chess games.

    Reads chess game metadata from the ``lichess_openings_summary`` table of a
    SQLite database, allowing a custom selection that can be filtered by
    opening moves and elo. Also calculates the relative daily popularity of
    every 3 move opening.

    Attributes:
        opening: the 3 opening moves played in the game
        opening_name: the plain text name for the opening (if available)
        elo: the elo band to filter the results on
        db_name: path of the database file, resolved against ``cur_dir``
        conn: connection to sqlite3 database (closed once ``__init__`` returns)
        cursor: cursor to execute transactions on the database
        df: a DataFrame object containing the chess games, openings of interest
    """
    # Directory containing this module; database paths are resolved against it.
    cur_dir = os.path.dirname(os.path.realpath(__file__))

    def __init__(self, db_name, opening=None, elo=None, opening_name=None):
        """Initialize a Games object containing filtered chess game metadata.

        Args:
            db_name (str): path of the database with chess game metadata,
                relative to the directory containing this module. A leading
                path separator (e.g. '/data/lichess.db') is accepted.
            opening (str, optional): the 3 move set to filter the results by
            elo (str, optional): the elo band to filter the results on
            opening_name (str, optional): the plain text name of the opening line
        """
        self.opening = opening
        self.opening_name = opening_name
        self.elo = elo
        self.db_name = self._resolve_db_path(db_name)
        self.conn = sqlite3.connect(self.db_name)
        try:
            self.cursor = self.conn.cursor()
            self.df = self.generate_games_df(self.opening, self.elo)
            self.calculate_daily_opening_popularity()
        finally:
            # Only reads are performed, so there is nothing to commit. Always
            # release the connection, even when the query fails.
            self.conn.close()

    @classmethod
    def _resolve_db_path(cls, db_name):
        """Return ``db_name`` joined onto the directory containing this module."""
        # Strip leading separators so '/data/x.db' and 'data/x.db' resolve to
        # the same file instead of depending on raw string concatenation.
        return os.path.join(cls.cur_dir, db_name.lstrip('/\\'))

    def generate_games_df(self, opening=None, elo=None):
        """Read the games and openings of interest from the database.

        Args:
            opening (str, optional): keep only rows whose ``opening`` column
                equals this value
            elo (str, optional): keep only rows whose ``WhiteELO`` column
                equals this value

        Returns:
            A DataFrame with the matching rows of ``lichess_openings_summary``;
            every row when no filter is given.
        """
        conditions = []
        params = []
        if opening is not None:
            conditions.append('opening = ?')
            params.append(opening)
        if elo is not None:
            conditions.append('WhiteELO = ?')
            params.append(elo)

        query = 'SELECT * FROM lichess_openings_summary'
        if conditions:
            query += ' WHERE ' + ' AND '.join(conditions)

        # Values are bound as parameters so that quotes inside a filter value
        # (the openings themselves contain quotes) are treated as data, not SQL.
        return pd.read_sql_query(query, self.conn, params=params or None)

    def calculate_daily_opening_popularity(self):
        """Add the relative popularity of each opening to ``self.df``.

        Stores ``100 * openingCount / dailyGamesSum`` in a new
        ``opening_percentage_played`` column.
        """
        self.df['opening_percentage_played'] = 100 * (self.df['openingCount'] / self.df['dailyGamesSum'])
""" cfg = get_configs() - conn = sqlite3.connect('/data/lichess_bak.db') - cursor = conn.cursor() - opening = "['e4', 'e5', 'Nf3']" - opening_name = 'King\'s Knight' - df = pd.read_sql_query('SELECT * from lichess_openings_summary WHERE Opening == (?)', conn, params=(opening,)) - conn.close() - df['opening'] = 100*(df['openingCount'] / df['dailyGamesSum']) + elo_range = '800-1000' + opening_name = 'Queen\'s Gambit' + opening = "['d4', 'd5', 'c4']" - plot_popularity(df, opening_name) + g = Games('/data/lichess.db', elo=elo_range, opening=opening) + plot_popularity(g.df, opening_name, elo=elo_range) -def plot_popularity(df, opening_name): + +def plot_popularity(df, opening_name, elo=None): """ Creates a line plot of the relative popularity of an opening over a defined period of time. Args: df: the DataFrame containing the date axis and column named 'opening' with the relative popularity of the move per each day opening_name: The plain text name of the opening for use in the legend + elo: the elo range of the players using the opening for use in the legend """ x = df['date'].tolist() x = [datetime.strptime(d, '%Y.%m.%d') for d in x] x = [datetime.strftime(d, '%b-%d') for d in x] - fig, ax = plt.subplots(figsize=(15, 7)) + fig, ax = plt.subplots(figsize=(12, 6)) + # fig, ax = plt.subplots() plt.xticks(np.arange(0, 62, 7)) - ax.plot(x, df['opening']) + ax.plot(x, df['opening_percentage_played']) plt.gca().yaxis.set_major_formatter(PercentFormatter(100, decimals=2)) - plt.legend(labels=['% of games beginning with ' + opening_name], loc=2) + if elo is not None: + plt.legend(labels=['% of games beginning with ' + opening_name + ' for ' + elo + ' ELO players'], loc=2) + else: + plt.legend(labels=['% of games beginning with ' + opening_name], loc=2) plt.title('Popularity of ' + opening_name + ' on lichess.org') - plt.annotate("♕ Release of 'The Queen\'s Gambit'", (22.7, 3.915)) + plt.annotate("♕ Release of 'The Queen\'s Gambit'", (22.6, 1.73)) plt.show() exit() diff --git 
a/pgn_parser.py b/pgn_parser.py index ade8b81..91d1178 100644 --- a/pgn_parser.py +++ b/pgn_parser.py @@ -21,7 +21,7 @@ def __init__(self, games=default_pgn): def generate_liches_db(self, games): """Method to read a lichess .pgn text file of games to a sqlite database. Args: - games: plain text format of recorded chess games. + games: PGN text file of recorded chess games. """ with open(games) as f: with tqdm.tqdm(total=os.path.getsize(games), unit_scale=0.00000001) as pbar: @@ -62,23 +62,24 @@ def get_moves(self, line): def write_to_db(self, line, a, b, c, d, e): + """Once at the final line of metadata for a particular game (the pgn notation), write the row + containing the relevant game information to the database. + """ if line[:1] == '1' or line[:2] == ' 0' or line[:2] == ' 1': self._cursor.execute('INSERT INTO lichess VALUES(?,?,?,?,?)', (a, b, c, d, e)) def add_moves(moves): + """If available, get the first three moves from the PGN chess notation of the game""" opening_moves = [] - # get first move try: opening_moves.append(moves[1]) except IndexError: pass - # get second move try: opening_moves.append(moves[6]) except IndexError: - pass - # get third move + pass try: opening_moves.append(moves[12]) except IndexError: