"""Build the KOAA traffic report: historical monthly metrics plus 2023 GA4 data.

Two sheets are written to ``final_report.xlsx``:

* ``sessions_report``  - monthly sessions per market, followed by the GA4 2023 months
* ``pageviews_report`` - monthly page views per market (first ``HISTORY_ROWS``
  rows), followed by the GA4 2023 months
"""

from collections.abc import Iterable
from datetime import datetime  # imported by the original notebook; currently unused

import numpy as np  # imported by the original notebook; currently unused
import pandas as pd

SESSIONS_WORKBOOK = 'KOAA - Sessions.xlsx'
GA4_SESSIONS_WORKBOOK = 'ga4_session_23.xlsx'
PAGEVIEWS_WORKBOOK = 'pageview_per_month.xlsx'
REPORT_WORKBOOK = 'final_report.xlsx'

MONTH_FORMAT = '%b %Y'   # e.g. "Feb 2023"; also the label format of the GA4 rows
GA4_YEAR = 2023          # the GA4 exports carry bare month names as column headers
HISTORY_ROWS = 133       # number of leading historical rows that are kept / restated
RESTATE_FACTOR = 0.66    # multiplier applied to historical sessions when restating

# GA4 export spelling -> spelling used in the historical workbooks.
# One shared map for both GA4 exports: they differ only in how Fort Worth is
# written, and ``DataFrame.rename`` ignores keys that are not present.
MARKET_ALIASES = {
    'Philly': 'Philadelphia',
    'Milwauee': 'Milwaukee',
    'FairField County': 'Fairfield County',
    'Fort Worth': 'Ft Worth',
    'FortWorth': 'Ft Worth',
    'Salt lake City': 'Salt Lake',
    'Longisland': 'Nassau / Long Island',
    'Ann Arbor - Detroit': 'Ann Arbor/Detroit',
    'Saint Louis': 'St. Louis',
    'Jacksonvillie': 'Jacksonville',
    'Houstan': 'Houston',
}


def tidy_history(
    raw: pd.DataFrame,
    drop_columns: Iterable[str] = (),
    month_column: str = 'Month',
) -> pd.DataFrame:
    """Return a historical sheet indexed by ``'%b %Y'`` month labels with int metrics.

    Args:
        raw: Sheet as read from Excel; must contain ``month_column``.
        drop_columns: Columns to discard before any conversion.
        month_column: Name of the column holding the month dates.

    Returns:
        A new frame indexed by formatted month label; every remaining column
        is zero-filled and cast to ``int``.
    """
    frame = raw.drop(columns=list(drop_columns))
    # Format the month *before* zero-filling: filling a blank month with 0 and
    # then parsing it would fabricate a bogus "Jan 1970" label.
    frame[month_column] = pd.to_datetime(frame[month_column]).dt.strftime(MONTH_FORMAT)
    return frame.set_index(month_column).fillna(0).astype(int)


def restate(history: pd.DataFrame, factor: float = RESTATE_FACTOR) -> pd.DataFrame:
    """Scale every value by ``factor`` and round to whole numbers (as ``int``)."""
    return (history * factor).round().astype(int)


def tidy_ga4(
    raw: pd.DataFrame,
    label_column: str,
    year: int = GA4_YEAR,
    skip_months: Iterable[str] = ('Jan',),
) -> pd.DataFrame:
    """Turn a GA4 export (one row per market, one column per month) into month rows.

    Args:
        raw: Sheet as read from Excel.
        label_column: Column holding the market names (``'Sessions'`` / ``'Views'``).
        year: Year appended to each month header to build the row label.
        skip_months: Month columns to discard before reshaping.

    Returns:
        A frame with one row per remaining month (labelled ``"<month> <year>"``)
        and one int column per market, with market names normalised through
        ``MARKET_ALIASES``.
    """
    frame = raw.set_index(label_column).drop(columns=list(skip_months))
    frame = frame.fillna(0).astype(int)
    frame.columns = [f'{month} {year}' for month in frame.columns]
    return frame.T.rename(columns=MARKET_ALIASES)


def append_ga4(history: pd.DataFrame, ga4: pd.DataFrame) -> pd.DataFrame:
    """Stack the GA4 rows under ``history``, using history's columns and order.

    Raises:
        KeyError: If a market present in ``history`` is missing from ``ga4``.
    """
    return pd.concat([history, ga4[history.columns]])


def build_tables() -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Read the source workbooks and return ``(sessions, r_session, pagedf)``."""
    # Sessions
    sessions = tidy_history(
        pd.read_excel(SESSIONS_WORKBOOK, sheet_name='data')
        .rename(columns={'Unnamed: 1': 'Month'}),
        drop_columns=[
            'Note: Metrics are usually taken mid-month',
            'Main KOAA site',
            'Site totals',
        ],
    )
    # NOTE(review): the restated frame is computed (and still contains
    # 'Suffolk') but is not part of the written report, and `sessions` is not
    # truncated to HISTORY_ROWS before the GA4 rows are appended, unlike the
    # page views below. Confirm whether the report should use `r_session`.
    r_session = restate(sessions[:HISTORY_ROWS])
    sessions = sessions.drop(columns='Suffolk')
    ga4s = tidy_ga4(pd.read_excel(GA4_SESSIONS_WORKBOOK), 'Sessions')
    sessions = append_ga4(sessions, ga4s)

    # PageViews
    pagedf = tidy_history(
        pd.read_excel(PAGEVIEWS_WORKBOOK, sheet_name='pageview_per_month'),
        drop_columns=['Suffolk', 'Main KOAA site', 'Site totals'],
    )
    ga4p = tidy_ga4(pd.read_excel(PAGEVIEWS_WORKBOOK, sheet_name='ga4_2023'), 'Views')
    pagedf = append_ga4(pagedf[:HISTORY_ROWS], ga4p)

    return sessions, r_session, pagedf


def write_report(sessions: pd.DataFrame, pagedf: pd.DataFrame) -> None:
    """Write both tables to ``REPORT_WORKBOOK``, one sheet each."""
    with pd.ExcelWriter(REPORT_WORKBOOK) as w:
        sessions.to_excel(w, sheet_name='sessions_report', index=True)
        pagedf.to_excel(w, sheet_name='pageviews_report', index=True)


if __name__ == '__main__':
    # Kept as module-level names so they stay inspectable in an interactive session.
    sessions, r_session, pagedf = build_tables()
    write_report(sessions, pagedf)