diff --git a/CLI/R5.FFDB.CLI/prerelease_todos.txt b/CLI/R5.FFDB.CLI/prerelease_todos.txt index 1c57e7e..a4126cb 100644 --- a/CLI/R5.FFDB.CLI/prerelease_todos.txt +++ b/CLI/R5.FFDB.CLI/prerelease_todos.txt @@ -1,4 +1,7 @@ - +Update the HasBeenInitialized for the db providers. +it should check that all of the following are met: +- ALL tables/collections have been created +- ALL teams have been added - documentation - get image/diagram of db schemas (both mongo and postgres) for alpha release discussion diff --git a/DevTester/DevProgram.cs b/DevTester/DevProgram.cs index 848d42f..1dbfdbe 100644 --- a/DevTester/DevProgram.cs +++ b/DevTester/DevProgram.cs @@ -109,6 +109,23 @@ private static FfdbEngine GetConfiguredMongoEngine() return GetConfiguredEngine(setup); } + private static FfdbEngine FOR_DOCS() + { +var setup = new EngineSetup(); + +setup + .SetRootDataDirectoryPath(@"C:\path\to\data\dir\") + .UseMongo(new MongoConfig + { + ConnectionString = "connection_string", + DatabaseName = "db_name" + }); + +FfdbEngine engine = setup.Create(); + + return GetConfiguredEngine(setup); + } + private static FfdbEngine GetConfiguredEngine(EngineSetup setup) { setup diff --git a/Engine/R5.FFDB.Engine/EngineSetup.cs b/Engine/R5.FFDB.Engine/EngineSetup.cs index 55132c1..c0c12bd 100644 --- a/Engine/R5.FFDB.Engine/EngineSetup.cs +++ b/Engine/R5.FFDB.Engine/EngineSetup.cs @@ -69,6 +69,12 @@ public EngineSetup UseMongo(MongoConfig config) return this; } + public EngineSetup UseCustomDbProvider(Func dbProviderFactory) + { + _dbProviderFactory = dbProviderFactory; + return this; + } + public EngineSetup SkipRosterFetch() { _programOptions.SkipRosterFetch = true; diff --git a/FFDB.sln b/FFDB.sln index 5a92d09..0bebd46 100644 --- a/FFDB.sln +++ b/FFDB.sln @@ -41,6 +41,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CLI", "CLI", "{BEE84D90-771 EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "R5.Internals.PostgresMapper.Tests", 
"R5.Internals\Tests\R5.Internals.PostgresMapper.Tests\R5.Internals.PostgresMapper.Tests.csproj", "{BE2C19EF-2FA1-46EE-8185-4CD1930ECFE8}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{1028CD87-3D5A-4B3F-8A6E-9B5335E5638E}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + .gitignore = .gitignore + LICENSE = LICENSE + README.md = README.md + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU diff --git a/README.md b/README.md index c339da2..6c821bd 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ If you'd like to use the engine to create a different database, or even postgres Below's a list of data categories and stats supported: -- Players - names, physical profile like height and weight, and other misc college +- Players - names, physical profile like height and weight, and other misc things like college - Teams - Roster information - mappings between player-to-team - Player Stats - split by season-and-week. Further categorized by type such as passing, rushing, etc. @@ -70,8 +70,8 @@ If you're curious, you can read more about the Engine design further down to see - [The Engine](#the-engine) - [design overview](#design-overview) - [engine setup](#engine-setup) - - [processors API](#processors-api) -- [Extending with DbProvider](#extending-with-dbprovider) + - [engine and processors API](#engine-and-processors-api) +- [Extending with DbProvider](#extending-with-database-provider) - [logging](#logging) - [implementing the contract](#implementing-the-contract) @@ -213,13 +213,282 @@ The engine is what does all the real work behind the scenes - the CLI is just an The diagram above depicts how the various data is fetched. Here's a quick rundown: -1. A HTTP request is made to the data source. It's optionally saved to disk. -2. The response is mapped to a versioned model, and optionally saved to disk. 
By _versioned_ means that the model is specific to the version of the source. For example, player stats are currently fetched from NFL's fantasy API v2. When they deprecate v2 and move onto v3, we may also need to update our models, resulting in a new versioned model. +1. An HTTP request is made to the data source. The response is optionally saved to disk. +2. The original source data is mapped to a versioned model, and optionally saved to disk. By _versioned_, I mean that the model is specific to the version of the source. For example, player stats are currently fetched from NFL's fantasy API v2. When they deprecate v2 and move onto v3, we may also need to update our models, resulting in a new _versioned_ model. 3. The versioned model is mapped to the core model used by the Engine. 4. The core models are passed to the configured `DbProvider`, which ultimately maps it to the database specific models (eg SQL or Document) and persists it to the database store. +The middle section labeled _FFDB Engine_ literally represents the stages that are handled by the Engine. Things were designed such that this nice boundary is created, and it's agnostic to the original data sources. It doesn't care where the data is coming from, or what the original format is, as long as it provides the correct mappers that can eventually turn things into the required core engine models. +_A data source dies away, and the Engine breaks. What now?_ +Given the explanation above, the Engine itself wouldn't need any modifications. _Someone_ would need to find a new source for this data, and create a new implementation of the `ICoreDataSource` interface, and "that's it". + +The reality is that this is a non-trivial task. Because the Engine currently relies on NFL's official player IDs, we would need a complete list of mappings for IDs between the new source and NFL's. 
This additional source could also be added ahead of time, with slight modifications to the Engine, to provide redundancy but that's a lot of extra work I'm unwilling to commit to at this time. + +##### Engine Setup + +To programmatically create the Engine, we use the `EngineSetup` class. Here's an example of the simplest valid setup you could use: + +``` +var setup = new EngineSetup(); + +setup + .SetRootDataDirectoryPath(@"C:\path\to\data\dir\") + .UseMongo(new MongoConfig + { + ConnectionString = "connection_string", + DatabaseName = "db_name" + }); + +FfdbEngine engine = setup.Create(); +``` + +This would configure a `FfdbEngine` instance with a data path, using mongo as its data store. It would exclude some other configurable things such as logging. + +Below's the complete list of methods available on the `EngineSetup` class. + +###### SetRootDataDirectoryPath(string path) +Sets the path to the data directory where the files are optionally persisted. + +###### UsePostgreSql(PostgresConfig config) +Sets the Engine to interface with a PostgreSql data store. The `PostgresConfig` class definition is: + +``` +public class PostgresConfig +{ + public string Host { get; set; } + public string DatabaseName { get; set; } + public string Username { get; set; } + public string Password { get; set; } +} +``` + +###### UseMongo(MongoConfig config) +Sets the Engine to interface with a Mongo data store. The `MongoConfig` class definition is: + +``` +public class MongoConfig +{ + public string ConnectionString { get; set; } + public string DatabaseName { get; set; } +} +``` + +###### UseCustomDbProvider(Func dbProviderFactory) +Sets the Engine to use a custom database provider that you implement. This is done by providing a factory function, which receives an `IAppLogger` instance that you can use for logging. + +###### SkipRosterFetch() +Sets the engine to skip fetching roster information. The reason for doing so was described earlier in the docs. 
+ +###### SaveToDisk() +Will save the _versioned_ models to disk. + +###### SaveOriginalSourceFiles() +Will save the original source data (HTTP response) to disk. Again, this is probably something you don't need (it takes up almost 300MB of space) + +###### EnableFetchingFromDataRepo() +The data repository concept was described earlier. Use this method to enable it. On either the repo being disabled, or failures on fetch, the Engine will simply revert to fetching from the original source. + +###### WebRequest.SetThrottle(int milliseconds) +Sets a static delay amount to be used between HTTP requests. Let's try to play nicely with the original sources. + +###### WebRequest.SetRandomizedThrottle(int min, int max) +Set a min and max delay amount, also in milliseconds, to be used between HTTP requests. + +###### WebRequest.AddHeader(string key, string value) +Add a custom HTTP header to be included for every request. + +###### WebRequest.AddDefaultBrowserHeaders() +Adds a `User-Agent` header that will attempt to spoof the HTTP request as being from a browser. + +Currently using `"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"` + +###### Logging.SetLogDirectory(string directoryPath) +Set the directory path where log files will be stored. This is also the only method required to have any logging at all. + +###### Logging.SetMaxBytes(long maxBytes) +Set the max bytes before a new log file is created. + +###### Logging.SetRollingInterval(RollingInterval interval) +Set an interval (such as days, hours, minutes) between creations of new log files. + +###### Logging.RollOnFileSizeLimit() +Will auto create new log files if the max bytes amount is reached. + +###### Logging.UseDebugLogLevel() +Set to enable much more detailed logging. You probably don't want to use this unless you're providing logs for a bug/issue. 
+ +###### Logging.SetMessageTemplate(string template) +Set the log message template/format. This is somewhat specific to `Serilog`, which is the logging lib the Engine uses. + +###### Logging.UseCustomLogger(Microsoft.Extensions.Logging.ILogger logger) +Provide your own `ILogger` instance to be used. + +##### Engine and Processors API + +The methods available on the Engine are located either on the `FfdbEngine` itself, or as processor class properties on the engine. + +_FfdbEngine_ + +###### Task RunInitialSetupAsync(bool skipAddingStats) +Runs the initial setup including things such as creating database tables, adding stats, etc. + +###### Task\ HasBeenInitializedAsync() +Determines whether the database has been initialized (has the _initial setup_ been run successfully?) + +###### Task\ GetLatestWeekAsync() +Gets the latest available week, as officially determined by the NFL. + +###### Task> GetAllUpdatedWeeksAsync() +Gets the complete list of weeks already updated and existing in the database. + +###### Task\ GetDataRepoStateAsync() +Returns an object representing the current state of the data repository. The `DataRepoState` class is defined as: + +``` +public class DataRepoState +{ + public DateTime Timestamp { get; set; } + public bool Enabled { get; set; } +} +``` + +The `Timestamp` represents when the repo was last updated. If `Enabled` is false, the Engine will not make requests to the data repo. + +_StatsProcessor_ + +Access the following methods using `engine.Stats.MethodName()` + +###### Task AddMissingAsync() +Adds all missing stats (those currently not existing in your database) + +###### Task AddForWeekAsync(WeekInfo week) +###### Task AddForWeekAsync(int season, int week) +This overloaded method adds all stats for one specified week. + +_TeamProcessor_ + +Access the following methods using `engine.Team.MethodName()` + +###### Task UpdateRosterMappingsAsync() +Updates the player-to-team mapping information in the database. 
+ +_PlayerProcessor_ + +Access the following methods using `engine.Player.MethodName()` + +###### Task UpdateCurrentlyRosteredAsync() +Updates the dynamic player information for those currently rostered on a team. + +--- + +#### Extending with Database Provider + +As mentioned before, you're not limited to using the natively-supported `PostgreSql` or `Mongo` options as your data store. The Engine simply takes in an instance of `IDatabaseProvider` to interface with whatever implementation is out there. + +To do this, you'll need a reference to the _R5.FFDB.Core_ library, which can be fetched from nuget: + +- (nuget link here) + +Here, we'll walk through that interface and its contract, so you can understand not only the literal API the Engine expects to work with, but also the underlying behavior and assumptions that are relevant. + +Here's the `IDatabaseProvider` interface definition: + +``` +public interface IDatabaseProvider +{ + IDatabaseContext GetContext(); +} +``` + +Ah! So it's really not this interface that defines all the necessary functionality for the Engine to work. This has one single method, that returns an `IDatabaseContext`. Your `IDatabaseProvider` implementation will most likely take in configuration information to connect to the db, set up logging, etc. You can always reference how the built-in db providers were implemented as needed. + +Let's explore this `IDatabaseContext` interface: + +``` +public interface IDatabaseContext +{ + Task InitializeAsync(); + Task HasBeenInitializedAsync(); + + IPlayerDbContext Player { get; } + IPlayerStatsDbContext PlayerStats { get; } + ITeamDbContext Team { get; } + ITeamStatsDbContext TeamStats { get; } + IUpdateLogDbContext UpdateLog { get; } + IWeekMatchupsDbContext WeekMatchups { get; } +} +``` + +###### Task InitializeAsync() +This should set up the database tables/collections, schemas, and whatever else you would classify as required initial work. 
Additionally, this should also add entries for all the NFL teams. + +_Important design note:_ + +The database context methods, in general, should be implemented to simply try to add/create whatever is passed in as arguments. For example, if a method accepts a list of stats, it _should_ attempt to add all of them. It doesn't need to concern itself with whether or not some of the stats have _already_ been added. That logic is handled by the Engine, and makes it easier for you to implement your own database providers. + +However, the initialize method is the one exception. Because the initial setup tasks are entirely specific to a given database, it's up to you to make sure that it can be re-run many times without exceptions or undesired results. For example, if 5 of 10 tables had already been created before the program failed, re-running it should only attempt to create the remaining 5. + +###### Task\ HasBeenInitializedAsync() +This should return a `bool` indicating whether the database has been initialized. The Engine will use this to block certain commands until the setup has been completed. + +The _IPlayerDbContext_ interface defines these methods: + +###### Task> GetAllAsync() +Returns a list of all players that currently exist in your database. + +###### Task AddAsync(PlayerAdd player); +Takes the argument that contains player information and adds it to your database. + +###### Task UpdateAsync(Guid id, PlayerUpdate update); +Update the player given the information contained in the `PlayerUpdate` instance. + +The _IPlayerStatsDbContext_ interface defines these methods: + +###### Task> GetPlayerNflIdsAsync(WeekInfo week); +Return a list of NFL IDs for _all_ players that have played for a given week. If you store player stats in a single table/collection, you can simply return the player's NFL IDs if you store it in those entries. If the stats are spread out within multiple tables, you may need joins to extract this information. 
+ +###### Task AddAsync(List\ stats); +Take the list of player stats information and add it to your database. + +The _ITeamDbContext_ interface defines these methods: + +###### Task> GetExistingTeamIdsAsync(); +Get the list of Team IDs for those currently existing in your database. If all have been added, this should always return a list of 32 ids. + +###### Task AddAsync(List\ teams); +Add the list of teams to your database. + +###### Task UpdateRosterMappingsAsync(List\ rosters); +Take the list of roster information and update the player-to-team mappings in your database. + +The _ITeamStatsDbContext_ interface defines these methods: + +###### Task> GetAsync(WeekInfo week); +Return the list of team stats for a given week. + +###### Task AddAsync(List\ stats); +Take the list of team stats and add it to your database. + +The _IUpdateLogDbContext_ interface defines these methods: + +###### Task> GetAsync(); +Get a list of all the weeks that have already been updated for your database. + +###### Task AddAsync(WeekInfo week); +Update your database to indicate that the given week has been completely updated. How you store this information is irrelevant to the Engine as it's an implementation detail. + +###### Task\ HasUpdatedWeekAsync(WeekInfo week); +Returns a `bool` indicating whether a given week has been updated in your database. + +The _IWeekMatchupsDbContext_ interface defines these methods: + +###### Task> GetAsync(WeekInfo week); +Returns the list of weekly matchups (between teams) for a given week. + +###### Task AddAsync(List\ matchups); +Take the list of matchups and add it to your database. ---