diff --git a/.gitignore b/.gitignore index 55555549..3a3cf6f2 100644 --- a/.gitignore +++ b/.gitignore @@ -148,4 +148,5 @@ Digraph.gv* .pytype flake8.txt -data \ No newline at end of file +data +.models \ No newline at end of file diff --git a/examples/AutoCyber/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb b/examples/AutoCyber/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb index 238a14f7..b94b33ed 100644 --- a/examples/AutoCyber/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb +++ b/examples/AutoCyber/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb @@ -1052,7 +1052,7 @@ } ], "source": [ - "from AutomotiveCybersecurity.util import evaluate_event_graph\n", + "from util import evaluate_event_graph\n", "\n", "evaluate_event_graph(graph, prepared_data['test_files'])" ] diff --git a/examples/HelloWorld/helloworld_graph.py b/examples/HelloWorld/graph.py similarity index 100% rename from examples/HelloWorld/helloworld_graph.py rename to examples/HelloWorld/graph.py diff --git a/examples/HelloWorld/graph_tutorial.ipynb b/examples/HelloWorld/graph_tutorial.ipynb new file mode 100644 index 00000000..379787ad --- /dev/null +++ b/examples/HelloWorld/graph_tutorial.ipynb @@ -0,0 +1,107 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# H1st Graph Guide" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "H1st Graph is an execution flow chart that allows the incorporation of ML as well as human expert models.\n", + "\n", + "This is an example of a very simple graph which prints hello for each even number `x` in the input stream,\n", + "using a conditional `RuleBasedModel` which is a `h1.Model` node and a HelloPrinter which is a `h1.Action` node." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import h1st as h1\n", + "\n", + "class RuleBasedModel(h1.Model):\n", + " \"\"\"\n", + " Simple rule-based model that \"predicts\" if a given value is an even number\n", + " \"\"\"\n", + " def predict(self, input_data: dict) -> dict:\n", + " predictions = [{'prediction': x % 2 == 0, 'value': x} for x in input_data[\"values\"]]\n", + " return {\"predictions\": predictions}\n", + "\n", + "class HelloPrinter(h1.Action):\n", + " \"\"\"Print hello to the inputs value\"\"\"\n", + " def call(self, command, inputs):\n", + " # Note that H1st does the conditional/filtering orchestration already.\n", + " # All we need to do here is just to print.\n", + " for d in inputs[\"predictions\"]:\n", + " print(\"Hello world {}!\".format(d[\"value\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The H1st graph itself is created by `add()`ing nodes incrementally.\n", + "\n", + "Note that the first branch is a `h1.Decision` which redirects the data flow into the later `yes` and `no` nodes based on the RuleBasedModel's predictions.\n", + "\n", + "In terms of data flow, the RuleBasedModel node produces a dict which is then used by `h1.Decision` to redirect the data stream by looking at the `result_field=predictions` dict key.\n", + "\n", + "H1st graph by default operates in batch mode, meaning that `h1.Decision` looks at `{\"predictions\": [{\"prediction\": True, ...}, {\"prediction\": False, }]}` and redirects True/False decision points to the right `yes/no` branch as a list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello world 0!\n", + "Hello world 2!\n", + "Hello world 4!\n" + ] + } + ], + "source": [ + "g = h1.Graph()\n", + "g.start()\n", + "g.add(h1.Decision(RuleBasedModel(), result_field=\"predictions\"))\n", + "g.add(yes=HelloPrinter(), no=h1.NoOp())\n", + "g.end()\n", + "\n", + "results = g.predict({\"values\": range(6)})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/HelloWorld/ml_model_tutorial.ipynb b/examples/HelloWorld/ml_model_tutorial.ipynb new file mode 100644 index 00000000..e0e3d63d --- /dev/null +++ b/examples/HelloWorld/ml_model_tutorial.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# H1st ML Model Guide" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The H1st Model is one of the core concepts of H1st, and it is central to the way H1st works. Model presents a uniform interface to its users, whether the underlying model is boolean logic, fuzzy logic derived from human’s intuition, a Scikit-learn random forest, or a Tensorflow neural network. This makes it possible for you to use and combine Models in Graphs or Ensembles easily.\n", + "\n", + "The easiest way to understand H1st model is actually implementing it. H1st model provides all the interfaces to manage the life cycle of the model. 
\n", + "\n", + "Below is an example of an H1st Model that utilizes an underlying Scikit-learn model for digits classification." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import svm, datasets, metrics\n", + "import h1st as h1\n", + "\n", + "class MLModel(h1.MLModel):\n", + " def __init__(self):\n", + " # This is the native SKLearn model\n", + " # H1st can automatically save/load this \"self.model\" property if it's a SKlearn or tf.keras.Model\n", + " self.model = svm.SVC(gamma=0.001, C=100.)\n", + "\n", + " def get_data(self):\n", + " digits = datasets.load_digits()\n", + " return {\n", + " \"x\": digits.data,\n", + " \"y\": digits.target\n", + " }\n", + "\n", + " def explore_data(self, data):\n", + " pass\n", + "\n", + " def prep(self, data):\n", + " x = data[\"x\"]\n", + " y = data[\"y\"]\n", + " num_tests = 10\n", + " return {\n", + " \"train_x\": x[num_tests:],\n", + " \"train_y\": y[num_tests:],\n", + " \"test_x\": x[0:num_tests],\n", + " \"test_y\": y[0:num_tests]\n", + " }\n", + "\n", + " def train(self, prepared_data):\n", + " self.model.fit(prepared_data[\"train_x\"], prepared_data[\"train_y\"])\n", + "\n", + " def evaluate(self, data):\n", + " pred_y = self.predict({\"x\": data[\"test_x\"]})\n", + " # self.metrics can also be persisted automatically by H1st\n", + " self.metrics = metrics.accuracy_score(data[\"test_y\"], pred_y)\n", + " return self.metrics\n", + "\n", + " def predict(self, input_data: dict) -> dict:\n", + " \"\"\"\n", + " We expect an array of input data rows in the \"x\" field of the input_data dict\n", + " \"\"\"\n", + " return self.model.predict(input_data[\"x\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To create an H1st model, you can start by creating a new class that subclasses `h1.Model` (or `h1.MLModel`, as in the example above).\n", + "\n", + "Then we populate the methods: `get_data()` to get the data, `prep()` to preprocess it, and of course `train()`, 
`evaluate()` and `predict()`.\n", + "\n", + "This is how the model is used. Pay close attention to the parameters of the methods and note that the train-val data splitting is done in prep(), and that most `data` parameters should be Python dictionaries where the data scientists can creatively decide how to use the keys & values such as `train_x`, `test_x`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'x': array([[ 0., 0., 5., ..., 0., 0., 0.],\n", + " [ 0., 0., 0., ..., 10., 0., 0.],\n", + " [ 0., 0., 0., ..., 16., 9., 0.],\n", + " ...,\n", + " [ 0., 0., 1., ..., 6., 0., 0.],\n", + " [ 0., 0., 2., ..., 12., 0., 0.],\n", + " [ 0., 0., 10., ..., 12., 1., 0.]]), 'y': array([0, 1, 2, ..., 8, 9, 8])}\n", + "accuracy_score = 0.9000\n" + ] + } + ], + "source": [ + "m = MLModel()\n", + "raw_data = m.get_data()\n", + "print(raw_data)\n", + "\n", + "prepared_data = m.prep(raw_data)\n", + "\n", + "m.train(prepared_data)\n", + "m.evaluate(prepared_data)\n", + "print(\"accuracy_score = %.4f\" % m.metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The beauty of this API is that we can keep same workflow steps for all kinds of models, whether they are boolean/fuzzy logic or ML models!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-09-30 00:34:46,129 INFO h1st.model_repository.model_repository: Saving metrics property...\n", + "2020-09-30 00:34:46,131 INFO h1st.model_repository.model_repository: Saving model property...\n", + "2020-09-30 00:34:48,722 INFO h1st.model_repository.model_repository: Loading version 01EKEYYQKGY5FJ8BFE90KY2A01 ....\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "accuracy_score of loaded model = 0.9000\n" + ] + } + ], + "source": [ + "h1.init(MODEL_REPO_PATH=\".models\")\n", + "version_id = m.persist()\n", + "\n", + "m = MLModel().load(version_id)\n", + "print(\"accuracy_score of loaded model = %.4f\" % m.metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "H1st AI supports out-of-the-box easy persisting & loading of `sklearn` and `tf.keras` models to a model repository (other types can be added).\n", + "\n", + "This makes it much easier to include models in larger workflows such as in H1st Graphs or Ensembles. It can enable data science teams to be much more productive.\n", + "\n", + "A model repository is simply a folder on local disk or S3. We call `h1.init()` specifying `MODEL_REPO_PATH`. Alternatively, it can be automatically picked up from the project's `config.py`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/HelloWorld/rule_based_model_tutorial.ipynb b/examples/HelloWorld/rule_based_model_tutorial.ipynb new file mode 100644 index 00000000..c31f318b --- /dev/null +++ b/examples/HelloWorld/rule_based_model_tutorial.ipynb @@ -0,0 +1,92 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# H1st Rule-based Model Guide" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A rule-based model is an example of using our human knowledge to solve a problem. You could also use boolean logic, fuzzy logic, or make decisions based on statistics or the myriad other ways that humans use to solve problems.\n", + "\n", + "A rule-based model is very useful for solving the cold start problem, where data is not available.\n", + "\n", + "In the H1st framework, a human rule model can be implemented by subclassing the `h1.Model` class and implementing only the `predict()` function. Basically, it’s just a model with no training (though training is not forbidden and is sometimes useful for human models too).\n", + "\n", + "This particular simple model \"predicts\" if each given value in a stream is an even number or not." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import h1st as h1\n", + "\n", + "class RuleBasedModel(h1.Model):\n", + " \"\"\"\n", + " Simple rule-based model that \"predicts\" if a given value is an even number\n", + " \"\"\"\n", + " def predict(self, input_data: dict) -> dict:\n", + " predictions = [{'prediction': x % 2 == 0, 'value': x} for x in input_data[\"values\"]]\n", + " return {\"predictions\": predictions}" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RuleBasedModel's predictions for [0, 1, 2, 3, 4, 5] are [{'prediction': True, 'value': 0}, {'prediction': False, 'value': 1}, {'prediction': True, 'value': 2}, {'prediction': False, 'value': 3}, {'prediction': True, 'value': 4}, {'prediction': False, 'value': 5}]\n" + ] + } + ], + "source": [ + "m = RuleBasedModel()\n", + "xs = list(range(6))\n", + "results = m.predict({\"values\": xs})\n", + "predictions = results[\"predictions\"]\n", + "print(f\"RuleBasedModel's predictions for {xs} are {predictions}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/HelloWorld/tests/test_hello_world.py b/examples/HelloWorld/tests/test_hello_world.py index aba05f37..b7ae414c 100644 --- a/examples/HelloWorld/tests/test_hello_world.py +++ b/examples/HelloWorld/tests/test_hello_world.py @@ -3,7 +3,7 @@ from 
HelloWorld.rule_based_model import RuleBasedModel from HelloWorld.ml_model import MLModel -from HelloWorld.helloworld_graph import create_graph +from HelloWorld.graph import create_graph class TestHellloWorld(unittest.TestCase): diff --git a/tutorials/1. Automotive Cybersecurity - A Cold Start Problem.ipynb b/tutorials/1. Automotive Cybersecurity - A Cold Start Problem.ipynb new file mode 120000 index 00000000..82781cda --- /dev/null +++ b/tutorials/1. Automotive Cybersecurity - A Cold Start Problem.ipynb @@ -0,0 +1 @@ +../examples/AutoCyber/1. Automotive Cybersecurity - A Cold Start Problem.ipynb \ No newline at end of file diff --git a/tutorials/2. Monolithic AD & ML Approaches and Why They are Unsatisfactory.ipynb b/tutorials/2. Monolithic AD & ML Approaches and Why They are Unsatisfactory.ipynb new file mode 120000 index 00000000..e9424eab --- /dev/null +++ b/tutorials/2. Monolithic AD & ML Approaches and Why They are Unsatisfactory.ipynb @@ -0,0 +1 @@ +../examples/AutoCyber/2. Monolithic AD & ML Approaches and Why They are Unsatisfactory.ipynb \ No newline at end of file diff --git a/tutorials/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb b/tutorials/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb new file mode 120000 index 00000000..8dceca0b --- /dev/null +++ b/tutorials/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb @@ -0,0 +1 @@ +../examples/AutoCyber/3. Using H1st.AI to Encode Human Insights as a Model and Harmonize Human + ML in a H1st.Graph.ipynb \ No newline at end of file diff --git a/tutorials/4. Summary & Further Resources.ipynb b/tutorials/4. Summary & Further Resources.ipynb new file mode 120000 index 00000000..8d19c420 --- /dev/null +++ b/tutorials/4. Summary & Further Resources.ipynb @@ -0,0 +1 @@ +../examples/AutoCyber/4. 
Summary & Further Resources.ipynb \ No newline at end of file diff --git a/tutorials/H1st-logo.png b/tutorials/H1st-logo.png new file mode 100644 index 00000000..5cee22b9 Binary files /dev/null and b/tutorials/H1st-logo.png differ diff --git a/tutorials/Introduction.ipynb b/tutorials/Introduction.ipynb new file mode 120000 index 00000000..0b0ef5b3 --- /dev/null +++ b/tutorials/Introduction.ipynb @@ -0,0 +1 @@ +../examples/AutoCyber/Introduction.ipynb \ No newline at end of file diff --git a/tutorials/README.md b/tutorials/README.md new file mode 100644 index 00000000..f8084198 --- /dev/null +++ b/tutorials/README.md @@ -0,0 +1,3 @@ +Building the HTML from the notebooks requires jupyter-book: run `pip install jupyter-book`, then `jupyter-book build .`. + +Note that some notebooks are symlinks into the `../examples/AutoCyber` and `../examples/HelloWorld` folders. \ No newline at end of file diff --git a/tutorials/_config.yml b/tutorials/_config.yml new file mode 100644 index 00000000..c7bccc74 --- /dev/null +++ b/tutorials/_config.yml @@ -0,0 +1,21 @@ +# Book settings +title: Human-First AI Guide +author: H1st.AI +logo: H1st-logo.png + +repository: + url : https://github.com/h1st-ai/h1st + +html: + favicon : H1st-logo.png + use_repository_button : true + baseurl : "http://h1st.ai/" + google_analytics_id : "UA-40192392-7" + extra_navbar : > +