About

These GitHub Pages contain a hands-on tutorial providing a gentle introduction to GEGELATI usage.

Tutorial authors:

- Karol Desnos
The objective of this tutorial is to guide you through the training of your first Tangled Program Graph (TPG) with the Gegelati library. People following this tutorial are assumed to be already acquainted with the basic structural and evolutionary concepts of TPGs. If you are not yet familiar with them, a brief introduction to these concepts and to Gegelati can be found in the linked 10-minute video.
The following topics are covered in this tutorial:

- Setting up the development environment and building the tutorial projects.
- Manually controlling the pendulum learning environment.
- Interfacing the learning environment with the Gegelati library.
- Training a TPG and observing the training process.
This tutorial does not require any particular prerequisites. While fluency in C++ certainly is an asset for following this tutorial, the bits of code that must be written during the tutorial are given in this assignment.
This tutorial requires a C++ development environment compatible with the C++17 standard. Compatibility of this tutorial was tested with MS Visual Studio Community Edition (MSVC) 2019 and GCC v9.
Some scripts embedded in the files provided with this tutorial require a bash environment. While bash is natively supported on Linux and Mac OS, an installation is required on Windows. We advise you to use the all-in-one binaries maintained by the git community, available here. When installing the tool, make sure it is added to the PATH environment variable.
CMake is a utility tool whose purpose is to ease the portability of complex C/C++ applications by generating projects for most popular IDEs (CLion, Visual Studio, Makefile, QT Creator, …) on major operating systems (Linux, Windows, Mac OS). To achieve this purpose, source code files and project dependencies are specified in a configuration file, called `CMakeLists.txt`, using a specific description language. When CMake is launched, it automatically generates a project for a specified IDE, where all dependencies to third-party libraries are configured.
CMake version 3.12 or newer must be installed for this tutorial. To check whether the CMake tool is already available on your workstation, simply type the following command:
```
cmake --version
```
The latest version of CMake can be downloaded at the following URL: https://cmake.org/download/. For a simple installation, choose the binary version for Windows. During the installation process, select the “ADD TO PATH FOR ALL USERS” option. Reboot your system at the end of the installation.
Several third-party libraries and tools need to be installed for this tutorial: Gegelati, `SDL2`, `SDL2_Image`, `SDL2_ttf`, and curl. The installation process for different OSes is given below.
All library binaries will be downloaded automatically when running the CMake project. When using MSVC, all DLLs are copied automatically into the executable folders. When using other compilers, if the libraries are not found during the build process, please refer to the `/tutorial-gegelati/lib/ReadMe.md` file for solutions.
The SDL libraries (`SDL2`, `SDL2_Image`, `SDL2_ttf`) and curl are available in most package repositories. For example, on Ubuntu, simply type the following command:
```
sudo apt install libsdl2-dev libsdl2-image-dev libsdl2-ttf-dev curl
```
To install Gegelati, you must build it and install it on your machine. The following commands build and install the latest release:
```
git clone -b master https://github.com/gegelati/gegelati.git
cd gegelati/bin
cmake ..
cmake --build . --target install # On Linux
```
The SDL libraries (`SDL2`, `SDL2_Image`, and `SDL2_ttf`) are available in Homebrew. Simply type the following command:
```
brew install sdl2 sdl2_image sdl2_ttf
```
To install Gegelati, you must build it and install it on your machine. The following commands build and install the latest release:
```
git clone -b master https://github.com/gegelati/gegelati.git
cd gegelati/bin
cmake ..
cmake --build . --target install # On Mac OS
```
To download the source files needed to follow this tutorial, simply download the following archive: gegelati-tutorial.zip. Unzip these files in your favorite folder. In the remainder of this tutorial, the parent folder where the files were unzipped will be referred to as the `/gegelati-tutorial` folder.
To create the C++ project for your default build chain (Makefile on Linux and Mac OS; MSVC, when installed, on Windows), simply type the following commands in a bash terminal:
```
cd gegelati-tutorial/bin
cmake ..
```
To select another IDE, use the `cmake -G "<MY_FAVORITE_IDE>" ..` command instead. The list of available IDEs can be obtained by typing `cmake -G`.
Alternatively, some IDEs, like CLion, natively support CMake projects. In this case, simply open a new project in the `/gegelati-tutorial` folder.
In all cases, make sure that no error appears during the generation of the CMake project.
The generated C++ project contains two targets: `manual-control` and `tpg-training`. Please note that, at this point, only the `manual-control` target can be built successfully. In order to build the `tpg-training` target, additional code will be added in Section 2, when interfacing Gegelati with the learning environment.
The learning environment used in this tutorial is an inverted pendulum.
The pendulum itself is a physical simulation of a weight attached to a pivot with a rigid cord, and subject to both gravity and friction. In this learning environment, it is possible to apply a clockwise or a counterclockwise torque to the pendulum to influence its swinging.
As illustrated in the previous GIF, the objective of a learning agent trained within this learning environment is to stabilize the pendulum in the upward position. It is important to note that the maximum torque that can be applied to the pendulum is not strong enough to bring the pendulum to the upward position. Hence, the only way to bring the pendulum to this position is to progressively gain momentum with accelerated swings.
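For readers curious about what such a simulation typically computes, a textbook formulation of damped pendulum dynamics is given below. This is for intuition only; the exact equations used by the tutorial's `Pendulum` class may differ.

$$\ddot{\theta} = \frac{g}{\ell}\,\sin\theta + \frac{\tau}{m\ell^2} - f\,\dot{\theta}$$

Here $\theta$ is the angle measured from the upward position, $g$ the gravity, $\ell$ the length, $m$ the mass, $\tau$ the applied torque, and $f$ a friction coefficient; the simulation advances by integrating this acceleration over small time steps. These symbols map directly to the constructor parameters presented further below.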
To better appreciate the difficulty of this task, the first learning agent trained in this tutorial relies on a well-known machine learning technique: your brain!
To build the first executable of this tutorial, open the project generated with CMake, then build the `manual-control` target. In case of an error, make sure you are only building this target, as the other targets cannot be built successfully at this point in the tutorial.
Once the build is complete, run the executable. Using the keyboard, you can try to manually balance the pendulum with 7 different actions, which apply torques of different intensities in the clockwise or counterclockwise direction, or no torque at all.
Good luck holding the pendulum in the upward position for more than a few seconds.
The simulation parameters of the pendulum can be changed when instantiating the corresponding C++ class. The default parameter values were carefully chosen to give human beings a chance at stabilizing the pendulum. Feel free to try other configurations by parameterizing the construction of the pendulum in the `/gegelati-tutorial/src/manual/main-manual.cpp` file.

`Pendulum` constructor documentation:

```cpp
/**
* \brief Default constructor for a pendulum.
*
* \param[in] a the initial angular position of the pendulum. Default value
* is M_PI, that is, the downward position.
* \param[in] v the initial angular velocity of the pendulum. Default value
* is 0.0.
* \param[in] maxSpeed Maximum speed of the pendulum.
* \param[in] maxTorque Maximum torque applicable to the pendulum.
* \param[in] timeDelta Simulation step duration.
* \param[in] gravity Gravitational constant.
* \param[in] mass Weight of the pendulum.
* \param[in] length Length of the pendulum.
* \param[in] friction Friction slowing down the pendulum proportionally to
* its velocity.
*/
Pendulum(double a = M_PI, double v = 0.0, double maxSpeed = 16.0,
    double maxTorque = 1.8, double timeDelta = 0.01, double gravity = 9.81,
    double mass = 0.8, double length = 1.0, double friction = 0.005);
```
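For instance, the following hypothetical configuration (all values are purely illustrative; only the constructor documented above is assumed) creates a lighter, shorter pendulum driven by a stronger motor, which should be noticeably easier to control:

```cpp
// Hypothetical alternative configuration for main-manual.cpp.
// Values are illustrative, not recommendations.
Pendulum pendulum(
    M_PI,   // a: start in the downward position
    0.0,    // v: start at rest
    16.0,   // maxSpeed: default value
    2.5,    // maxTorque: stronger than the default 1.8
    0.01,   // timeDelta: default value
    9.81,   // gravity: default value
    0.5,    // mass: lighter than the default 0.8
    0.8,    // length: shorter than the default 1.0
    0.005); // friction: default value
```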
To train a Tangled Program Graph with Gegelati, the library needs to interact with the learning environment. To achieve this purpose, the learning environment must be presented to the library using a standardized API. In the case of Gegelati, this interfacing is done by specializing the `Learn::LearningEnvironment` abstract class.
The following steps will guide you through the creation of a `PendulumWrapper` class, inheriting from the `Learn::LearningEnvironment` class and interfacing the `Pendulum` class with Gegelati.
`PendulumWrapper` skeleton.

The `pendulum_wrapper.h` and `pendulum_wrapper.cpp` files already contain the skeleton of the `PendulumWrapper` class, which you'll complete throughout the next steps. To make the class compilable, this code already defines empty methods overriding all the pure virtual methods from `Learn::LearningEnvironment`. Comments in the header file explain the purpose of each method.
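For reference, the skeleton roughly follows the outline below. This is a simplified sketch reconstructed from the methods used later in this tutorial; the actual header provided with the tutorial may differ in its details.

```cpp
/* Simplified sketch of pendulum_wrapper.h (not the verbatim tutorial file). */
#include <gegelati.h> // single umbrella header assumed here

class PendulumWrapper : public Learn::LearningEnvironment {
public:
    /// Actions available for the TPG.
    static const std::vector<double> actions;

    PendulumWrapper();

    /// Overrides of the Learn::LearningEnvironment pure virtual methods.
    void doAction(uint64_t actionID) override;
    void reset(size_t seed, Learn::LearningMode mode) override;
    std::vector<std::reference_wrapper<const Data::DataHandler>> getDataSources() override;
    double getScore() const override;
    // ... other required overrides (e.g. isTerminal()) omitted for brevity.
};
```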
`Pendulum` attributes

A first specific attribute of the `PendulumWrapper` class is already declared: the `actions` vector.
```cpp
/* From pendulum_wrapper.h */
/// Array containing the actions available for the TPG.
/// These actions are expressed as real numbers in [-1, 1], and will be multiplied
/// by the MAX_TORQUE of the Pendulum to apply the corresponding action.
static const std::vector<double> actions;
```
Your first task is to update the definition of this vector in the `pendulum_wrapper.cpp` file, so that the 7 actions available to you in the manual version are also the ones made available to the TPG. It should be noted that the size of this `vector` is automatically used in the `PendulumWrapper` constructor to initialize the `LearningEnvironment` parent class with the number of available actions. A single line of code needs to be modified in this task.
```cpp
const std::vector<double> PendulumWrapper::actions{ -1.0, -0.66, -0.33, 0.0, 0.33, 0.66, 1.0 };
```
Add an instance of the `Pendulum` class to the attributes of the `PendulumWrapper` class. Don't forget to include the `pendulum.h` file and to update the constructor of the class to initialize the `Pendulum` instance, keeping default parameters for now.
```cpp
/* pendulum_wrapper.h: After other includes */
#include <pendulum.h>

/* pendulum_wrapper.h: After actions declaration */
/// Pendulum interfaced with the GEGELATI Lib
Pendulum pendulum;

/* pendulum_wrapper.cpp */
PendulumWrapper::PendulumWrapper() : LearningEnvironment(actions.size()), pendulum()
{
}
```
In this step, you will expose the angle and velocity attributes of the pendulum so that they can be observed by the Gegelati library.
During the training process, the Gegelati library accesses the data from the environment using the `getDataSources()` method. This method returns the observable state of the environment as a vector of references to `Data::DataHandler`.
The `Data::DataHandler` interface class provides a set of services that simplify the training of TPGs. In particular, in addition to methods for accessing the data, the `Data::DataHandler` supports methods for dynamically checking the addressing space of a data source, or which data types a data source can provide. It is also possible for a `Data::DataHandler` to give access to data with a data type differing from the native type used for storing the data.
For example, with a `Primitive2DArray<char>(10,10)` data handler storing a 2D array of `char[10][10]`, individual elements of the array can be accessed using the native data type `char`; but it is also possible to access a 3-by-2 subregion of the data by requesting an access with type `char[3][2]`. For more details on these features, please refer to the doxygen documentation built and delivered with the Gegelati library.
In the case of the pendulum, we will use the `Data::PointerWrapper<T>` specialization of the `Data::DataHandler` class, which acts as an interface between a native pointer (`T *`) and Gegelati. Only the following two methods of this class need to be used in this tutorial:
```cpp
/**
 * \brief Constructor for the PointerWrapper class.
 *
 * \param[in] ptr the pointer managed by the PointerWrapper.
 */
PointerWrapper(T* ptr = nullptr);

/**
 * \brief Set the pointer of the PointerWrapper.
 *
 * \param[in] ptr the new pointer managed by the PointerWrapper.
 */
void setPointer(T* ptr);
```
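A minimal usage sketch of these two methods (a standalone toy example, not tutorial code):

```cpp
// Toy example: exposing a native double through a PointerWrapper.
double angle = M_PI;

Data::PointerWrapper<double> wrapper;   // manages nullptr by default
wrapper.setPointer(&angle);             // now wraps the 'angle' variable

// Equivalent one-step construction:
Data::PointerWrapper<double> wrapper2(&angle);
```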
Instantiate two instances of the `Data::PointerWrapper<T>` class as attributes of the `PendulumWrapper`. In the constructor of the `PendulumWrapper`, make these two `Data::PointerWrapper` point to the `angle` and `velocity` attributes of the `pendulum`. Then, update the `getDataSources()` method to return a vector referring to these two `Data::PointerWrapper`. This task requires less than 10 lines of C++ code.
C++ tip: Pushing a variable `T var` to a `std::vector<std::reference_wrapper<T>> vect` is done with a simple call to `vect.push_back(var)`.
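For illustration, here is a self-contained example of this C++ idiom (standard library only, unrelated to the Gegelati API):

```cpp
#include <functional>
#include <vector>

void example() {
    double value = 42.0;
    std::vector<std::reference_wrapper<double>> vect;
    vect.push_back(value);    // 'value' is implicitly wrapped in a reference_wrapper
    vect.at(0).get() = 3.14;  // writes through the reference: 'value' is now 3.14
}
```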
This solution is based on a vector of `Data::PointerWrapper<double>`. Alternative solutions based on two individual `Data::PointerWrapper<double>` attributes are perfectly viable.
```cpp
/* pendulum_wrapper.h : After pendulum declaration */
/// DataHandler wrappers
std::vector<Data::PointerWrapper<double>> data;

/* pendulum_wrapper.cpp */
PendulumWrapper::PendulumWrapper() : LearningEnvironment(actions.size()), pendulum(), data(2)
{
    data.at(0).setPointer(&this->pendulum.getAngle());
    data.at(1).setPointer(&this->pendulum.getVelocity());
}

std::vector<std::reference_wrapper<const Data::DataHandler>> PendulumWrapper::getDataSources()
{
    std::vector<std::reference_wrapper<const Data::DataHandler>> result;
    result.push_back(this->data.at(0));
    result.push_back(this->data.at(1));
    return result;
}
```
After exposing the pendulum attributes to the learning agent, this step will give it the ability to act on the pendulum.
The number of discrete actions that can be taken by the learning agent is given by the `getNbActions()` method of the learning environment. The value returned by this method is already set when calling the constructor of the `LearningEnvironment` parent class of the `PendulumWrapper`.
To execute an action, the learning agent calls the `doAction(int)` method of the learning environment with an argument corresponding to the action to execute.

Implement the `PendulumWrapper::doAction(int)` method using the actions defined in the `actions` attribute. To apply a torque to the pendulum, the `Pendulum::applyTorque(double)` method must be used.
```cpp
/**
* \brief Apply the given torque to the pendulum and advance the simulation.
*
* \param[in] torque the torque applied to the pendulum. If the given value
* is not within the [-MAX_TORQUE, MAX_TORQUE] interval, it will be
* saturated to the closest bound.
*/
void applyTorque(double torque);
```
Two lines of C++ code are sufficient for this task.

```cpp
/* pendulum_wrapper.cpp */
void PendulumWrapper::doAction(uint64_t actionID)
{
    // Retrieve the torque corresponding to the ID
    double torque = this->actions[actionID] * pendulum.MAX_TORQUE;

    // Apply it to the pendulum
    this->pendulum.applyTorque(torque);
}
```
To train the TPG, the reinforcement learning process requires making many attempts at stabilizing the pendulum in the upward position. Between attempts, the initial position of the pendulum is reset using the `reset()` method. Implement this method so that the pendulum managed by the `PendulumWrapper` always starts in a downward, static state. Two lines of code are needed for this task.

At this point, the arguments of the `reset` method can be ignored. Their utility will be covered in a more advanced tutorial.
```cpp
/* pendulum_wrapper.cpp */
void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode)
{
    this->pendulum.setAngle(M_PI);
    this->pendulum.setVelocity(0.0);
}
```
In this last step, you are going to implement the reward mechanism used by Gegelati to identify the best strategies for controlling the pendulum.
The objective of this learning environment is to steady the pendulum in the upward position. While it is easy to measure the distance of the pendulum to the upward position at each simulation step, appreciating the steadiness of the pendulum in this position can only be achieved over time. Hence, the reward will be accumulated in a `double` attribute of the `PendulumWrapper`. At each simulation step $i$, this reward is updated as follows:

$$reward_i = reward_{i-1} - \left(0.1\,\theta_i^2 + 0.01\,\omega_i^2 + 0.001\,\tau_i^2\right)$$

where $\theta$ is the angular distance to the upward position, $\omega$ is the velocity of the pendulum, and $\tau$ the torque applied to the pendulum. Intuitively, the purpose of this equation is to minimize the angular distance to the upward position as the primary criterion, but also the velocity of the pendulum when reaching this position, and the torque applied to the pendulum to reach and stay in this position.
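To get a feel for the magnitude of the resulting scores: a TPG that leaves the pendulum hanging motionless in the downward position has $\theta = \pi$, $\omega = 0$, and $\tau = 0$ at every step, so each step contributes $-0.1\,\pi^2 \approx -0.987$ to the accumulated reward. Assuming an evaluation lasts 1000 simulation steps, such a do-nothing policy would score roughly $-987$, which matches the order of magnitude of the scores visible in the training logs later in this tutorial.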
Implement the rewarding mechanism in the `PendulumWrapper` class by:

- adding an `accumulatedReward` attribute to the class;
- updating this accumulated reward at each call to the `doAction(int)` method;
- returning the accumulated reward in the `getScore()` method;
- resetting the accumulated reward in the `reset(int, LearningMode)` method.

Less than 10 new lines of code are needed for this task.

C++ tip: The `double fmod(double, double)` function can be used to compute the modulo of two `double` numbers.
```cpp
/* pendulum_wrapper.h : After the PointerWrapper */
double accumulatedReward;

/* pendulum_wrapper.cpp */
void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode)
{
    this->pendulum.setAngle(M_PI);
    this->pendulum.setVelocity(0.0);

    this->accumulatedReward = 0.0;
}

void PendulumWrapper::doAction(uint64_t actionID)
{
    // Retrieve the torque corresponding to the ID
    double torque = this->actions[actionID] * pendulum.MAX_TORQUE;

    // Apply it to the pendulum
    this->pendulum.applyTorque(torque);

    // Get the angle value between -M_PI and M_PI (0 being the upward position)
    double angle = fmod(this->pendulum.getAngle() + M_PI, 2.0 * M_PI) - M_PI;

    // Compute a reward based on the angle to the upward position, the velocity and the torque.
    // All three values should be minimized.
    double reward = -(0.1f * (angle * angle) + 0.01f * (this->pendulum.getVelocity() * this->pendulum.getVelocity()) + 0.001f * (torque * torque));

    // Accumulate the reward
    accumulatedReward += reward;
}

double PendulumWrapper::getScore(void) const
{
    return accumulatedReward;
}
```
The `pendulum_wrapper.h` and `pendulum_wrapper.cpp` files resulting from this tutorial can be downloaded at the following link: pendulum_wrapper_solution.zip.
The code needed to interface the pendulum with Gegelati is now complete. This section will (finally) let you train a TPG with the pendulum learning environment.

The `main-training.cpp` file contains the entry point of the executable built with the `tpg-training` target. The program is structured as follows:

- The training meta-parameters are loaded from the `gegelati-tutorial/params.json` file.
- The `PendulumWrapper` learning environment is instantiated.
- The training is delegated to the `Learn::LearningAgent` class. This utility class will manage most aspects of the training process of the TPG, including its initialization, mutations, and evaluation of the fitness of its roots within the learning environment.

Build and run the `tpg-training` target to observe the TPG training process in action. For a faster training, it is strongly advised to build the project in Release mode.
The first output of the training process is the log generated in the console. An example log is presented hereafter:

```
Pendulum TPG training.
        Train
 Gen   NbVert       Min       Avg       Max   T_mutat    T_eval   T_total
   0      164  -7624.63  -1383.36   -980.33      0.00      1.51      1.51
   1      167  -4848.23  -1263.68   -980.33      7.28      1.56     10.38
   2      170  -6862.46  -1242.03   -844.01      2.63      1.53     14.55
   3      170  -7707.18  -1181.00   -844.01      7.60      1.48     23.65
   4      168
```
The generated logs contain a table that can be exported in the CSV format by giving a file path to the `LABasicLogger` constructor. This table contains the following columns:

- `Gen`: the generation number.
- `NbVert`: the number of vertices in the trained TPG.
- `Min`, `Avg`, `Max`: the minimum, average, and maximum scores obtained by the root teams evaluated during this generation.
- `T_mutat`, `T_eval`, `T_total`: the time, in seconds, spent on mutating the TPG, on evaluating the roots, and the total time elapsed since the beginning of the training.
A few insights on these logs:

- The root team that obtained the best score throughout the training can be retrieved with the `Learn::LearningAgent::getBestRoot()` method.

The second output of the training process is the display of the pendulum. While the scores presented in the logs are not easily interpretable, this display makes it possible to appreciate how well the trained TPGs are doing.

With default pendulum parameters and meta-parameters, the learning agent should be able to stabilize the pendulum in less than 25 generations. During the first generations, the learning agent usually learns how to swing the pendulum closer to the upward position. Then, it learns how to slow down the pendulum when it approaches the upward position. Finally, it finds a way to stabilize the pendulum.

In this tutorial, you have learned:

- how to interface a learning environment with Gegelati by specializing the `Learn::LearningEnvironment` class;
- how to train a TPG on this environment with the `Learn::LearningAgent` class and follow its progress through the training logs.

While this tutorial illustrates the basic concepts of TPG training, Gegelati offers many features to play with TPGs. The following tutorials guide you through these features, starting from the result of this tutorial. These tutorials can be followed in any order, so feel free to explore them based on your personal interests and objectives.

Several other open-source applications are available in the GitHub repository of Gegelati. Feel free to explore them to get a better understanding of the wonderful abilities of TPGs.
The objective of this tutorial is to experiment with the DOT file format supported in Gegelati to export trained Tangled Program Graphs (TPGs), to visualize their topology, and to import them back into a program.
The following topics are covered in this tutorial:

- Using the `File::TPGGraphDotExporter` class to serialize pre-trained TPGs into DOT files.
- Visualizing the topology of exported TPGs.
- Importing TPGs back into a program with the `File::TPGGraphDotImporter` class.

The starting point of this tutorial is the C++ project obtained at the end of the GEGELATI introductory tutorial. While completing the introductory tutorial is strongly advised, a copy of the project resulting from it can be downloaded at the following link: pendulum_wrapper_solution.zip.
DOT is a popular description language that makes it possible to describe graphs with a few lines of code. With a simple declarative syntax, labeled directed or undirected graphs with homogeneous or heterogeneous types of vertices can be described. In its simplest form, the DOT syntax (mostly) focuses on the description of the topology of graphs, leaving out graphical and layout concerns. These graphical and layout concerns are handled automatically by dedicated visualization tools, such as the open-source GraphViz tool.
A simple example of a graph described with the DOT language is presented in the following excerpt:
```dot
digraph mygraph {
    root -> A;
    root -> B -> C;
    A -> A;
    B -> A;
}
```
The visualization of this graph with `xdot` produces the following output:
In Gegelati, the DOT language is used as the serialization file format for exporting, visualizing, and importing TPGs. The general structure used for storing TPGs is as follows:
```dot
/* Header */
digraph{
    graph[pad = "0.212, 0.055" bgcolor = lightgray]
    node[shape=circle style = filled label = ""]

    /* Team vertex */
    T0 [fillcolor="#1199bb"]

    /* Program */
    P0 [fillcolor="#cccccc" shape=point] //-7|7|0|-4|9|

    /* Program P0 instructions (invisible) */
    I0 [shape=box style=invis label="2|5&2|1#0|4\n1|3&0|7#0|5\n"]
    P0 -> I0[style=invis]

    /* Action vertex */
    A0 [fillcolor="#ff3366" shape=box margin=0.03 width=0 height=0 label="0"]

    /* TPG Edge */
    T0 -> P0 -> A0

    /* Root list */
    { rank= same T0 }
}
```
To export TPGs in the DOT format, Gegelati provides the `File::TPGGraphDotExporter` class. Each instance of the `TPGGraphDotExporter` class is associated, on construction, to a `TPG::TPGGraph`. The constructor of the exporter class is declared as follows:
```cpp
/**
 * \brief Constructor for the exporter.
 *
 * \param[in] filePath initial path to the file where the dot content
 * will be written.
 * \param[in] graph const reference to the graph whose content will
 * be exported in dot.
 * \throws std::runtime_error in case no file could be opened at the
 * given filePath.
 */
TPGGraphDotExporter(const char *filePath, const TPG::TPGGraph &graph)
```
While the path of the file where the TPG graph is written can be modified using the `TPGGraphDotExporter::setNewFilePath(char*)` method, the TPG associated to the exporter on construction cannot be changed. The reason for this constraint is that, for a TPG that was already exported, subsequent exports of the TPG, even after mutations, will keep identical names for the teams, programs, and actions present in both versions of the TPG, and will create new names for new graph elements. Thanks to this feature, it is easier to keep track of surviving teams throughout the evolution process.
To print the trained TPG after each generation of the training process, edit the `/gegelati-tutorial/src/training/main-training.cpp` file as follows:

- Instantiate a `TPGGraphDotExporter` before entering the iterative training process. To retrieve a pointer to the trained TPG, use the following method of the learning agent: `Learn::LearningAgent::getTPGGraph()`.
- At each generation, print the TPG into a new file of the `/gegelati-tutorial/dat/` folder. You can use the `ROOT_DIR` macro within the C++ code to target the `/gegelati-tutorial` folder automatically. To trigger the printing of a file, use the `TPGGraphDotExporter::print()` method.

```cpp
/* main-training.cpp */
// Create an exporter for all graphs
File::TPGGraphDotExporter dotExporter(ROOT_DIR "/dat/tpg_0000.dot", *la.getTPGGraph());

// Train for params.nbGenerations generations
for (int i = 0; i < params.nbGenerations && !exitProgram; i++) {
    la.trainOneGeneration(i);

    // Export dot
    char buff[150];
    sprintf(buff, ROOT_DIR "/dat/tpg_%04d.dot", i);
    dotExporter.setNewFilePath(buff);
    dotExporter.print();

    // ...
}
```
During the training process, the pseudo-random nature of the graph and program mutations causes useless elements to appear.
Training roots: At the end of the training process, the TPG needs to be exported for further use, for example for running inference with the pre-trained TPG, as will be done later in this tutorial. The TPGs exported in the previous step contained all the roots present in the TPG at a given generation, which is useful to better understand the training process, but also to pause a training process and restart it later.
When exporting the TPG resulting from the training, only the graph stemming from the root team providing the best results needs to be exported. To keep only the TPG stemming from the best root, the `Learn::LearningAgent::keepBestPolicy()` method should be used.
Hitchhiker programs: In TPG graphs, so-called “hitchhiker” programs may appear. A team has a valuable behavior if it helps the TPG to which it belongs to obtain better rewards. A hitchhiker program is a program that belongs to such a valuable team, but that never produces a winning bid when the programs of this team are executed, and thus never contributes to the useful behavior itself. Because the team has a valuable behavior, it will survive for many generations, with all its programs, including the useless hitchhiker programs.
To identify these hitchhiker programs, the execution of TPG graphs must be instrumented in order to keep track of how many times each team was visited, and how many times each program produced a winning bid. This instrumentation of the TPG graph is achieved by specifying a specialized TPG factory when instantiating the learning agent. This can be achieved as follows:

```cpp
Learn::LearningAgent la(pendulumLE, instructionSet, params, TPG::TPGInstrumentedFactory());
```
After the training process, hitchhiker programs can be cleaned from the TPG using a helper method from this factory, as follows:

```cpp
// Clean unused vertices and teams
std::shared_ptr<TPG::TPGGraph> tpg = la.getTPGGraph();
TPG::TPGInstrumentedFactory().clearUnusedTPGGraphElements(*tpg);
```
Introns: In programs, it is very common to observe so-called “intron” instructions that do not directly contribute to the data path leading to the result returned by the program. While these instructions are automatically detected and skipped during program execution, they may still be valuable during the training process, as they act as dormant genes that may be activated again by future mutations.
When exporting a TPG graph, these introns only pollute the exported graph, and should thus be removed using the `TPG::TPGGraph::clearProgramIntrons()` method.
Update the instantiation of the `Learn::LearningAgent` to use the `TPG::TPGInstrumentedFactory`. Then, after the end of the iterative training process:
```cpp
/* main-training.cpp: After the for loop. */

// Clean unused vertices and teams
std::shared_ptr<TPG::TPGGraph> tpg = la.getTPGGraph();
TPG::TPGInstrumentedFactory().clearUnusedTPGGraphElements(*tpg);

// Keep only the best root
la.keepBestPolicy();

// Clean introns
tpg->clearProgramIntrons();

// Print the resulting TPG
dotExporter.setNewFilePath(ROOT_DIR "/dat/best_tpg.dot");
dotExporter.print();
```
To visualize TPGs described with DOT, a dedicated tool can be installed on your computer, such as GraphViz. Alternatively, several websites propose online viewers for graphs described with the DOT language. For example, Edotor, GraphvizOnline, or GraphViz Visual Editor can be used to follow this tutorial.
An excerpt of the visual representation of a TPG produced by GraphViz is presented hereafter:
The large colored circles in the graph represent the teams of the TPG. At the top of the image, the two darker teams are root teams of the TPG. Lighter teams are internal teams of the TPG, referenced in the graph stemming from at least one root of the TPG. The red rectangles represent the actions of the TPG. The integer numbers in the action rectangles represent the numbers associated with the discrete actions available in the learning environment. Finally, arrows linking teams to other teams or actions are separated in two halves: the first one linking the team to a program, represented with a tiny circle, and the second one linking the program to its destination team or action. In case several edges starting from different teams share a common program and destination, a single arrow exists between the program and its destination.
The training meta-parameters used in this tutorial, specified in `gegelati-tutorial/params.json`, specify that the trained TPG should contain 150 roots at each generation, 80% of which are removed during the decimation process. Hence, the DOT graphs exported after each generation contain 30 root teams, which makes them quite large when visualized.
The first observable feature of TPGs during the training process is their maximum depth from roots to actions. When the learning agent and the trained TPG are first initialized, the number of created roots is equal to the number of actions available to the TPG. Each of these roots is connected to two actions, such that each action is itself connected to two roots. Hence, at initialization, the depth of the TPG between roots and actions is 1.
During any iteration of the natural-selection training process, additional roots are added to the TPG to reach the desired 150 roots. These roots are obtained by cloning and mutating existing teams of the TPG. During this mutation process, the programs of a mutated team can change their destinations among the surviving teams from the previous generation, but can never point towards another root introduced at the same generation. Hence, the maximum depth of the TPG can increase, at most, by one at each generation. This is why, when observing the TPG resulting from the first generation, the graph should contain 30 roots with a maximum depth of 2 between roots and actions.
In practice, the maximum depth of the TPG remains relatively stable, unless one of the root teams discovers a new valuable strategy. Indeed, in the absence of a reward breakthrough, most newly introduced teams, which may be responsible for an increase of the TPG depth, won't survive a single generation. Thanks to this property, the depth of a TPG graph automatically reflects the complexity of the strategy deployed for maximizing its rewards. Hence, when visualizing the TPGs obtained during the first generations, you will most likely not notice a big change in the maximum depth of the TPG.
When a root team with a valuable behavior appears, it will survive for many generations, thus increasing its chance of being itself referenced by a new root team bringing further improvement of the TPG reward. When becoming an internal (i.e. non-root) team of the TPG, a team is protected from decimation, which further increases its life-span, and its chance of being referenced during future mutations. This natural self-preservation of valuable behaviors is called the emergent hierarchical structure of TPGs.
Visualize the TPGs obtained throughout the training process, and the structure of the best TPG exported when exiting the training process.
Once a pre-trained TPG is exported, an import feature is indispensable to enable using this TPG for inference purposes. In this step, you will create an inference executable based on a TPG exported in the DOT format.

- Download the `main-inference.cpp` file and place it in the `gegelati-tutorial/src/inference/` folder: Download Link.
- Add the following lines at the end of the `gegelati-tutorial/CMakeLists.txt` file:

```cmake
# Sub project for inference
file(GLOB
    inference_files
    ./src/inference/*.cpp
    ./src/inference/*.h
    ./src/training/instructions.*
    ./src/training/pendulum_wrapper.*
    params.json
)
include_directories(${GEGELATI_INCLUDE_DIRS} ${SDL2_INCLUDE_DIR} ${SDL2IMAGE_INCLUDE_DIR} ${SDL2TTF_INCLUDE_DIR} "./src/" "./src/training/")
add_executable(tpg-inference ${pendulum_files} ${inference_files})
target_link_libraries(tpg-inference ${GEGELATI_LIBRARIES} ${SDL2_LIBRARY} ${SDL2IMAGE_LIBRARY} ${SDL2TTF_LIBRARY})
target_compile_definitions(tpg-inference PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}")
```
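After editing the `CMakeLists.txt` file, re-run `cmake ..` from the `/gegelati-tutorial/bin` folder, as was done during the initial setup, so that the new `tpg-inference` target appears in the generated project.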
Open the `gegelati-tutorial/src/inference/main-inference.cpp` file, which is pre-filled with the code needed to load and infer a TPG from a dot file. The program is structured as follows:

- The `PendulumWrapper` learning environment and the associated learning agent are instantiated. While the learning agent is not strictly needed for inference purposes, it provides a simple API to initialize the execution environment, which makes it easier to use in this example. Also note that this learning agent can be reused as a basis to restart the training of a previously saved TPG.
- The TPG is imported from its DOT file using the `File::TPGGraphDotImporter` class with the following lines.

```cpp
// Load the TPG from the file
File::TPGGraphDotImporter importer(ROOT_DIR "/dat/best_tpg.dot", la.getEnvironment(), *la.getTPGGraph());
importer.importGraph();
```
It is important to note that the importer does not create its own TPG, but fills and replaces the one created by the learning agent.

- A `TPG::TPGExecutionEngine` is instantiated; it will manage the inference of the loaded TPG graph.
- A loop executes `params.maxNbActionsPerEval` actions of the TPG on the pendulum. Each action is selected by the `TPG::TPGExecutionEngine`, which starts an execution of the TPG from a specified TPG vertex, with the current state of the pendulum learning environment. This execution produces a `trace`, which corresponds to the list of vertices visited during one execution of the TPG. The last vertex visited in the `trace` is the action selected by the TPG, as illustrated in the sketch below.
+This score is automatically printed in the console after params.maxNbActionsPerEval
actions are performed, and before restarting the simulation.
In this tutorial, you have seen how to and visualize TPGs during the training process, and also how to import them back for inference. +The code presented in this tutorial can serve as a basis for many purposes, and notably to restart the training of a TPG saved during the training process.