From 0f20ddfa644fbcc7df56fabd25403366a0856a31 Mon Sep 17 00:00:00 2001 From: Fox-IT Security Research Team Date: Wed, 20 Jul 2022 11:29:44 +0200 Subject: [PATCH] The flow.record project --- .github/workflows/dissect-ci.yml | 7 + .gitignore | 11 + COPYRIGHT | 5 + LICENSE | 661 ++++++++++++++++++++ MANIFEST.in | 2 + README.md | 105 ++++ examples/filesystem.py | 108 ++++ examples/passivedns.py | 71 +++ examples/records.json | 2 + examples/tcpconn.py | 43 ++ flow/record/__init__.py | 79 +++ flow/record/adapter/__init__.py | 64 ++ flow/record/adapter/archive.py | 32 + flow/record/adapter/avro.py | 192 ++++++ flow/record/adapter/broker.py | 47 ++ flow/record/adapter/csvfile.py | 43 ++ flow/record/adapter/elastic.py | 43 ++ flow/record/adapter/jsonfile.py | 68 +++ flow/record/adapter/line.py | 37 ++ flow/record/adapter/mongo.py | 91 +++ flow/record/adapter/splunk.py | 82 +++ flow/record/adapter/stream.py | 51 ++ flow/record/adapter/text.py | 50 ++ flow/record/adapter/xlsx.py | 65 ++ flow/record/base.py | 807 +++++++++++++++++++++++++ flow/record/fieldtypes/__init__.py | 491 +++++++++++++++ flow/record/fieldtypes/credential.py | 9 + flow/record/fieldtypes/net/__init__.py | 15 + flow/record/fieldtypes/net/ip.py | 80 +++ flow/record/fieldtypes/net/ipv4.py | 137 +++++ flow/record/fieldtypes/net/tcp.py | 9 + flow/record/fieldtypes/net/udp.py | 9 + flow/record/jsonpacker.py | 101 ++++ flow/record/packer.py | 167 +++++ flow/record/selector.py | 714 ++++++++++++++++++++++ flow/record/stream.py | 293 +++++++++ flow/record/tools/__init__.py | 0 flow/record/tools/geoip.py | 194 ++++++ flow/record/tools/rdump.py | 169 ++++++ flow/record/utils.py | 87 +++ flow/record/whitelist.py | 40 ++ pyproject.toml | 9 + setup.cfg | 9 + setup.py | 26 + tests/__init__.py | 0 tests/selector_explain_example.py | 32 + tests/standalone_test.py | 16 + tests/test_compiled_selector.py | 37 ++ tests/test_fieldtype_ip.py | 238 ++++++++ tests/test_fieldtypes.py | 458 ++++++++++++++ tests/test_json_packer.py | 25 + tests/test_json_record_adapter.py | 71 +++ tests/test_packer.py | 216 +++++++ tests/test_rdump.py | 178 ++++++ tests/test_record.py | 613 +++++++++++++++++++ tests/test_record_adapter.py | 381 ++++++++++++ tests/test_record_descriptor.py | 142 +++++ tests/test_regression.py | 376 ++++++++++++ tests/test_selector.py | 504 +++++++++++++++ tests/test_splunk_adapter.py | 112 ++++ tests/utils_inspect.py | 58 ++ tox.ini | 58 ++ 62 files changed, 8840 insertions(+) create mode 100644 .github/workflows/dissect-ci.yml create mode 100644 .gitignore create mode 100644 COPYRIGHT create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 examples/filesystem.py create mode 100644 examples/passivedns.py create mode 100644 examples/records.json create mode 100644 examples/tcpconn.py create mode 100644 flow/record/__init__.py create mode 100644 flow/record/adapter/__init__.py create mode 100644 flow/record/adapter/archive.py create mode 100644 flow/record/adapter/avro.py create mode 100644 flow/record/adapter/broker.py create mode 100644 flow/record/adapter/csvfile.py create mode 100644 flow/record/adapter/elastic.py create mode 100644 flow/record/adapter/jsonfile.py create mode 100644 flow/record/adapter/line.py create mode 100644 flow/record/adapter/mongo.py create mode 100644 flow/record/adapter/splunk.py create mode 100644 flow/record/adapter/stream.py create mode 100644 flow/record/adapter/text.py create mode 100644 flow/record/adapter/xlsx.py create mode 100644 flow/record/base.py create 
mode 100644 flow/record/fieldtypes/__init__.py create mode 100644 flow/record/fieldtypes/credential.py create mode 100644 flow/record/fieldtypes/net/__init__.py create mode 100644 flow/record/fieldtypes/net/ip.py create mode 100644 flow/record/fieldtypes/net/ipv4.py create mode 100644 flow/record/fieldtypes/net/tcp.py create mode 100644 flow/record/fieldtypes/net/udp.py create mode 100644 flow/record/jsonpacker.py create mode 100644 flow/record/packer.py create mode 100644 flow/record/selector.py create mode 100644 flow/record/stream.py create mode 100644 flow/record/tools/__init__.py create mode 100644 flow/record/tools/geoip.py create mode 100644 flow/record/tools/rdump.py create mode 100644 flow/record/utils.py create mode 100644 flow/record/whitelist.py create mode 100644 pyproject.toml create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/selector_explain_example.py create mode 100644 tests/standalone_test.py create mode 100644 tests/test_compiled_selector.py create mode 100644 tests/test_fieldtype_ip.py create mode 100644 tests/test_fieldtypes.py create mode 100644 tests/test_json_packer.py create mode 100644 tests/test_json_record_adapter.py create mode 100644 tests/test_packer.py create mode 100644 tests/test_rdump.py create mode 100644 tests/test_record.py create mode 100644 tests/test_record_adapter.py create mode 100644 tests/test_record_descriptor.py create mode 100644 tests/test_regression.py create mode 100644 tests/test_selector.py create mode 100644 tests/test_splunk_adapter.py create mode 100644 tests/utils_inspect.py create mode 100644 tox.ini diff --git a/.github/workflows/dissect-ci.yml b/.github/workflows/dissect-ci.yml new file mode 100644 index 0000000..4602eeb --- /dev/null +++ b/.github/workflows/dissect-ci.yml @@ -0,0 +1,7 @@ +name: Dissect CI +on: [push, pull_request, workflow_dispatch] + +jobs: + ci: + uses: fox-it/dissect-workflow-templates/.github/workflows/dissect-ci-template-self-hosted.yml@main + secrets: inherit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a89302b --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +coverage.xml +.coverage +dist/ +.eggs/ +*.egg-info/ +*.pyc +__pycache__/ +.pytest_cache/ +.tox/ + +flow/record/version.py diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..c055a21 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,5 @@ +Dissect is released as open source by Fox-IT (https://www.fox-it.com) part of NCC Group Plc (https://www.nccgroup.com) + +Developed by the Dissect Team (dissect@fox-it.com) and made available at https://github.com/fox-it/flow.record + +License terms: AGPL3 (https://www.gnu.org/licenses/agpl-3.0.html) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..be3f7b2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. 
You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. 
In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..4b4dd26 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +exclude .gitignore +exclude .github diff --git a/README.md b/README.md new file mode 100644 index 0000000..ef882ba --- /dev/null +++ b/README.md @@ -0,0 +1,105 @@ +# flow.record + +A library for defining and creating structured data (called records) that can be streamed to disk or piped to other +tools that use `flow.record`. 
+
+Records can be read and transformed to other formats by using output adapters, such as CSV and JSON.
+
+For more information on how Dissect uses this library, please see [the
+documentation](https://dissect.readthedocs.io/en/latest/tools/rdump.html#what-is-a-record).
+
+## Usage
+
+This library contains the tool `rdump`. With `rdump` you can read, write, interact with, and manipulate records from `stdin`
+or from record files saved on disk. Please refer to `rdump -h` or to the [`rdump`
+documentation](https://dissect.readthedocs.io/en/latest/tools/rdump.html) for all parameters.
+
+Records are the primary output type when using the various functions of `target-query`. The following command shows how
+to pipe record output from `target-query` to `rdump`:
+
+```shell
+user@dissect~$ target-query -f runkeys targets/EXAMPLE.vmx | rdump
+
+<...>
+```
+
+## Programming example
+
+Define a `RecordDescriptor` (schema), then create a few records and write them to disk:
+
+```python
+from flow.record import RecordDescriptor, RecordWriter
+
+# define our descriptor
+MyRecord = RecordDescriptor("my/record", [
+    ("net.ipaddress", "ip"),
+    ("string", "description"),
+])
+
+# define some records
+records = [
+    MyRecord("1.1.1.1", "cloudflare dns"),
+    MyRecord("8.8.8.8", "google dns"),
+]
+
+# write the records to disk
+with RecordWriter("output.records.gz") as writer:
+    for record in records:
+        writer.write(record)
+```
+
+The records can then be read from disk using the `rdump` tool or by instantiating a `RecordReader` when using the
+library.
+
+```shell
+$ rdump output.records.gz
+
+
+```
+
+### Selectors
+
+We can also use `selectors` for filtering and selecting records using a query (Python-like syntax), e.g.:
+
+```shell
+$ rdump output.records.gz -s '"google" in r.description'
+
+$ rdump output.records.gz -s 'r.ip in net.ipnetwork("1.1.0.0/16")'
+```
+
+## Build and test instructions
+
+This project uses `tox` to build source and wheel distributions. Run the following command from the root folder to build
+these:
+
+```bash
+tox -e build
+```
+
+The build artifacts can be found in the `dist/` directory.
+
+`tox` is also used to run linting and unit tests in a self-contained environment. To run both linting and unit tests
+using the default installed Python version, run:
+
+```bash
+tox
+```
+
+For a more elaborate explanation on how to build and test the project, please see [the
+documentation](https://dissect.readthedocs.io/en/latest/contributing/developing.html#building-testing).
+
+## Contributing
+
+The Dissect project encourages any contribution to the codebase. To make your contribution fit into the project, please
+refer to [the style guide](https://dissect.readthedocs.io/en/latest/contributing/style-guide.html).
+
+## Copyright and license
+
+Dissect is released as open source by Fox-IT (<https://www.fox-it.com>) part of NCC Group Plc
+(<https://www.nccgroup.com>).
+
+Developed by the Dissect Team (<dissect@fox-it.com>) and made available at <https://github.com/fox-it/flow.record>.
+
+License terms: AGPL3 (<https://www.gnu.org/licenses/agpl-3.0.html>). For more information, see the LICENSE file.
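The README mentions reading records back by instantiating a `RecordReader` but does not show it. A minimal reading sketch: the file name and field names are taken from the writing example above, and the context-manager and iteration behaviour is assumed from the `AbstractReader` base class added later in this patch.

```python
from flow.record import RecordReader

# read the records written by the example above and print two fields per record
with RecordReader("output.records.gz") as reader:
    for record in reader:
        print(record.ip, record.description)
```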
diff --git a/examples/filesystem.py b/examples/filesystem.py new file mode 100644 index 0000000..a8f5524 --- /dev/null +++ b/examples/filesystem.py @@ -0,0 +1,108 @@ +import os +import stat + +from datetime import datetime + +from flow.record import RecordDescriptor, RecordWriter + +FilesystemFile = RecordDescriptor(""" +filesystem/unix/entry + string path; + varint inode; + varint dev; + unix_file_mode mode; + filesize size; + uint32 uid; + uint32 gid; + datetime ctime; + datetime mtime; + datetime atime; + string link; +""") + + +def hash_file(path, t): + f = open(path, "rb") + while 1: + d = f.read(4096) + if d == "": + break + f.close() + + +class FilesystemIterator: + basepath = None + + def __init__(self, basepath): + self.basepath = basepath + self.recordType = FilesystemFile + + def classify(self, source, classification): + self.recordType = FilesystemFile.base(_source=source, _classification=classification) + + def iter(self, path): + path = os.path.abspath(path) + return self._iter(path) + + def _iter(self, path): + if path.startswith("/proc"): + return + + st = os.lstat(path) + + abspath = path + if self.basepath and abspath.startswith(self.basepath): + abspath = abspath[len(self.basepath):] + + ifmt = stat.S_IFMT(st.st_mode) + + link = None + if ifmt == stat.S_IFLNK: + link = os.readlink(path) + + yield self.recordType( + path=abspath, + inode=int(st.st_ino), + dev=int(st.st_dev), + mode=st.st_mode, + size=st.st_size, + uid=st.st_uid, + gid=st.st_gid, + ctime=datetime.fromtimestamp(st.st_ctime), + mtime=datetime.fromtimestamp(st.st_mtime), + atime=datetime.fromtimestamp(st.st_atime), + link=link, + ) + + if ifmt == stat.S_IFDIR: + for i in os.listdir(path): + if i in (".", ".."): + continue + + fullpath = os.path.join(path, i) + for e in self.iter(fullpath): + yield e + +chunk = [] + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('target', metavar="TARGET", nargs="*") + parser.add_argument('-s', dest='source', help="Source") + parser.add_argument('-c', dest='classification', help="Classification") + parser.add_argument('-b', dest='base', help="Base directory") + + args = parser.parse_args() + + stream = RecordWriter() + + fsiter = FilesystemIterator(args.base) + + if args.source or args.classification: + fsiter.classify(args.source, args.classification) + + for path in args.target: + for r in fsiter.iter(path): + stream.write(r) diff --git a/examples/passivedns.py b/examples/passivedns.py new file mode 100644 index 0000000..be05359 --- /dev/null +++ b/examples/passivedns.py @@ -0,0 +1,71 @@ +#!/usr/bin/env pypy +import record +import sys +import datetime + +import net.ipv4 + +from fileprocessing import DirectoryProcessor + + +def ts(s): + return datetime.datetime.fromtimestamp(float(s)) + + +def ip(s): + return net.ipv4.Address(s) + + +class SeparatedFile: + fp = None + seperator = None + format = None + + def __init__(self, fp, seperator, format): + self.fp = fp + self.seperator = seperator + self.format = format + + def __iter__(self): + desc = record.RecordDescriptor([i[0] for i in PASSIVEDNS_FORMAT]) + recordtype = desc.recordType + + for l in self.fp: + p = l.strip().split(self.seperator) + + r = {} + for i in range(len(self.format)): + field = self.format[i] + + v = p[i] + if field[1]: + v = field[1](v) + + r[field[0]] = v + + yield recordtype(**r) + + +def PassiveDnsFile(fp): + return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT) + +PASSIVEDNS_FORMAT = [ + ("ts", ts), + ("src", ip), + ("dst", ip), + ("family", 
None), + ("query", None), + ("query_type", None), + ("result", None), + ("ttl", int), + ("x", None), +] + + +def main(): + rs = record.RecordOutput(sys.stdout) + for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"): + rs.write(r) + +if __name__ == "__main__": + main() diff --git a/examples/records.json b/examples/records.json new file mode 100644 index 0000000..30a415a --- /dev/null +++ b/examples/records.json @@ -0,0 +1,2 @@ +{"_type": "recorddescriptor", "_data": ["text/paste", [["string", "key"], ["datetime", "date"], ["datetime", "expire_date"], ["wstring", "title"], ["wstring", "content"], ["wstring", "user"], ["wstring", "syntax"]]]} +{"_classification": "PUBLIC", "_generated": "2019-03-19T09:11:04.706581", "_source": "external/pastebin", "_type": "record", "_recorddescriptor": ["text/paste", 831446724], "_version": 1, "content": "This is the content of a sampe pastebin record", "date": "2019-03-19T09:09:47", "expire_date": "1970-01-01T00:00:00", "key": "Q42eWSaF", "syntax": "text", "title": "A sample pastebin record", "user": ""} diff --git a/examples/tcpconn.py b/examples/tcpconn.py new file mode 100644 index 0000000..46fa7c4 --- /dev/null +++ b/examples/tcpconn.py @@ -0,0 +1,43 @@ +import random + +from datetime import datetime +from flow import record + +conn = record.RecordDescriptor(""" +network/traffic/tcp/connection + datetime ts; + net.ipv4.Address src; + net.tcp.Port srcport; + net.ipv4.Address dst; + net.tcp.Port dstport; +""") + +ip_list = [ + "127.0.0.1", + "1.2.3.4", + "212.33.1.45", + "4.4.4.4", + "8.8.8.8", + "212.1.6.1", +] + +port_list = [ + 22, + 53, + 80, + 443, + 5555 +] + +rs = record.RecordWriter() + +for i in range(500): + r = conn( + ts=datetime.now(), + src=random.choice(ip_list), + srcport=random.choice(port_list), + dst=random.choice(ip_list), + dstport=random.choice(port_list) + ) + + rs.write(r) diff --git a/flow/record/__init__.py b/flow/record/__init__.py new file mode 100644 index 0000000..1d29015 --- /dev/null +++ b/flow/record/__init__.py @@ -0,0 +1,79 @@ +import os + +import gzip + +from flow.record.base import ( + RECORD_VERSION, + FieldType, + Record, + GroupedRecord, + RecordDescriptor, + RecordAdapter, + RecordField, + RecordReader, + RecordWriter, + open_path, + stream, + extend_record, + dynamic_fieldtype, + DynamicDescriptor, + RecordDescriptorError, +) +from flow.record.jsonpacker import JsonRecordPacker +from flow.record.stream import ( + RecordOutput, + RecordPrinter, + RecordPacker, + RecordStreamWriter, + RecordStreamReader, + PathTemplateWriter, + RecordArchiver, + record_stream, +) + +__all__ = [ + 'RECORD_VERSION', 'FieldType', 'Record', 'GroupedRecord', + 'RecordDescriptor', 'RecordAdapter', 'RecordField', 'RecordReader', + 'RecordWriter', 'RecordOutput', 'RecordPrinter', 'RecordPacker', + 'JsonRecordPacker', 'RecordStreamWriter', 'RecordStreamReader', + 'open_path', 'stream', 'dynamic_fieldtype', 'DynamicDescriptor', + 'PathTemplateWriter', 'RecordArchiver', 'RecordDescriptorError', + 'record_stream', 'extend_record', +] + + +class View: + fields = None + + def __init__(self, fields): + self.fields = fields + + def __iter__(self, fields): + pass + + +class RecordDateSplitter: + basepath = None + out = None + + def __init__(self, basepath): + self.basepath = basepath + self.out = {} + + def getstream(self, t): + if t not in self.out: + path = os.path.join(self.basepath, "-".join(["{:2d}".format(v) for v in t]) + ".rec.gz") + f = gzip.GzipFile(path, "wb") + rs = RecordStreamWriter(f) + self.out[t] = rs + return 
self.out[t] + + def write(self, r): + t = (r.ts.year, r.ts.month, r.ts.day) + rs = self.getstream(t) + rs.write(r) + rs.fp.flush() + + def close(self): + for rs in self.out.values(): + rs.close() diff --git a/flow/record/adapter/__init__.py b/flow/record/adapter/__init__.py new file mode 100644 index 0000000..f244376 --- /dev/null +++ b/flow/record/adapter/__init__.py @@ -0,0 +1,64 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) # make this namespace extensible from other packages +import abc + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass. Python 2 and 3 compatible.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(type): + + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +class AbstractWriter(with_metaclass(abc.ABCMeta, object)): + + @abc.abstractmethod + def write(self, rec): + """Write a record.""" + raise NotImplementedError + + @abc.abstractmethod + def flush(self): + """Flush any buffered writes.""" + raise NotImplementedError + + @abc.abstractmethod + def close(self): + """Close the Writer, no more writes will be possible.""" + raise NotImplementedError + + def __del__(self): + self.close() + + def __enter__(self): + return self + + def __exit__(self, *args): + self.flush() + self.close() + + +class AbstractReader(with_metaclass(abc.ABCMeta, object)): + + @abc.abstractmethod + def __iter__(self): + """Return a record iterator.""" + raise NotImplementedError + + def close(self): + """Close the Reader, can be overriden to properly free resources.""" + pass + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() diff --git a/flow/record/adapter/archive.py b/flow/record/adapter/archive.py new file mode 100644 index 0000000..0086282 --- /dev/null +++ b/flow/record/adapter/archive.py @@ -0,0 +1,32 @@ +from flow.record.adapter import AbstractReader, AbstractWriter +from flow.record.stream import RecordArchiver + + +class ArchiveWriter(AbstractWriter): + writer = None + + def __init__(self, path, **kwargs): + self.path = path + + path_template = kwargs.get("path_template") + name = kwargs.get("name") + + self.writer = RecordArchiver(self.path, path_template=path_template, name=name) + + def write(self, r): + self.writer.write(r) + + def flush(self): + # RecordArchiver already flushes after every write + pass + + def close(self): + if self.writer: + self.writer.close() + self.writer = None + + +class ArchiveReader(AbstractReader): + + def __init__(self, path, **kwargs): + raise NotImplementedError diff --git a/flow/record/adapter/avro.py b/flow/record/adapter/avro.py new file mode 100644 index 0000000..c870d72 --- /dev/null +++ b/flow/record/adapter/avro.py @@ -0,0 +1,192 @@ +import json +from importlib.util import find_spec +from datetime import datetime, timedelta, timezone + +import fastavro + +from flow import record +from flow.record.utils import is_stdout +from flow.record.selector import make_selector +from flow.record.adapter import AbstractReader, AbstractWriter + + +AVRO_TYPE_MAP = { + "boolean": "boolean", + "datetime": "long", + "filesize": "long", + "uint16": "int", + "uint32": "int", + "float": "float", + "string": "string", + "unix_file_mode": "long", + "varint": 
"long", + "wstring": "string", + "uri": "string", + "digest": "bytes", + "bytes": "bytes", +} + +RECORD_TYPE_MAP = { + "boolean": "boolean", + "int": "varint", + "long": "varint", + "float": "float", + "string": "string", + "bytes": "bytes", +} + +EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) + + +class AvroWriter(AbstractWriter): + fp = None + writer = None + + def __init__(self, path, key=None, **kwargs): + self.fp = record.open_path(path, "wb") + + self.desc = None + self.schema = None + self.parsed_schema = None + self.writer = None + self.codec = 'snappy' if find_spec('snappy') else 'deflate' + + def write(self, r): + if not self.desc: + self.desc = r._desc + self.schema = descriptor_to_schema(self.desc) + self.parsed_schema = fastavro.parse_schema(self.schema) + self.writer = fastavro.write.Writer(self.fp, self.parsed_schema, codec=self.codec) + + if self.desc != r._desc: + raise Exception("Mixed record types") + + self.writer.write(r._packdict()) + + def flush(self): + if self.writer: + self.writer.flush() + + def close(self): + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None + self.writer = None + + +class AvroReader(AbstractReader): + fp = None + + def __init__(self, path, selector=None, **kwargs): + self.fp = record.open_path(path, "rb") + self.selector = make_selector(selector) + + self.reader = fastavro.reader(self.fp) + self.schema = self.reader.schema + if not self.schema: + raise Exception("Missing Avro schema") + + self.desc = schema_to_descriptor(self.schema) + + # Store the fieldnames that are of type "datetime" + self.datetime_fields = set( + name + for name, field in self.desc.get_all_fields().items() + if field.typename == "datetime" + ) + + def __iter__(self): + for obj in self.reader: + # Convert timestamp-micros fields back to datetime fields + for field_name in self.datetime_fields: + value = obj.get(field_name, None) + if isinstance(value, (int, float)) and value > 0xffffffff: + obj[field_name] = EPOCH + timedelta(microseconds=value) + + rec = self.desc.recordType(**obj) + if not self.selector or self.selector.match(rec): + yield rec + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + + +def descriptor_to_schema(desc): + namespace, _, name = desc.name.rpartition("/") + schema = { + "type": "record", + "namespace": namespace, + "name": name, + "doc": json.dumps(desc._pack()), + "fields": [], + } + + fields = [] + for rf in desc.get_all_fields().values(): + field_name = rf.name + field_type = rf.typename + field_schema = { + "name": field_name, + } + + if field_type == "datetime": + field_schema["type"] = [{"type": "long", "logicalType": "timestamp-micros"}, {"type": "null"}] + else: + avro_type = AVRO_TYPE_MAP.get(field_type) + if not avro_type: + raise Exception("Unsupported Avro type: {}".format(field_type)) + + field_schema["type"] = [avro_type, "null"] + + fields.append(field_schema) + + schema["fields"] = fields + return schema + + +def schema_to_descriptor(schema): + doc = schema.get("doc") + + # Sketchy record descriptor detection + if doc and doc.startswith("[\"") and doc.endswith("]]]"): + name, fields = json.loads(doc) + else: + # No embedded record descriptor, attempt to generate one from the schema + name = "/".join([schema.get("namespace", ""), schema.get("name", "")]).replace(".", "/").strip("/") + fields = [] + + for f in schema.get("fields", []): + field_name = f["name"] + if field_name.startswith("_"): + continue + + field_type = avro_type_to_flow_type(f["type"]) + fields.append([field_type, 
field_name]) + + return record.RecordDescriptor(name, fields) + + +def avro_type_to_flow_type(ftype): + ftypes = [ftype] if not isinstance(ftype, list) else ftype + + # If a field can be null, it has an additional type of "null" + # So iterate over all the types, and break when we have a valid one + for t in ftypes: + if isinstance(t, dict): + if t.get("type") == "array": + item_type = avro_type_to_flow_type(t.get("items")) + return "{}[]".format(item_type) + else: + logical_type = t.get("logicalType") + if logical_type and "time" in logical_type or "date" in logical_type: + return "datetime" + + if t == "null": + continue + + if t in RECORD_TYPE_MAP: + return RECORD_TYPE_MAP[t] + + raise TypeError("Can't map avro type to flow type: {}".format(t)) diff --git a/flow/record/adapter/broker.py b/flow/record/adapter/broker.py new file mode 100644 index 0000000..6a2dfaf --- /dev/null +++ b/flow/record/adapter/broker.py @@ -0,0 +1,47 @@ +from flow.record.adapter import AbstractWriter, AbstractReader +from flow.broker import Publisher, Subscriber + + +class BrokerWriter(AbstractWriter): + publisher = None + + def __init__(self, uri, source=None, classification=None, **kwargs): + self.publisher = Publisher(uri, **kwargs) + self.source = source + self.classification = classification + + def write(self, r): + record = r._replace( + _source=self.source or r._source, + _classification=self.classification or r._classification, + ) + self.publisher.send(record) + + def flush(self): + if self.publisher: + self.publisher.flush() + + def close(self): + if self.publisher: + if hasattr(self.publisher, "stop"): + # Requires flow.broker >= 1.1.1 + self.publisher.stop() + else: + self.publisher.wait() + self.publisher = None + + +class BrokerReader(AbstractReader): + subscriber = None + + def __init__(self, uri, name=None, selector=None, **kwargs): + self.subscriber = Subscriber(uri, **kwargs) + self.subscription = self.subscriber.select(name, str(selector)) + + def __iter__(self): + return iter(self.subscription) + + def close(self): + if self.subscriber: + self.subscriber.stop() + self.subscriber = None diff --git a/flow/record/adapter/csvfile.py b/flow/record/adapter/csvfile.py new file mode 100644 index 0000000..cbb6622 --- /dev/null +++ b/flow/record/adapter/csvfile.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import + +import sys +from csv import DictWriter + +from flow.record import open_path +from flow.record.utils import is_stdout +from flow.record.adapter import AbstractWriter + + +class CsvfileWriter(AbstractWriter): + fp = None + + def __init__(self, path, fields=None, exclude=None, **kwargs): + mode = "w" + if sys.version_info[0] < 3: + mode = "wb" + self.fp = open_path(path, mode) + self.desc = None + self.writer = None + self.fields = fields + self.exclude = exclude + if isinstance(self.fields, str): + self.fields = self.fields.split(",") + if isinstance(self.exclude, str): + self.exclude = self.exclude.split(",") + + def write(self, r): + rdict = r._asdict(fields=self.fields, exclude=self.exclude) + if not self.desc or self.desc != r._desc: + self.desc = r._desc + self.writer = DictWriter(self.fp, rdict) + self.writer.writeheader() + self.writer.writerow(rdict) + + def flush(self): + if self.fp: + self.fp.flush() + + def close(self): + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None diff --git a/flow/record/adapter/elastic.py b/flow/record/adapter/elastic.py new file mode 100644 index 0000000..38c1b1c --- /dev/null +++ b/flow/record/adapter/elastic.py @@ -0,0 
+1,43 @@ +import elasticsearch +import elasticsearch.helpers + +from flow.record.adapter import AbstractWriter, AbstractReader + + +def index_stream(index, it): + for r in it: + d = r.dict() + if "Value" in d: + del d["Value"] + + yield { + "_index": index, + "_type": "event_" + str(d["EventID"]), + "_source": d, + } + + +class ElasticWriter(AbstractWriter): + + def __init__(self, index, **kwargs): + self.index = index + + self.es = elasticsearch.Elasticsearch() + + # def writeblob(self, src): + # count = elasticsearch.helpers.bulk(es, index_stream("logtest", src)) + + def write(self, r): + self.es.index({"_index": self.index, "_type": r._desc.name, "_source": r.dict()}) + + def flush(self): + pass + + def close(self): + pass + + +class ElasticReader(AbstractReader): + + def __iter__(self, r, **kwargs): + raise NotImplementedError() diff --git a/flow/record/adapter/jsonfile.py b/flow/record/adapter/jsonfile.py new file mode 100644 index 0000000..16ab985 --- /dev/null +++ b/flow/record/adapter/jsonfile.py @@ -0,0 +1,68 @@ +import json +from flow import record +from flow.record import JsonRecordPacker +from flow.record.utils import is_stdout +from flow.record.selector import make_selector +from flow.record.adapter import AbstractWriter, AbstractReader +from flow.record.fieldtypes import fieldtype_for_value + + +class JsonfileWriter(AbstractWriter): + fp = None + + def __init__(self, path, indent=None, **kwargs): + self.fp = record.open_path(path, "w") + if isinstance(indent, str): + indent = int(indent) + self.packer = JsonRecordPacker(indent=indent) + self.packer.on_descriptor.add_handler(self.packer_on_new_descriptor) + + def packer_on_new_descriptor(self, descriptor): + self._write(descriptor) + + def _write(self, obj): + record_json = self.packer.pack(obj) + self.fp.write(record_json + u"\n") + + def write(self, r): + self._write(r) + + def flush(self): + if self.fp: + self.fp.flush() + + def close(self): + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None + + +class JsonfileReader(AbstractReader): + fp = None + + def __init__(self, path, selector=None, **kwargs): + self.selector = make_selector(selector) + self.fp = record.open_path(path, "r") + self.packer = JsonRecordPacker() + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + + def __iter__(self): + for line in self.fp: + obj = self.packer.unpack(line) + if isinstance(obj, record.Record): + if not self.selector or self.selector.match(obj): + yield obj + elif isinstance(obj, record.RecordDescriptor): + pass + else: + # fallback for plain jsonlines (non flow.record format) + jd = json.loads(line) + fields = [(fieldtype_for_value(val, "string"), key) for key, val in jd.items()] + desc = record.RecordDescriptor("json/record", fields) + obj = desc(**jd) + if not self.selector or self.selector.match(obj): + yield obj diff --git a/flow/record/adapter/line.py b/flow/record/adapter/line.py new file mode 100644 index 0000000..b38f906 --- /dev/null +++ b/flow/record/adapter/line.py @@ -0,0 +1,37 @@ +from flow.record.adapter import AbstractWriter +from flow.record import open_path +from flow.record.utils import is_stdout + + +class LineWriter(AbstractWriter): + """Prints all fields and values of the Record on a separate line.""" + + fp = None + + def __init__(self, path, fields=None, exclude=None, **kwargs): + self.fp = open_path(path, "wb") + self.count = 0 + self.fields = fields + self.exclude = exclude + if isinstance(self.fields, str): + self.fields = self.fields.split(",") + if 
isinstance(self.exclude, str): + self.exclude = self.exclude.split(",") + + def write(self, rec): + rdict = rec._asdict(fields=self.fields, exclude=self.exclude) + self.count += 1 + self.fp.write("--[ RECORD {} ]--\n".format(self.count).encode()) + if rdict: + fmt = "{{:>{width}}} = {{}}\n".format(width=max(len(k) for k in rdict)) + for (key, value) in rdict.items(): + self.fp.write(fmt.format(key, value).encode()) + + def flush(self): + if self.fp: + self.fp.flush() + + def close(self): + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None diff --git a/flow/record/adapter/mongo.py b/flow/record/adapter/mongo.py new file mode 100644 index 0000000..69c34c5 --- /dev/null +++ b/flow/record/adapter/mongo.py @@ -0,0 +1,91 @@ +import bson +from flow import record +from flow.record.adapter import AbstractReader, AbstractWriter +from flow.record.selector import make_selector +from pymongo import MongoClient + + +def parse_path(path): + elements = path.strip("/").split("/", 2) # max 3 elements + if len(elements) == 2: + return "localhost", elements[0], elements[1] + if len(elements) == 3: + return tuple(elements) + raise ValueError("Invalid mongo path") + + +class MongoWriter(AbstractWriter): + client = None + + def __init__(self, path, key=None, **kwargs): + dbhost, dbname, collection = parse_path(path) + + self.key = key + self.client = MongoClient(host=dbhost) + self.db = self.client[dbname] + self.collection = self.db[collection] + self.coll_descriptors = self.db["_descriptors"] + self.descriptors = {} + + def write(self, r): + d = r._packdict() + d["_type"] = r._desc.identifier + + if r._desc.identifier not in self.descriptors: + self.coll_descriptors.find_and_modify( + {"name": r._desc.identifier}, + {"name": r._desc.identifier, "descriptor": r._desc._pack()}, + upsert=True) + + if self.key: + # i = self.collection.replace({self.key: d[self.key]}, d) # PyMongo3 + self.collection.find_and_modify({self.key: d[self.key]}, d, upsert=True) # PyMongo2 + else: + self.collection.insert(d) + + def flush(self): + pass + + def close(self): + if self.client: + self.client.close() + self.client = None + + +class MongoReader(AbstractReader): + client = None + + def __init__(self, path, selector=None, **kwargs): + dbhost, dbname, collection = parse_path(path) + + self.selector = make_selector(selector) + self.client = MongoClient(host=dbhost) + self.db = self.client[dbname] + self.collection = self.db[collection] + self.coll_descriptors = self.db["_descriptors"] + self.descriptors = {} + + def close(self): + if self.client: + self.client.close() + self.client = None + + def __iter__(self): + desc = None + for r in self.collection.find(): + if r["_type"] not in self.descriptors: + packed_desc = self.coll_descriptors.find({"name": r["_type"]})[0]["descriptor"] + self.descriptors[r["_type"]] = record.RecordDescriptor(*packed_desc) + + desc = self.descriptors[r["_type"]] + + del r["_id"] + del r["_type"] + + for k in list(r.keys()): + if isinstance(r[k], bson.int64.Int64): + r[k] = int(r[k]) + + obj = desc(**r) + if not self.selector or self.selector.match(obj): + yield obj diff --git a/flow/record/adapter/splunk.py b/flow/record/adapter/splunk.py new file mode 100644 index 0000000..8d6c0de --- /dev/null +++ b/flow/record/adapter/splunk.py @@ -0,0 +1,82 @@ +import socket +import logging + +from flow.record.adapter import AbstractReader, AbstractWriter +from flow.record.utils import to_str, to_bytes, to_base64 + + +log = logging.getLogger(__package__) + +RESERVED_SPLUNK_FIELDS = set([ + 
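+    # Field names already used by Splunk itself; colliding record fields are
+    # emitted with an "rd_" prefix by splunkify() below.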
'_indextime', + '_time', + 'index', + 'punct', + 'source', + 'sourcetype', + 'tag', +]) + + +def splunkify(record, tag=None): + ret = [] + + ret.append(f'type="{record._desc.name}"') + + if tag is None: + ret.append('rdtag=None') + else: + ret.append(f'rdtag="{tag}"') + + for field in record._desc.fields: + val = getattr(record, field) + if val is None: + ret.append(f'{field}=None') + else: + val = to_base64(val) if isinstance(val, bytes) else to_str(val) + val = val.replace('\\', '\\\\').replace('"', '\\"') + if field in RESERVED_SPLUNK_FIELDS: + field = f'rd_{field}' + ret.append(f'{field}="{val}"') + + return " ".join(ret) + + +class SplunkWriter(AbstractWriter): + sock = None + + def __init__(self, path, tag=None, **kwargs): + p = path.strip("/").split("/") + host, port = p[0].split(":") + port = int(port) + + self.tag = tag + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP) + self.sock.connect((host, port)) + self.descriptors = {} + self._warned = False + + def write(self, record): + if not self._warned and 'rdtag' in record._desc.fields: + self._warned = True + log.warning( + "Record has 'rdtag' field which conflicts with the Splunk adapter -- " + "Splunk output will have duplicate 'rdtag' fields", + ) + rec = splunkify(record, tag=self.tag) + data = to_bytes(rec) + b"\n" + self.sock.sendall(data) + + def flush(self): + pass + + def close(self): + if self.sock: + self.sock.close() + self.sock = None + + +class SplunkReader(AbstractReader): + + def __init__(self, path, selector=None, **kwargs): + raise NotImplementedError() diff --git a/flow/record/adapter/stream.py b/flow/record/adapter/stream.py new file mode 100644 index 0000000..c07ba4b --- /dev/null +++ b/flow/record/adapter/stream.py @@ -0,0 +1,51 @@ +from flow import record +from flow.record.utils import is_stdout +from flow.record.adapter import AbstractReader, AbstractWriter + + +class StreamWriter(AbstractWriter): + fp = None + stream = None + + def __init__(self, path, clobber=True, **kwargs): + self.fp = record.open_path(path, "wb", clobber=clobber) + self.stream = record.RecordOutput(self.fp) + + def write(self, r): + self.stream.write(r) + + def flush(self): + if self.stream and hasattr(self.stream, "flush"): + self.stream.flush() + if self.fp: + self.fp.flush() + + def close(self): + if self.stream: + self.stream.close() + self.stream = None + + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None + + +class StreamReader(AbstractReader): + fp = None + stream = None + + def __init__(self, path, selector=None, **kwargs): + self.fp = record.open_path(path, "rb") + self.stream = record.RecordStreamReader(self.fp, selector=selector) + + def __iter__(self): + return iter(self.stream) + + def close(self): + if self.stream: + self.stream.close() + self.stream = None + + if self.fp: + self.fp.close() + self.fp = None diff --git a/flow/record/adapter/text.py b/flow/record/adapter/text.py new file mode 100644 index 0000000..1e8ce06 --- /dev/null +++ b/flow/record/adapter/text.py @@ -0,0 +1,50 @@ +from flow.record import open_path +from flow.record.utils import is_stdout +from flow.record.adapter import AbstractWriter + +REPLACE_LIST = [ + (r"\r", "\r"), + (r"\n", "\n"), + (r"\t", "\t"), +] + + +class DefaultMissing(dict): + def __missing__(self, key): + return key.join("{}") + + +class TextWriter(AbstractWriter): + """Records are printed as textual representation with repr() or using `format_spec`.""" + + fp = None + + def __init__(self, path, flush=True, format_spec=None, 
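+                 # format_spec: optional str.format() template applied to each record;
+                 # literal "\r", "\n" and "\t" sequences in it are unescaped via REPLACE_LIST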
**kwargs): + self.fp = open_path(path, "wb") + self.auto_flush = flush + self.format_spec = format_spec + + # Allow some special characters in format template + if self.format_spec: + for old, new in REPLACE_LIST: + self.format_spec = self.format_spec.replace(old, new) + + def write(self, rec): + if self.format_spec: + buf = self.format_spec.format_map(DefaultMissing(rec._asdict())) + else: + buf = repr(rec) + self.fp.write(buf.encode() + b"\n") + + # because stdout is usually line buffered we force flush here if wanted + if self.auto_flush: + self.flush() + + def flush(self): + if self.fp: + self.fp.flush() + + def close(self): + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None diff --git a/flow/record/adapter/xlsx.py b/flow/record/adapter/xlsx.py new file mode 100644 index 0000000..311af1d --- /dev/null +++ b/flow/record/adapter/xlsx.py @@ -0,0 +1,65 @@ +import openpyxl + +from flow import record +from flow.record.utils import is_stdout +from flow.record.selector import make_selector +from flow.record.adapter import AbstractWriter, AbstractReader + + +class XlsxWriter(AbstractWriter): + fp = None + wb = None + + def __init__(self, path, **kwargs): + self.fp = record.open_path(path, "wb") + self.wb = openpyxl.Workbook() + self.ws = self.wb.active + self.desc = None + # self.ws.title = "Records" + + def write(self, r): + if not self.desc: + self.desc = r._desc + self.ws.append(r._desc.fields) + + self.ws.append(r._asdict().values()) + + def flush(self): + if self.wb: + self.wb.save(self.fp) + + def close(self): + if self.wb: + self.wb.close() + self.wb = None + + if self.fp and not is_stdout(self.fp): + self.fp.close() + self.fp = None + + +class XlsxReader(AbstractReader): + fp = None + + def __init__(self, path, selector=None, **kwargs): + self.selector = make_selector(selector) + self.fp = record.open_path(path, "rb") + self.desc = None + self.wb = openpyxl.load_workbook(self.fp) + self.ws = self.wb.active + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + + def __iter__(self): + desc = None + for row in self.ws.rows: + if not desc: + desc = record.RecordDescriptor([col.value.replace(" ", "_").lower() for col in row]) + continue + + obj = desc(*[col.value for col in row]) + if not self.selector or self.selector.match(obj): + yield obj diff --git a/flow/record/base.py b/flow/record/base.py new file mode 100644 index 0000000..f1730c2 --- /dev/null +++ b/flow/record/base.py @@ -0,0 +1,807 @@ +import importlib +import io +import re +import os +import sys +import gzip +import struct +import logging +import keyword +import hashlib +import functools +import collections +try: + # Python 2 + import urlparse +except ImportError: + # Python 3 + import urllib.parse as urlparse +try: + import lz4.frame as lz4 + HAS_LZ4 = True +except ImportError: + HAS_LZ4 = False +try: + import bz2 + HAS_BZ2 = True +except ImportError: + HAS_BZ2 = False +try: + import zstandard as zstd + HAS_ZSTD = True +except ImportError: + HAS_ZSTD = False + +from collections import OrderedDict +from operator import itemgetter as _itemgetter +from .whitelist import WHITELIST, WHITELIST_TREE +from .utils import to_str, to_native_str + +log = logging.getLogger(__package__) + +RECORD_VERSION = 1 +RESERVED_FIELDS = OrderedDict([ + ("_source", "string"), + ("_classification", "string"), + ("_generated", "datetime"), + # For compatibility reasons, always add new reserved fields BEFORE + # the _version field, but AFTER the second to last field + ("_version", "varint"), +]) + +# Compression 
Headers +GZIP_MAGIC = b"\x1f\x8b" +BZ2_MAGIC = b"BZh" +LZ4_MAGIC = b"\x04\x22\x4d\x18" +ZSTD_MAGIC = b"\x28\xb5\x2f\xfd" + +RE_VALID_FIELD_NAME = re.compile(r"^_?[a-zA-Z][a-zA-Z0-9_]*(?:\[\])?$") +RE_VALID_RECORD_TYPE_NAME = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(/[a-zA-Z][a-zA-Z0-9_]*)*$") + +RECORD_CLASS_TEMPLATE = """ +from datetime import datetime +from itertools import zip_longest + +class {name}(Record): + _desc = desc + _field_types = {field_types} + + __slots__ = {slots_tuple} + + def __init__(__self, {args}): +{init_code} + + @classmethod + def _unpack(__cls, {args}): +{unpack_code} +""" + + +class Peekable: + """Wrapper class for adding .peek() to a file object.""" + + def __init__(self, fd): + self.fd = fd + self.buffer = None + + def peek(self, size): + if self.buffer is not None: + raise BufferError("Only 1 peek allowed") + data = self.fd.read(size) + self.buffer = io.BytesIO(data) + return data + + def read(self, size=None): + data = b"" + if self.buffer is None: + data = self.fd.read(size) + else: + data = self.buffer.read(size) + if len(data) < size: + data += self.fd.read(size - len(data)) + self.buffer = None + return data + + def close(self): + self.buffer = None + self.fd.close() + self.fd = None + + +class RecordDescriptorError(Exception): + pass + + +class FieldType: + + def _typename(self): + t = type(self) + t.__module__.split(".fieldtypes.")[1] + "." + t.__name__ + + @classmethod + def default(cls): + """Return the default value for the field in the Record template.""" + return None + + @classmethod + def _unpack(cls, data): + return data + + +class Record: + __slots__ = () + + def __eq__(self, other): + if not isinstance(other, Record): + return False + return self._pack() == other._pack() + + def _pack(self, unversioned=False): + values = [] + for k in self.__slots__: + v = getattr(self, k) + v = v._pack() if isinstance(v, FieldType) else v + + # Skip version field if requested (only for compatibility reasons) + if unversioned and k == "_version" and v == 1: + continue + else: + values.append(v) + + return self._desc.identifier, tuple(values) + + def _packdict(self): + return dict( + (k, v._pack() if isinstance(v, FieldType) else v) + for k, v in ((k, getattr(self, k)) for k in self.__slots__)) + + def _asdict(self, fields=None, exclude=None): + exclude = exclude or [] + if fields: + return OrderedDict((k, getattr(self, k)) for k in fields if k in self.__slots__ and k not in exclude) + return OrderedDict((k, getattr(self, k)) for k in self.__slots__ if k not in exclude) + + def __setattr__(self, k, v): + """Enforce setting the fields to their respective types.""" + # NOTE: This is a HOT code path + field_type = self._field_types.get(k) + if v is not None and k in self.__slots__ and field_type: + if not isinstance(v, field_type): + v = field_type(v) + super().__setattr__(k, v) + + def _replace(self, **kwds): + result = self.__class__(*map(kwds.pop, self.__slots__, (getattr(self, k) for k in self.__slots__))) + if kwds: + raise ValueError('Got unexpected field names: {kwds!r}'.format(kwds=list(kwds))) + return result + + def __repr__(self): + return "<{} {}>".format( + self._desc.name, + " ".join("{}={!r}".format(k, getattr(self, k)) for k in self._desc.fields)) + + +class GroupedRecord(Record): + """ + GroupedRecord acts like a normal Record, but can contain multiple records. + + See it as a flat Record view on top of multiple Records. + If two Records have the same fieldname, the first one will prevail. 
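+
+    Example (a sketch; the descriptors and field names are illustrative):
+
+        meta = MetaDescriptor(author="alice")
+        data = DataDescriptor(value=42)
+        grouped = GroupedRecord("combined/record", [meta, data])
+        grouped.author   # "alice", resolved via the first record that has the field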
+ """ + + def __init__(self, name, records): + super().__init__() + self.name = to_str(name) + self.records = [] + self.descriptors = [] + self.flat_fields = [] + + # to avoid recursion in __setattr__ and __getattr__ + self.__dict__["fieldname_to_record"] = OrderedDict() + + for rec in records: + if isinstance(rec, GroupedRecord): + for r in rec.records: + self.records.append(r) + self.descriptors.append(r._desc) + else: + self.records.append(rec) + self.descriptors.append(rec._desc) + + all_fields = rec._desc.get_all_fields() + required_fields = rec._desc.get_required_fields() + for field in all_fields.values(): + fname = field.name + if fname in self.fieldname_to_record: + continue + self.fieldname_to_record[fname] = rec + if fname not in required_fields: + self.flat_fields.append(field) + # flat descriptor to maintain compatibility with Record + + self._desc = RecordDescriptor(self.name, [(f.typename, f.name) for f in self.flat_fields]) + + def get_record_by_type(self, type_name): + """ + Get record in a GroupedRecord by type_name. + + Args: + type_name (str): The record type name (for example wq/meta). + + Returns: + None or the record + + """ + for record in self.records: + if record._desc.name == type_name: + return record + return None + + def _asdict(self, fields=None, exclude=None): + exclude = exclude or [] + keys = self.fieldname_to_record.keys() + if fields: + return OrderedDict((k, getattr(self, k)) for k in fields if k in keys and k not in exclude) + return OrderedDict((k, getattr(self, k)) for k in keys if k not in exclude) + + def __repr__(self): + return "<{} {}>".format(self.name, self.records) + + def __setattr__(self, attr, val): + if attr in getattr(self, "fieldname_to_record", {}): + x = self.fieldname_to_record.get(attr) + return setattr(x, attr, val) + return object.__setattr__(self, attr, val) + + def __getattr__(self, attr): + x = self.__dict__.get("fieldname_to_record", {}).get(attr) + if x: + return getattr(x, attr) + raise AttributeError(attr) + + def _pack(self): + return ( + self.name, + tuple(record._pack() for record in self.records), + ) + + def _replace(self, **kwds): + new_records = [] + for record in self.records: + new_records.append( + record.__class__(*map(kwds.pop, record.__slots__, (getattr(self, k) for k in record.__slots__))) + ) + if kwds: + raise ValueError('Got unexpected field names: {kwds!r}'.format(kwds=list(kwds))) + return GroupedRecord(self.name, new_records) + + +def is_valid_field_name(name, check_reserved=True): + if check_reserved: + if name in RESERVED_FIELDS: + return False + else: + if name in RESERVED_FIELDS: + return True + + if name.startswith("_"): + return False + + if not RE_VALID_FIELD_NAME.match(name): + return False + + return True + + +def parse_def(definition): + record_type = None + fields = [] + for line in definition.split("\n"): + line = line.strip() + + if not line: + continue + + if not record_type: + record_type = line + else: + _type, name = re.split(r"\s+", line.rstrip(";")) + + fields.append((_type, name)) + + return record_type, fields + + +class RecordField: + name = None + typename = None + type = None + + def __init__(self, name, typename): + if not is_valid_field_name(name, check_reserved=False): + raise RecordDescriptorError("Invalid field name: {}".format(name)) + + self.name = to_str(name) + self.typename = to_str(typename) + + self.type = fieldtype(typename) + + def __repr__(self): + return "".format(self.name, self.typename) + + +class RecordFieldSet(list): + pass + + +class RecordDescriptor: + name 
= None + fields = None + recordType = None + _desc_hash = None + + def __init__(self, name, fields=None): + name = to_str(name) + + if isinstance(fields, RecordDescriptor): + # Clone fields + fields = fields.get_field_tuples() + elif not fields: + name, fields = parse_def(name) + + fields = list([(to_native_str(k), to_str(v)) for k, v in fields]) + + contains_keyword = False + for fieldtype, fieldname in fields: + if not is_valid_field_name(fieldname): + raise RecordDescriptorError("Field '{}' is an invalid or reserved field name.".format(fieldname)) + + # Reserved Python keywords are allowed as field names, but at a cost. + # When a Python keyword is used as a field name, you can't use it as a kwarg anymore + # You'll be forced to either use *args or a expanding a dict to kwargs to initialize a record + # E.g. Record('from_value', 'and_value') or Record(**{'from': 1, 'and': 2}) + # You'll also only be able to get or set reserved attributes using getattr or setattr. + # Record initialization will also be slower, due to a different (slower) implementation + # that is compatible with this method of initializing records. + if keyword.iskeyword(fieldname): + contains_keyword = True + + self.fields = OrderedDict([(n, RecordField(n, _type)) for _type, n in fields]) + all_fields = self.get_all_fields() + self.name = name + + if not RE_VALID_RECORD_TYPE_NAME.match(name): + raise RecordDescriptorError("Invalid record type name") + + args = "" + init_code = "" + unpack_code = "" + + if len(all_fields) >= 255 and not (sys.version_info >= (3, 7)) or contains_keyword: + args = "*args, **kwargs" + init_code = ( + "\t\tfor k, v in zip_longest(__self.__slots__, args):\n" + + "\t\t\tsetattr(__self, k, kwargs.get(k, v))\n" + + "\t\t_generated = __self._generated\n") + unpack_code = ( + "\t\tvalues = dict([(f, __cls._field_types[f]._unpack(kwargs.get(f, v)) " + + "if kwargs.get(f, v) is not None else None) for f, v in zip_longest(__cls.__slots__, args)])\n" + + "\t\treturn __cls(**values)") + else: + args = ", ".join(["{}=None".format(k) for k in all_fields]) + unpack_code = "\t\treturn __cls(\n" + for field in all_fields.values(): + if field.type.default == FieldType.default: + default = FieldType.default() + else: + default = "_field_{field.name}.type.default()".format(field=field) + init_code += "\t\t__self.{field} = {field} if {field} is not None else {default}\n".format( + field=field.name, default=default) + unpack_code += ( + "\t\t\t{field} = _field_{field}.type._unpack({field}) " + + "if {field} is not None else {default},\n").format( + field=field.name, default=default) + unpack_code += "\t\t)" + + init_code += "\t\t__self._generated = _generated or datetime.utcnow()\n\t\t__self._version = RECORD_VERSION" + # Store the fieldtypes so we can enforce them in __setattr__() + field_types = "{\n" + for field in all_fields: + field_types += "\t\t{field!r}: _field_{field}.type,\n".format(field=field) + field_types += "\t}" + + code = RECORD_CLASS_TEMPLATE.format( + name=name.replace("/", "_"), + args=args, + slots_tuple=tuple(all_fields.keys()), + init_code=init_code, + unpack_code=unpack_code, + field_types=field_types, + ) + + code = code.replace("\t", " ") + c = compile(code, "", "exec") + + data = { + "desc": self, "Record": Record, "OrderedDict": OrderedDict, + "_itemgetter": _itemgetter, "_property": property, + "RECORD_VERSION": RECORD_VERSION, + } + for field in all_fields.values(): + data["_field_{}".format(field.name)] = field + + exec(c, data) + + self.recordType = data[name.replace("/", "_")] + 
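+        # The compiled class is exposed as `recordType`; calling the descriptor
+        # itself (see __call__) instantiates it.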
+ self.identifier = (self.name, self.descriptor_hash) + + @staticmethod + def get_required_fields(): + """ + Get required fields. + + Returns: + OrderedDict + + """ + required_fields = OrderedDict([(k, RecordField(k, v)) for k, v in RESERVED_FIELDS.items()]) + return required_fields + + def get_all_fields(self): + """ + Get all fields including required meta fields. + + Returns: + OrderedDict + + """ + required_fields = self.get_required_fields() + fields = self.fields.copy() + fields.update(required_fields) + return fields + + def getfields(self, typename): + if isinstance(typename, DynamicFieldtypeModule): + name = typename.gettypename() + else: + name = typename + + return RecordFieldSet(field for field in self.fields.values() if field.typename == name) + + def __call__(self, *args, **kwargs): + return self.recordType(*args, **kwargs) + + def init_from_dict(self, rdict, raise_unknown=False): + """Create a new Record initialized with key, value pairs from `rdict`. + + If `raise_unknown=True` then fields on `rdict` that are unknown to this + RecordDescriptor will raise a TypeError exception due to initializing + with unknown keyword arguments. (default: False) + + Returns: + Record + + """ + + if not raise_unknown: + rdict = {k: v for k, v in rdict.items() if k in self.recordType.__slots__} + return self.recordType(**rdict) + + def init_from_record(self, record, raise_unknown=False): + """Create a new Record initialized with data from another `record`. + + If `raise_unknown=True` then fields on `record` that are unknown to this + RecordDescriptor will raise a TypeError exception due to initializing + with unknown keyword arguments. (default: False) + + Returns: + Record + + """ + return self.init_from_dict(record._asdict(), raise_unknown=raise_unknown) + + def extend(self, fields): + """Returns a new RecordDescriptor with the extended fields + + Returns: + RecordDescriptor + """ + new_fields = list(self.get_field_tuples()) + fields + return RecordDescriptor(self.name, new_fields) + + def get_field_tuples(self): + """Returns a tuple containing the (typename, name) tuples, eg: + + (('boolean', 'foo'), ('string', 'bar')) + + Returns: + tuple + """ + return tuple((self.fields[f].typename, self.fields[f].name) for f in self.fields) + + @staticmethod + @functools.lru_cache(maxsize=256) + def calc_descriptor_hash(name, fields): + """Calculate and return the (cached) descriptor hash as a 32 bit integer. + + The descriptor hash is the first 4 bytes of the sha256sum of the descriptor name and field names and types. + """ + h = hashlib.sha256(name.encode("utf-8")) + for (typename, name) in fields: + h.update(name.encode("utf-8")) + h.update(typename.encode("utf-8")) + return struct.unpack(">L", h.digest()[:4])[0] + + @property + def descriptor_hash(self): + """Returns the (cached) descriptor hash""" + if not self._desc_hash: + self._desc_hash = self.calc_descriptor_hash(self.name, self.get_field_tuples()) + return self._desc_hash + + def __hash__(self): + return hash((self.name, self.get_field_tuples())) + + def __eq__(self, other): + if isinstance(other, RecordDescriptor): + return self.name == other.name and self.get_field_tuples() == other.get_field_tuples() + return NotImplemented + + def __repr__(self): + return "".format(self.name, self.descriptor_hash) + + def definition(self, reserved=True): + """Return the RecordDescriptor as Python definition string. + + If `reserved` is True it will also return the reserved fields. 
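+
+        The output is roughly of the form (names here are illustrative):
+
+            RecordDescriptor("my/record", [
+                ("string", "field_one"),
+                ("string", "_source"),
+                ...
+            ])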
+ """ + fields = [] + for ftype in self.get_all_fields().values(): + if not reserved and ftype.name.startswith("_"): + continue + fields.append( + ' ("{ftype.typename}", "{ftype.name}"),'.format(ftype=ftype)) + fields_str = "\n".join(fields) + return 'RecordDescriptor("{}", [\n{}\n])'.format(self.name, fields_str) + + def base(self, **kwargs_sink): + def wrapper(**kwargs): + kwargs.update(kwargs_sink) + return self.recordType(**kwargs) + + return wrapper + + def _pack(self): + return self.name, [(field.typename, field.name) for field in self.fields.values()] + + @staticmethod + def _unpack(name, fields): + return RecordDescriptor(name, fields) + + +def DynamicDescriptor(name, fields): + return RecordDescriptor(name, [("dynamic", field) for field in fields]) + + +def open_path(path, mode, clobber=True): + """ + Open `path` using `mode` and returns a file object. + + It handles special cases if path is meant to be stdin or stdout. + And also supports compression based on extension or file header of stream. + + Args: + path (str): Filename or path to filename to open + mode (str): Could be "r", "rb" to open file for reading, "w", "wb" for writing + clobber (bool): Overwrite file if it already exists if `clobber=True`, else raises IOError. + + """ + binary = "b" in mode + fp = None + if mode in ("w", "wb"): + out = True + elif mode in ("r", "rb"): + out = False + else: + raise ValueError("mode string can only be 'r', 'rb', 'w', or 'wb', not {!r}".format(mode)) + + # check for stdin or stdout + is_stdio = path in (None, "", "-") + + # check if output path exists + if not is_stdio and not clobber and os.path.exists(path) and out: + raise IOError("Output file {!r} already exists, and clobber=False".format(path)) + + # check path extension for compression + if path: + if path.endswith(".gz"): + fp = gzip.GzipFile(path, mode) + elif path.endswith(".bz2"): + if not HAS_BZ2: + raise RuntimeError('bz2 python module not available') + fp = bz2.BZ2File(path, mode) + elif path.endswith(".lz4"): + if not HAS_LZ4: + raise RuntimeError('lz4 python module not available') + fp = lz4.open(path, mode) + elif path.endswith((".zstd", ".zst")): + if not HAS_ZSTD: + raise RuntimeError('zstandard python module not available') + if not out: + dctx = zstd.ZstdDecompressor() + fp = dctx.stream_reader(open(path, "rb")) + else: + cctx = zstd.ZstdCompressor() + fp = cctx.stream_writer(open(path, "wb")) + + # normal file or stdio for reading or writing + if not fp: + if is_stdio: + if binary: + fp = getattr(sys.stdout, "buffer", sys.stdout) if out else getattr(sys.stdin, "buffer", sys.stdin) + else: + fp = sys.stdout if out else sys.stdin + else: + fp = io.open(path, mode) + # check if we are reading a compressed stream + if not out and binary: + if not hasattr(fp, "peek"): + fp = Peekable(fp) + peek_data = fp.peek(4) + if peek_data[:2] == GZIP_MAGIC: + fp = gzip.GzipFile(fileobj=fp, mode=mode) + elif HAS_BZ2 and peek_data[:3] == BZ2_MAGIC: + fp = bz2.BZ2File(fp, mode=mode) + elif HAS_LZ4 and peek_data[:4] == LZ4_MAGIC: + fp = lz4.open(fp, mode=mode) + elif HAS_ZSTD and peek_data[:4] == ZSTD_MAGIC: + dctx = zstd.ZstdDecompressor() + fp = dctx.stream_reader(fp) + return fp + + +def RecordAdapter(url, out, selector=None, clobber=True): + url = url or "" + url = str(url) + + # Guess adapter based on extension + ext_to_adapter = { + ".avro": "avro", + ".json": "jsonfile", + } + _, ext = os.path.splitext(url) + + p = urlparse.urlparse(url, ext_to_adapter.get(ext, "stream")) + + if '+' in p.scheme: + adapter, sub_adapter = 
p.scheme.split("+", 1) + else: + adapter = p.scheme + sub_adapter = None + + mod = importlib.import_module("flow.record.adapter.{}".format(adapter)) + + clsname = ("{}Writer" if out else "{}Reader").format(adapter.title()) + + cls = getattr(mod, clsname) + arg_dict = dict(urlparse.parse_qsl(p.query)) + cls_url = p.netloc + p.path + if sub_adapter: + cls_url = sub_adapter + "://" + cls_url + + if not out and selector: + arg_dict["selector"] = selector + + if out: + arg_dict["clobber"] = clobber + + log.debug("Creating {!r} for {!r} with args {!r}".format(cls, url, arg_dict)) + return cls(cls_url, **arg_dict) + + +def RecordReader(url=None, selector=None): + return RecordAdapter(url, False, selector=selector) + + +def RecordWriter(url=None, clobber=True): + return RecordAdapter(url, True, clobber=clobber) + + +def stream(src, dst): + for r in src: + dst.write(r) + dst.flush() + + +def fieldtype(clspath): + if clspath.endswith('[]'): + origpath = clspath + clspath = clspath[:-2] + islist = True + else: + islist = False + + if clspath not in WHITELIST: + raise AttributeError("Invalid field type: {}".format(clspath)) + + p = clspath.rsplit(".", 1) + module_path = "flow.record.fieldtypes" + clsname = p.pop() + if p: + module_path += "." + p[0] + + mod = importlib.import_module(module_path) + + t = getattr(mod, clsname) + + if not issubclass(t, FieldType): + raise AttributeError("Field type does not derive from FieldType") + + if islist: + listtype = type(origpath, mod.typedlist.__bases__, dict(mod.typedlist.__dict__)) + listtype.__type__ = t + t = listtype + + return t + + +def extend_record(record, other_records, replace=False, name=None): + """Extend `record` with fields and values from `other_records`. + + Duplicate fields are ignored in `other_records` unless `replace=True`. + + Args: + record (Record): Initial Record we want to extend. + other_records (List[Record]): List of Records we use for extending/replacing. + replace (bool): if `True`, it will replace existing fields and values + in `record` from fields and values from `other_records`. Last record always wins. + name (str): rename the RecordDescriptor name to `name`. Otherwise, use name from + initial `record`. + """ + field_map = collections.OrderedDict( + (fname, ftype) for (ftype, fname) in record._desc.get_field_tuples() + ) + record_maps = [record._asdict()] + for other in other_records: + for (ftype, fname) in other._desc.get_field_tuples(): + if not replace and fname in field_map: + continue + field_map[fname] = ftype + record_maps.append(other._asdict()) + field_tuples = [(ftype, fname) for (fname, ftype) in field_map.items()] + ExtendedRecord = RecordDescriptor(name or record._desc.name, field_tuples) + if replace: + record_maps = record_maps[::-1] + return ExtendedRecord.init_from_dict(collections.ChainMap(*record_maps)) + + +class DynamicFieldtypeModule: + + def __init__(self, path=""): + self.path = path + + def __getattr__(self, path): + path = (self.path + "." 
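+            # build the dotted type path incrementally, e.g. "net" -> "net.ipv4" -> "net.ipv4.address"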
if self.path else "") + path + + obj = WHITELIST_TREE + for p in path.split('.'): + if p not in obj: + raise AttributeError("Invalid field type: {}".format(path)) + obj = obj[p] + + return DynamicFieldtypeModule(path) + + def gettypename(self): + if fieldtype(self.path): + return self.path + + def __call__(self, *args, **kwargs): + t = fieldtype(self.path) + + return t(*args, **kwargs) + + +net = DynamicFieldtypeModule("net") +dynamic_fieldtype = DynamicFieldtypeModule() diff --git a/flow/record/fieldtypes/__init__.py b/flow/record/fieldtypes/__init__.py new file mode 100644 index 0000000..0a8fdcc --- /dev/null +++ b/flow/record/fieldtypes/__init__.py @@ -0,0 +1,491 @@ +import re +import math +import warnings + +import binascii +from binascii import a2b_hex, b2a_hex +from posixpath import basename, dirname + +from datetime import datetime as _dt, timedelta +from flow.record.base import FieldType + +try: + import urlparse +except ImportError: + import urllib.parse as urlparse + +RE_NORMALIZE_PATH = re.compile(r'[\\/]+') +NATIVE_UNICODE = isinstance(u'', str) + +string_type = str +varint_type = int +bytes_type = bytes +float_type = float + + +def fieldtype_for_value(value, default="string"): + """Returns fieldtype name derived from the value. Returns `default` if it cannot be derived. + + Args: + value: value to derive the fieldtype from + + Returns: + str: the field type name or `default` if it cannot be derived + + Examples: + >>> fieldtype_for_value("hello") + "string" + >>> fieldtype_for_value(1337) + "varint" + >>> fieldtype_for_value(object(), None) + None + """ + if isinstance(value, bytes_type): + return "bytes" + elif isinstance(value, string_type): + return "string" + elif isinstance(value, float_type): + return "float" + elif isinstance(value, bool): + return "boolean" + elif isinstance(value, (varint_type, int)): + return "varint" + elif isinstance(value, _dt): + return "datetime" + return default + + +class dynamic(FieldType): + + def __new__(cls, obj): + if isinstance(obj, FieldType): + # Already a flow field type + return obj + + elif isinstance(obj, bytes_type): + return bytes(obj) + + elif isinstance(obj, string_type): + return string(obj) + + elif isinstance(obj, bool): + # Must appear before int, because bool is a subclass of int + return boolean(obj) + + elif isinstance(obj, (varint_type, int)): + return varint(obj) + + elif isinstance(obj, _dt): + return datetime(obj) + + elif isinstance(obj, (list, tuple)): + return stringlist(obj) + + raise NotImplementedError("Unsupported type for dynamic fieldtype: {}".format(type(obj))) + + +class typedlist(list, FieldType): + + __type__ = None + + def __init__(self, values=None): + if not values: + values = [] + super(self.__class__, self).__init__(self._convert(values)) + + def _convert(self, values): + return [self.__type__(f) if not isinstance(f, self.__type__) else f for f in values] + + def _pack(self): + result = [] + for f in self: + if not isinstance(f, self.__type__): + # Dont pack records already, it's the job of RecordPacker to pack record fields. + # Otherwise unpacking will yield unexpected results (records that are not unpacked). 
+ if self.__type__ == record: + r = f + else: + r = self.__type__(f)._pack() + result.append(r) + else: + r = f._pack() + result.append(r) + return result + + @classmethod + def _unpack(cls, data): + data = map(cls.__type__._unpack, data) + return cls(data) + + @classmethod + def default(cls): + """Override default so the field is always an empty list.""" + return cls() + + +class dictlist(list, FieldType): + + def _pack(self): + return self + + +class stringlist(list, FieldType): + + def _pack(self): + return self + + +class string(string_type, FieldType): + + def __new__(cls, value): + if isinstance(value, bytes_type): + value = cls._decode(value, "utf-8") + if isinstance(value, bytes_type): + # Still bytes, so decoding failed (Python 2) + return bytes(value) + return super().__new__(cls, value) + + def _pack(self): + return self + + @classmethod + def _decode(cls, data, encoding): + """Decode a byte-string into a unicode-string. + + Python 3: When `data` contains invalid unicode characters a `UnicodeDecodeError` is raised. + Python 2: When `data` contains invalid unicode characters the original byte-string is returned. + """ + if NATIVE_UNICODE: + # Raises exception on decode error + return data.decode(encoding) + try: + return data.decode(encoding) + except UnicodeDecodeError: + # Fallback to bytes (Python 2 only) + preview = data[:16].encode('hex_codec') + ('..' if len(data) > 16 else '') + warnings.warn("Got binary data in string field (hex: {}). Compatibility is not guaranteed.".format( + preview), RuntimeWarning) + return data + + +# Alias for backwards compatibility +wstring = string + + +class bytes(bytes_type, FieldType): + value = None + + def __init__(self, value): + if not isinstance(value, bytes_type): + raise TypeError("Value not of bytes type") + self.value = value + + def _pack(self): + return self.value + + def __repr__(self): + return repr(self.value) + + +class datetime(_dt, FieldType): + + def __new__(cls, *args, **kwargs): + if len(args) == 1 and not kwargs: + arg = args[0] + if isinstance(arg, bytes_type): + arg = arg.decode("utf-8") + if isinstance(arg, string_type): + # I expect ISO 8601 format e.g. datetime.isformat() + # When the microseconds part is 0, str(datetime) will not print the microsecond part (only seconds) + # So we have to account for this. + # String constructor is used for example in JsonRecordAdapter + if "." 
in arg: + return cls.strptime(arg, "%Y-%m-%dT%H:%M:%S.%f") + else: + return cls.strptime(arg, "%Y-%m-%dT%H:%M:%S") + elif isinstance(arg, (int,)): + return cls.utcfromtimestamp(arg) + elif isinstance(arg, (_dt,)): + return _dt.__new__( + cls, + arg.year, arg.month, arg.day, + arg.hour, arg.minute, arg.second, arg.microsecond, + arg.tzinfo) + + return _dt.__new__(cls, *args, **kwargs) + + def __eq__(self, other): + return self - other == timedelta(0) + + def _pack(self): + return self + + def __repr__(self): + result = str(self) + return result + + +class varint(varint_type, FieldType): + + def _pack(self): + return self + + +class float(float, FieldType): + + def _pack(self): + return self + + +class uint16(int, FieldType): + + value = None + + def __init__(self, value): + if value < 0 or value > 0xffff: + raise ValueError("Value not within (0x0, 0xffff), got: {}".format(value)) + + self.value = value + + def _pack(self): + return self.value + + def __repr__(self): + return str(self.value) + + +class uint32(int, FieldType): + value = None + + def __init__(self, value): + if value < 0 or value > 0xffffffff: + raise ValueError("Value not within (0x0, 0xffffffff), got {}".format(value)) + + self.value = value + + def _pack(self): + return self.value + + +class boolean(int, FieldType): + value = None + + def __init__(self, value): + if value < 0 or value > 1: + raise ValueError("Value not a valid boolean value") + + self.value = bool(value) + + def _pack(self): + return self.value + + def __str__(self): + return str(self.value) + + def __repr__(self): + return str(self.value) + + +def human_readable_size(x): + # hybrid of http://stackoverflow.com/a/10171475/2595465 + # with http://stackoverflow.com/a/5414105/2595465 + if x == 0: + return '0' + magnitude = int(math.log(abs(x), 10.24)) + if magnitude > 16: + format_str = '%iP' + # denominator_mag = 15 + else: + float_fmt = '%2.1f' if magnitude % 3 == 1 else '%1.2f' + illion = (magnitude + 1) // 3 + format_str = float_fmt + " " + [' ', 'K', 'M', 'G', 'T', 'P'][illion] + return (format_str % (x * 1.0 / (1024 ** illion))) + "B" + + +class filesize(varint): + + def __repr__(self): + return human_readable_size(self) + + +class unix_file_mode(varint): + + def __repr__(self): + return oct(self).rstrip("L") + + +class digest(FieldType): + __md5 = __md5_bin = None + __sha1 = __sha1_bin = None + __sha256 = __sha256_bin = None + + def __init__(self, value=None, **kwargs): + if isinstance(value, (tuple, list)): + self.md5, self.sha1, self.sha256 = value + elif isinstance(value, dict): + self.md5 = value.get("md5", self.md5) + self.sha1 = value.get("sha1", self.sha1) + self.sha256 = value.get("sha256", self.sha256) + + @classmethod + def default(cls): + """Override default so the field is always a digest() instance.""" + return cls() + + def __repr__(self): + return "(md5={d.md5}, sha1={d.sha1}, sha256={d.sha256})".format(d=self) + + @property + def md5(self): + return self.__md5 + + @property + def sha1(self): + return self.__sha1 + + @property + def sha256(self): + return self.__sha256 + + @md5.setter + def md5(self, val): + if val is None: + self.__md5 = self.__md5_bin = None + return + try: + self.__md5_bin = a2b_hex(val) + self.__md5 = val + if len(self.__md5_bin) != 16: + raise TypeError("Incorrect hash length") + except binascii.Error as e: + raise TypeError("Invalid MD5 value {!r}, {}".format(val, e)) + + @sha1.setter + def sha1(self, val): + if val is None: + self.__sha1 = self.__sha1_bin = None + return + try: + self.__sha1_bin = a2b_hex(val) + 
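+            # keep both the hex string and its binary form; the length check below
+            # rejects values that are not 20 bytes (40 hex characters)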
self.__sha1 = val + if len(self.__sha1_bin) != 20: + raise TypeError("Incorrect hash length") + except binascii.Error as e: + raise TypeError("Invalid SHA-1 value {!r}, {}".format(val, e)) + + @sha256.setter + def sha256(self, val): + if val is None: + self.__sha256 = self.__sha256_bin = None + return + try: + self.__sha256_bin = a2b_hex(val) + self.__sha256 = val + if len(self.__sha256_bin) != 32: + raise TypeError("Incorrect hash length") + except binascii.Error as e: + raise TypeError("Invalid SHA-256 value {!r}, {}".format(val, e)) + + def _pack(self): + return ( + self.__md5_bin, + self.__sha1_bin, + self.__sha256_bin, + ) + + @classmethod + def _unpack(cls, data): + value = ( + b2a_hex(data[0]).decode() if data[0] else None, + b2a_hex(data[1]).decode() if data[1] else None, + b2a_hex(data[2]).decode() if data[2] else None, + ) + return cls(value) + + +class uri(string, FieldType): + + def __init__(self, value): + self._parsed = urlparse.urlparse(value) + + @staticmethod + def normalize(path): + r"""Normalize Windows paths to posix. + + c:\windows\system32\cmd.exe -> c:/windows/system32/cmd.exe + """ + return RE_NORMALIZE_PATH.sub('/', path) + + @classmethod + def from_windows(cls, path): + """Initialize a uri instance from a windows path.""" + return cls(uri.normalize(path)) + + @property + def scheme(self): + return self._parsed.scheme + + @property + def protocol(self): + return self.scheme + + @property + def netloc(self): + return self._parsed.netloc + + @property + def path(self): + return self._parsed.path + + @property + def params(self): + return self._parsed.params + + @property + def query(self): + return self._parsed.query + + @property + def args(self): + return self.query + + @property + def fragment(self): + return self._parsed.fragment + + @property + def username(self): + return self._parsed.username + + @property + def password(self): + return self._parsed.password + + @property + def hostname(self): + return self._parsed.hostname + + @property + def port(self): + return self._parsed.port + + @property + def filename(self): + return basename(self.path) + + @property + def dirname(self): + return dirname(self.path) + + +class record(FieldType): + + def __new__(cls, record_value): + return record_value + + def _pack(self): + return self.value + + @classmethod + def _unpack(cls, data): + return data diff --git a/flow/record/fieldtypes/credential.py b/flow/record/fieldtypes/credential.py new file mode 100644 index 0000000..cc87675 --- /dev/null +++ b/flow/record/fieldtypes/credential.py @@ -0,0 +1,9 @@ +from flow.record.fieldtypes import string + + +class username(string): + pass + + +class password(string): + pass diff --git a/flow/record/fieldtypes/net/__init__.py b/flow/record/fieldtypes/net/__init__.py new file mode 100644 index 0000000..10e83e3 --- /dev/null +++ b/flow/record/fieldtypes/net/__init__.py @@ -0,0 +1,15 @@ +from flow.record.fieldtypes import string +from .ip import ipaddress, ipnetwork, IPAddress, IPNetwork + +__all__ = [ + 'ipaddress', 'ipnetwork', + 'IPAddress', 'IPNetwork', +] + + +class hostname(string): + pass + + +class email(string): + pass diff --git a/flow/record/fieldtypes/net/ip.py b/flow/record/fieldtypes/net/ip.py new file mode 100644 index 0000000..b11c680 --- /dev/null +++ b/flow/record/fieldtypes/net/ip.py @@ -0,0 +1,80 @@ +from ipaddress import ip_address, ip_network +from flow.record.base import FieldType + + +class ipaddress(FieldType): + val = None + _type = "net.ipaddress" + + def __init__(self, addr): + self.val = ip_address(addr) 
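+        # ip_address() accepts IPv4 and IPv6 values as strings, integers or packed bytes;
+        # comparison against plain strings works via __eq__ below, e.g.
+        # ipaddress("10.13.37.1") == "10.13.37.1" is True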
+ + def __eq__(self, b): + try: + return self.val == ip_address(b) + except ValueError: + return False + + def __str__(self): + return str(self.val) + + def __repr__(self): + return "{}({!r})".format(self._type, str(self)) + + def _pack(self): + return self.val.packed + + @staticmethod + def _unpack(data): + return ipaddress(data) + + +class ipnetwork(FieldType): + val = None + _type = "net.ipnetwork" + + def __init__(self, addr): + self.val = ip_network(addr) + + def __eq__(self, b): + try: + return self.val == ip_network(b) + except ValueError: + return False + + @staticmethod + def _is_subnet_of(a, b): + try: + # Always false if one is v4 and the other is v6. + if a._version != b._version: + raise TypeError("{} and {} are not of the same version".format(a, b)) + return (b.network_address <= a.network_address and + b.broadcast_address >= a.broadcast_address) + except AttributeError: + raise TypeError("Unable to test subnet containment " + "between {} and {}".format(a, b)) + + def __contains__(self, b): + try: + return self._is_subnet_of(ip_network(b), self.val) + except (ValueError, TypeError): + return False + + def __str__(self): + return str(self.val) + + def __repr__(self): + return "{}({!r})".format(self._type, str(self)) + + def _pack(self): + return self.val.compressed + + @staticmethod + def _unpack(data): + return ipnetwork(data) + + +# alias: net.IPAddress -> net.ipaddress +# alias: net.IPNetwork -> net.ipnetwork +IPAddress = ipaddress +IPNetwork = ipnetwork diff --git a/flow/record/fieldtypes/net/ipv4.py b/flow/record/fieldtypes/net/ipv4.py new file mode 100644 index 0000000..e271b74 --- /dev/null +++ b/flow/record/fieldtypes/net/ipv4.py @@ -0,0 +1,137 @@ +import struct +import socket + +from flow.record import FieldType +from flow.record.utils import to_native_str + + +def addr_long(s): + if isinstance(s, Address): + return s.val + + if isinstance(s, int): + return s + + return struct.unpack(">I", socket.inet_aton(s))[0] + + +def addr_str(s): + if isinstance(s, Address): + return socket.inet_ntoa(struct.pack(">I", s.val)) + + if isinstance(s, int): + return socket.inet_ntoa(struct.pack(">I", s)) + + return s + + +def mask_to_bits(n): + return bin(n).count("1") + + +def bits_to_mask(b): + return (0xffffffff << (32 - b)) & 0xffffffff + + +class subnet(FieldType): + net = None + mask = None + _type = "net.ipv4.subnet" + + def __init__(self, addr, netmask=None): + if isinstance(addr, type(u'')): + addr = to_native_str(addr) + + if not isinstance(addr, str): + raise TypeError("Subnet() argument 1 must be string, not {}".format(type(addr).__name__)) + + if netmask is None: + ip, sep, mask = addr.partition("/") + self.mask = bits_to_mask(int(mask)) if mask else 0xffffffff + self.net = addr_long(ip) + else: + self.net = addr_long(addr) + self.mask = bits_to_mask(netmask) + + if self.net & self.mask != self.net: + suggest = '{}/{}'.format(addr_str(self.net & self.mask), mask_to_bits(self.mask)) + raise ValueError("Not a valid subnet {!r}, did you mean {!r} ?".format(str(addr), suggest)) + + def __contains__(self, addr): + if addr is None: + return False + + if isinstance(addr, type(u'')): + addr = to_native_str(addr) + + if isinstance(addr, str): + addr = addr_long(addr) + + if isinstance(addr, Address): + addr = addr.val + + if isinstance(addr, int): + return addr & self.mask == self.net + + return False + + def __str__(self): + return "{0}/{1}".format(addr_str(self.net), mask_to_bits(self.mask)) + + def __repr__(self): + return "{}({!r})".format(self._type, str(self)) + + +class 
SubnetList: + subnets = None + + def __init__(self): + self.subnets = [] + + def load(self, path): + f = open(path, "rb") + for line in f: + entry, desc = line.split(" ", 1) + self.subnets.append(Subnet(entry)) + + f.close() + + def add(self, subnet): + self.subnets.append(Subnet(subnet)) + + def __contains__(self, addr): + if type(addr) is str: + addr = addr_long(addr) + + return any(addr in s for s in self.subnets) + + +class address(FieldType): + val = None + _type = "net.ipv4.address" + + def __init__(self, addr): + self.val = addr_long(addr) + + def __eq__(self, b): + return addr_long(self) == addr_long(b) + + def __str__(self): + return addr_str(self.val) + + def __repr__(self): + return "{}({!r})".format(self._type, str(self)) + + def _pack(self): + return self.val + + @staticmethod + def _unpack(data): + return address(data) + + +# Backwards compatiblity +Address = address +Subnet = subnet + +__all__ = ["address", "subnet", "Address", "Subnet", "SubnetList"] diff --git a/flow/record/fieldtypes/net/tcp.py b/flow/record/fieldtypes/net/tcp.py new file mode 100644 index 0000000..aa4f4d9 --- /dev/null +++ b/flow/record/fieldtypes/net/tcp.py @@ -0,0 +1,9 @@ +from flow.record.fieldtypes import uint16 + + +class port(uint16): + pass + + +# Backwards compatiblity +Port = port diff --git a/flow/record/fieldtypes/net/udp.py b/flow/record/fieldtypes/net/udp.py new file mode 100644 index 0000000..aa4f4d9 --- /dev/null +++ b/flow/record/fieldtypes/net/udp.py @@ -0,0 +1,9 @@ +from flow.record.fieldtypes import uint16 + + +class port(uint16): + pass + + +# Backwards compatiblity +Port = port diff --git a/flow/record/jsonpacker.py b/flow/record/jsonpacker.py new file mode 100644 index 0000000..ca4ae35 --- /dev/null +++ b/flow/record/jsonpacker.py @@ -0,0 +1,101 @@ +import json +import base64 +import logging +from datetime import datetime + +from . 
import fieldtypes +from .base import Record, RecordDescriptor +from .utils import EventHandler + +log = logging.getLogger(__package__) + + +class JsonRecordPacker: + + def __init__(self, indent=None): + self.descriptors = {} + self.on_descriptor = EventHandler() + self.indent = indent + + def register(self, desc, notify=False): + if not isinstance(desc, RecordDescriptor): + raise Exception("Expected Record Descriptor") + + # Descriptor already known + if desc.identifier in self.descriptors: + return + + # versioned record descriptor + self.descriptors[desc.identifier] = desc + + # for older non versioned records + self.descriptors[desc.name] = desc + + if notify and self.on_descriptor: + log.debug("JsonRecordPacker::on_descriptor {}".format(desc)) + self.on_descriptor(desc) + + def pack_obj(self, obj): + if isinstance(obj, Record): + if obj._desc.identifier not in self.descriptors: + self.register(obj._desc, True) + serial = obj._asdict() + serial['_type'] = 'record' + serial['_recorddescriptor'] = obj._desc.identifier + + # PYTHON2: Because "bytes" are also "str" we have to handle this here + for (field_type, field_name) in obj._desc.get_field_tuples(): + if field_type == "bytes" and isinstance(serial[field_name], str): + serial[field_name] = base64.b64encode(serial[field_name]).decode() + + return serial + if isinstance(obj, RecordDescriptor): + serial = { + '_type': 'recorddescriptor', + '_data': obj._pack(), + } + return serial + if isinstance(obj, datetime): + serial = obj.strftime("%Y-%m-%dT%H:%M:%S.%f") + return serial + if isinstance(obj, fieldtypes.digest): + return { + "md5": obj.md5, + "sha1": obj.sha1, + "sha256": obj.sha256, + } + if isinstance(obj, (fieldtypes.net.ipaddress, fieldtypes.net.ipnetwork)): + return str(obj) + if isinstance(obj, bytes): + return base64.b64encode(obj).decode() + + raise Exception("Unpackable type " + str(type(obj))) + + def unpack_obj(self, obj): + if isinstance(obj, dict): + _type = obj.get('_type', None) + if _type == "record": + record_descriptor_identifier = obj['_recorddescriptor'] + record_descriptor_identifier = tuple(record_descriptor_identifier) + record_descriptor = self.descriptors[record_descriptor_identifier] + del obj['_recorddescriptor'] + del obj['_type'] + for (field_type, field_name) in record_descriptor.get_field_tuples(): + if field_type == "bytes": + obj[field_name] = base64.b64decode(obj[field_name]) + result = record_descriptor.recordType(**obj) + return result + if _type == "recorddescriptor": + data = obj['_data'] + return RecordDescriptor._unpack(*data) + return obj + + def pack(self, obj): + return json.dumps(obj, default=self.pack_obj, indent=self.indent) + + def unpack(self, d): + record_dict = json.loads(d, object_hook=self.unpack_obj) + result = self.unpack_obj(record_dict) + if isinstance(result, RecordDescriptor): + self.register(result) + return result diff --git a/flow/record/packer.py b/flow/record/packer.py new file mode 100644 index 0000000..efcbf9b --- /dev/null +++ b/flow/record/packer.py @@ -0,0 +1,167 @@ +import warnings +import binascii +import datetime +import msgpack +import functools + +from . 
import fieldtypes +from .base import Record, FieldType, RecordDescriptor, GroupedRecord, RESERVED_FIELDS, RECORD_VERSION +from .utils import EventHandler, to_str + +# Override defaults for msgpack packb/unpackb +packb = functools.partial(msgpack.packb, use_bin_type=True) +unpackb = functools.partial(msgpack.unpackb, raw=False) + +RECORD_PACK_EXT_TYPE = 0xe + +RECORD_PACK_TYPE_RECORD = 0x1 +RECORD_PACK_TYPE_DESCRIPTOR = 0x2 +RECORD_PACK_TYPE_FIELDTYPE = 0x3 +RECORD_PACK_TYPE_DATETIME = 0x10 +RECORD_PACK_TYPE_VARINT = 0x11 +RECORD_PACK_TYPE_GROUPEDRECORD = 0x12 + + +def identifier_to_str(identifier): + if isinstance(identifier, tuple) and len(identifier) == 2: + return (to_str(identifier[0]), identifier[1]) + else: + return to_str(identifier) + + +class RecordPacker: + EXT_TYPE = RECORD_PACK_EXT_TYPE + TYPES = [FieldType, Record, RecordDescriptor] + + def __init__(self): + self.descriptors = {} + self.on_descriptor = EventHandler() + + def register(self, desc, notify=False): + if not isinstance(desc, RecordDescriptor): + raise Exception("Expected Record Descriptor") + + # versioned record descriptor + self.descriptors[desc.identifier] = desc + + # for older non versioned records + self.descriptors[desc.name] = desc + + if notify and self.on_descriptor: + self.on_descriptor(desc) + + def pack_obj(self, obj, unversioned=False): + packed = None + + if isinstance(obj, datetime.datetime): + t = obj.utctimetuple()[:6] + (obj.microsecond, ) + packed = (RECORD_PACK_TYPE_DATETIME, t) + + elif isinstance(obj, int): + neg = obj < 0 + h = hex(abs(obj))[2:].rstrip("L") + if len(h) % 2 != 0: + h = "0" + h + + packed = RECORD_PACK_TYPE_VARINT, (neg, binascii.a2b_hex(h)) + + elif isinstance(obj, GroupedRecord): + for desc in obj.descriptors: + if desc.identifier not in self.descriptors: + self.register(desc, True) + + packed = RECORD_PACK_TYPE_GROUPEDRECORD, obj._pack() + + elif isinstance(obj, Record): + if obj._desc.identifier not in self.descriptors: + self.register(obj._desc, True) + + data = obj._pack(unversioned=unversioned) + packed = RECORD_PACK_TYPE_RECORD, data + + elif isinstance(obj, RecordDescriptor): + packed = RECORD_PACK_TYPE_DESCRIPTOR, obj._pack() + + if not packed: + raise Exception("Unpackable type " + str(type(obj))) + + return msgpack.ExtType(RECORD_PACK_EXT_TYPE, self.pack(packed)) + + def pack(self, obj): + return packb(obj, default=self.pack_obj) + + def unpack_obj(self, t, data): + if t != RECORD_PACK_EXT_TYPE: + raise Exception("Unknown ExtType") + + subtype, value = self.unpack(data) + + if subtype == RECORD_PACK_TYPE_DATETIME: + dt = fieldtypes.datetime(*value) + return dt + + if subtype == RECORD_PACK_TYPE_VARINT: + neg, h = value + v = int(binascii.b2a_hex(h), 16) + if neg: + v = -v + + return v + + if subtype == RECORD_PACK_TYPE_RECORD: + identifier, values = value + identifier = identifier_to_str(identifier) + desc = self.descriptors[identifier] + + # Compatibility for older records + # We check the actual amount of values against the expected amount of values + # The values received include reserved fields, so we have to add them to the + # fields already declared in the descriptor. + # The descriptor should be received from the same stream, so any inconsistency + # in field count should be from reserved fields. + version = values[-1] + expected_len = len(desc.fields) + len(RESERVED_FIELDS) + + # Perform some basic checking on record version, if any, and issue a warning if needed. 
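+            # Illustrative sketch, not part of the original code: in the common case the
+            # version matches and the field count lines up, so no warning is issued.
+            # Assuming RECORD_VERSION == 1:
+            #
+            #   packer = RecordPacker()
+            #   TestRecord = RecordDescriptor("test/compat", [("string", "value")])
+            #   blob = packer.pack(TestRecord(value="hello"))
+            #   record = packer.unpack(blob)  # round-trips without hitting the fallbacks below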
+ if not isinstance(version, int) or version < 1 or version > 255: + warnings.warn( + ("Got old style record with no version information (expected {:d}). " + + "Compatibility is not guaranteed.").format( + RECORD_VERSION), RuntimeWarning) + elif version != RECORD_VERSION: + warnings.warn( + "Got other version record (expected {:d}, got {:d}). Compatibility is not guaranteed.".format( + RECORD_VERSION, version), RuntimeWarning) + # Optionally add compatibility code here later + + # If the actual amount of fields is less, there's nothing we can really do. + # If the actual amount of fields is more, we strip additional fields but + # maintain the version field + # This implies that any record that has _more_ reserved fields always + # has a version field. + if len(values) > expected_len: + # Likely newer style record. Strip extra fields but maintain version field + values = values[:expected_len - 1] + values += (version,) + + return desc.recordType._unpack(*values) + + if subtype == RECORD_PACK_TYPE_GROUPEDRECORD: + name, packed_records = value + records = [] + for value in packed_records: + identifier, values = value + identifier = identifier_to_str(identifier) + desc = self.descriptors[identifier] + records.append(desc.recordType._unpack(*values)) + return GroupedRecord(name, records) + + if subtype == RECORD_PACK_TYPE_DESCRIPTOR: + name, fields = value + name = to_str(name) + return RecordDescriptor._unpack(name, fields) + + raise Exception("Unknown subtype: %x" % subtype) + + def unpack(self, d): + return unpackb(d, ext_hook=self.unpack_obj, use_list=False) diff --git a/flow/record/selector.py b/flow/record/selector.py new file mode 100644 index 0000000..ba00308 --- /dev/null +++ b/flow/record/selector.py @@ -0,0 +1,714 @@ +import __future__ + +import ast +import operator +import re + +from flow.record.base import GroupedRecord, Record, dynamic_fieldtype +from flow.record.fieldtypes import net +from flow.record.whitelist import WHITELIST, WHITELIST_TREE + +try: + import astor + HAVE_ASTOR = True +except ImportError: + HAVE_ASTOR = False + +string_types = (str, type(u'')) + +AST_NODE_S_TYPES = tuple( + filter(None, [ + getattr(ast, "Str", None), + getattr(ast, "Bytes", None), + ]), +) + +AST_NODE_VALUE_TYPES = tuple( + filter(None, [ + getattr(ast, "NameConstant", None), + getattr(ast, "Constant", None), + ]), +) + +AST_OPERATORS = { + ast.Add: operator.add, + ast.Mult: operator.mul, + ast.Div: operator.truediv, + ast.And: operator.and_, + ast.Or: operator.or_, + ast.Not: operator.not_, + ast.Mod: operator.mod, + ast.BitAnd: operator.and_, + ast.BitOr: operator.or_, +} + +AST_COMPARATORS = { + ast.Eq: operator.eq, + ast.In: lambda left, right: + False if (isinstance(left, NoneObject) or isinstance(right, NoneObject)) + else operator.contains(right, left), + ast.NotIn: lambda left, right: + False if (isinstance(left, NoneObject) or isinstance(right, NoneObject)) + else operator.contains(right, left) is False, + ast.NotEq: operator.ne, + ast.Gt: operator.gt, + ast.Lt: operator.lt, + ast.GtE: operator.ge, + ast.LtE: operator.le, + ast.Is: operator.is_, + ast.IsNot: operator.is_not, +} + + +class NoneObject: + """Returned in the Selector matching if a field does not exist on the Record. + + NoneObject is used to override some comparators like __contains__. 
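+
+    Illustrative example (not part of the original docstring): comparisons against a
+    missing field quietly evaluate to False instead of raising:
+
+        value = getattr(record, "missing_field", NONE_OBJECT)
+        value == "foo"   # False
+        "foo" in value   # False, via __contains__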
+ """ + + def __eq__(a, b): + return False + + def __ne__(a, b): + return False + + def __lt__(a, b): + return False + + def __gt__(a, b): + return False + + def __lte__(a, b): + return False + + def __gte__(a, b): + return False + + def __noteq__(a, b): + return False + + def __contains__(a, b): + return False + + def __len__(self): + return 0 + + +NONE_OBJECT = NoneObject() + + +class InvalidSelectorError(Exception): + pass + + +class InvalidOperation(Exception): + pass + + +def lower(s): + """Return lowercased string, otherwise `s` if not string type.""" + if isinstance(s, string_types): + return s.lower() + return s + + +def upper(s): + """Return uppercased string, otherwise `s` if not string type.""" + if isinstance(s, string_types): + return s.upper() + return s + + +def names(r): + """Return the available names as a set in the Record otherwise ['UnknownRecord'].""" + if isinstance(r, GroupedRecord): + return set(sub_record._desc.name for sub_record in r.records) + if isinstance(r, (Record, WrappedRecord)): + return set([r._desc.name]) + return ["UnknownRecord"] + + +def name(r): + """Return the name of the Record otherwise 'UnknownRecord'.""" + if isinstance(r, (Record, WrappedRecord)): + return r._desc.name + return "UnknownRecord" + + +def get_type(obj): + """Return the type of the Object as 'str'.""" + return str(type(obj)) + + +def has_field(r, field): + """Check if field exists on Record object. + + Args: + r: Record to match on. + field_name: Field name + + Returns: + (bool): True if field exists, otherwise False + + """ + return field in r._desc.fields + + +def field_regex(r, fields, regex): + """Check a regex against fields of a Record object. + + Args: + r: The record to match on. + fields: The fields in the Record to match. + regex: The regex pattern to search for. + + Returns: + (bool): True or False + + """ + s_pattern = re.compile(regex) + for field in fields: + fvalue = getattr(r, field, NONE_OBJECT) + if fvalue is NONE_OBJECT: + continue + + match = re.search(s_pattern, fvalue) + if match is not None: + return True + return False + + +def field_equals(r, fields, strings, nocase=True): + """Check for exact string matches on fields of a Record object. + + Args: + r: The record to match on. + fields: The fields in the Record to match. + strings: The strings to search for. + nocase: Should the matching be case insensitive. + + Returns: + (bool): True or False + + """ + if nocase: + strings_to_check = [lower(s) for s in strings] + else: + strings_to_check = strings + + for field in fields: + fvalue = getattr(r, field, NONE_OBJECT) + if fvalue is NONE_OBJECT: + continue + if nocase: + fvalue = lower(fvalue) + for s in strings_to_check: + if s == fvalue: + return True + return False + + +def field_contains(r, fields, strings, nocase=True, word_boundary=False): + """Check if the string matches on fields of a Record object. + + Only supports strings for now and partial matches using the __contains__ operator. + + * `fields` is a list of field names to check + * `strings` is a list of strings to check on the fields + * `word_boundary` is a boolean. True if matching required only word boundary matches. + * Non existing fields on the Record object are skipped. + * Defaults to case-insensitive matching, use `nocase=False` if you want to be case sensitive. 
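+
+    A minimal usage sketch (illustrative, assuming a record `r` with a string field `url`):
+
+        field_contains(r, ["url"], ["fox-it"])                   # case-insensitive substring
+        field_contains(r, ["url"], ["fox"], word_boundary=True)  # whole words only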
+ """ + if nocase: + strings_to_check = [lower(s) for s in strings] + else: + strings_to_check = strings + + for field in fields: + fvalue = getattr(r, field, NONE_OBJECT) + if fvalue is NONE_OBJECT: + continue + if nocase: + fvalue = lower(fvalue) + for s in strings_to_check: + if word_boundary is False: + if s in fvalue: + return True + else: + if fvalue is None: + if s is None: + return True + continue + + if not isinstance(fvalue, string_types): + continue + + s_pattern = u"\\b{}\\b".format(re.escape(s)) + match = re.search(s_pattern, fvalue) + if match is not None: + return True + return False + + +# Function whitelist that are allowed in selectors +FUNCTION_WHITELIST = [ + lower, upper, name, names, get_type, field_contains, field_equals, field_regex, has_field, +] + + +def resolve_attr_path(node): + """Resolve a node attribute to full path, eg: net.ipv4.Subnet.""" + x = node.func + attr_path = [] + while isinstance(x, ast.Attribute): + attr_path.append(x.attr) + x = x.value + if isinstance(x, ast.Name): + attr_path.append(x.id) + return '.'.join(reversed(attr_path)) + + +class SelectorResult: + + def __init__(self, expression_str, match_result, backtrace, referenced_fields): + self.expresssion_str = expression_str + self.result = match_result + self.backtrace_info = backtrace + self.referenced_fields = referenced_fields + + def backtrace(self): + result = u"" + max_source_line_length = len(self.expresssion_str) + for row in self.backtrace_info[::-1]: + result += u"{}-> {}\n".format( + row[0].rstrip().ljust(max_source_line_length + 15), + row[1]) + return result + + +class Selector: + VERBOSITY_ALL = 1 + VERBOSITY_BRANCHES = 2 + VERBOSITY_NONE = 3 + + def __init__(self, expression): + expression = expression or "True" + self.expression_str = expression + self.expression = compile( + source=expression, + filename="", + mode="eval", + flags=ast.PyCF_ONLY_AST | __future__.unicode_literals.compiler_flag, + ) + self.matcher = None + + def __str__(self): + return self.expression_str + + def __repr__(self): + return 'Selector({!r})'.format(self.expression_str) + + def __contains__(self, record): + return self.match(record) + + def explain_selector(self, record, verbosity=VERBOSITY_ALL): + matcher = RecordContextMatcher(self.expression, self.expression_str, backtrace_verbosity=verbosity) + match_result = matcher.matches(record) + backtrace_info = matcher.selector_backtrace + if not HAVE_ASTOR: + backtrace_info.append(("WARNING: astor module not installed, trace not available", False)) + return SelectorResult(self.expression_str, match_result, backtrace_info, []) + + def match(self, record): + if not self.matcher: + self.matcher = RecordContextMatcher(self.expression, self.expression_str) + + result = self.matcher.matches(record) + return result + + +class WrappedRecord: + """WrappedRecord wraps a Record but will return a NoneObject for non existing attributes.""" + + __slots__ = ("record", ) + + def __init__(self, record): + self.record = record + + def __getattr__(self, k): + return getattr(self.record, k, NONE_OBJECT) + + +class CompiledSelector: + """CompiledSelector is faster than Selector but unsafe if you don't trust the query.""" + + def __init__(self, expression): + self.expression = expression or None + self.code = None + self.ns = {func.__name__: func for func in FUNCTION_WHITELIST} + self.ns["net"] = net + + if expression: + self.code = compile( + source=expression, + filename="", + mode="eval", + flags=__future__.unicode_literals.compiler_flag, + ) + + def __str__(self): + return 
self.expression + + def __repr__(self): + return 'CompiledSelector({!r})'.format(self.expression) + + def __contains__(self, record): + return self.match(record) + + def match(self, record): + if self.code is None: + return True + ns = self.ns.copy() + ns.update({ + "r": WrappedRecord(record), + "Type": TypeMatcher(record), + }) + return eval(self.code, ns) + + +class TypeMatcher: + """ + Helper to get and check fields of a certain type. + + Types can be selected using `Type.`. Attributes can be selected + using `Type..`. + + For example `Type.uri.filename` will retrieve all the filenames from all + uri's in a record. + + These selectors can also still be used in other helper functions, as + they will unwrap to resulting fieldnames. So for example, you can still + do `field_contains(r, Type.string, ['something'])`, which will check + all `string` fields. + + Membership tests also work. `'something' in Type.string` will perform + a membership test in each string value and return True if there are any. + + Reverse membership tests are trickier, and only work with a non-compiled + Selector. For example, `Type.net.ipv4.Address in net.ipv4.Subnet('10.0.0.0/8')` + requires the TypeMatcher to unroll its values, which is only possible + when overriding this behaviour. + """ + + def __init__(self, rec): + self._rec = rec + + def __getattr__(self, attr): + if attr in WHITELIST_TREE: + return TypeMatcherInstance(self._rec, [attr]) + + return NONE_OBJECT + + +class TypeMatcherInstance: + + def __init__(self, rec, ftypeparts=None, attrs=None): + self._rec = rec + self._ftypeparts = ftypeparts or [] + self._attrs = attrs or [] + + self._ftype = None + self._ftypetree = WHITELIST_TREE + for p in ftypeparts: + self._ftypetree = self._ftypetree[p] + + if self._ftypetree is True: + self._ftype = '.'.join(ftypeparts) + + def __getattr__(self, attr): + if not self._ftype: + if attr not in self._ftypetree: + return NONE_OBJECT + + ftypeparts = self._ftypeparts + [attr] + return TypeMatcherInstance(self._rec, ftypeparts) + elif not attr.startswith('_'): + attrs = self._attrs + [attr] + return TypeMatcherInstance(self._rec, self._ftypeparts, attrs) + + return NONE_OBJECT + + def __iter__(self): + return self._fields() + + def _fields(self): + for f in self._rec._desc.getfields(self._ftype): + yield f.name + + def _values(self): + for f in self._fields(): + obj = getattr(self._rec, f, NONE_OBJECT) + for a in self._attrs: + obj = getattr(obj, a, NONE_OBJECT) + + if obj is NONE_OBJECT: + continue + + yield obj + + def _subrecords(self): + """Return all fields that are records (records in records). 
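+
+        For example (illustrative): a record with a `record` field "parent" and a
+        `record[]` field "children" yields the parent record plus every child record,
+        skipping values that are None.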
+ + Returns: list of records + """ + fields = self._rec._desc.getfields("record") + for f in fields: + r = getattr(self._rec, f.name) + if r is not None: + yield r + + fields = self._rec._desc.getfields("record[]") + for f in fields: + records = getattr(self._rec, f.name) + if records is not None: + for r in records: + yield r + + def _op(self, op, other): + for v in self._values(): + if op(v, other): + return True + + subrecords = self._subrecords() + for record in subrecords: + type_matcher = TypeMatcherInstance(record, self._ftypeparts, self._attrs) + if type_matcher._op(op, other): + return True + + return False + + def __eq__(self, other): + return self._op(operator.eq, other) + + def __ne__(self, other): + return self._op(operator.ne, other) + + def __lt__(self, other): + return self._op(operator.lt, other) + + def __gt__(self, other): + return self._op(operator.gt, other) + + def __lte__(self, other): + return self._op(operator.le, other) + + def __gte__(self, other): + return self._op(operator.ge, other) + + def __noteq__(self, other): + return self._op(operator.ne, other) + + def __contains__(self, other): + return self._op(operator.contains, other) + + +class RecordContextMatcher: + + def __init__(self, expr, expr_str, backtrace_verbosity=Selector.VERBOSITY_NONE): + self.expression = expr + self.expression_str = expr_str + self.selector_backtrace = [] + self.selector_backtrace_verbosity = backtrace_verbosity + self.data = {} + self.rec = None + + def matches(self, rec): + self.selector_backtrace = [] + self.data = { + "None": None, + "True": True, + "False": False, + "str": str, + "fields": rec._desc.getfields, + "any": any, + "all": all, + } + + # Add whitelisted functions to global dict + self.data.update({ + func.__name__: func for func in FUNCTION_WHITELIST + }) + + self.data["r"] = rec + self.rec = rec + + # This ensures backwards compatibility with old Selector queries + self.data["obj"] = rec + + # Type matcher + self.data["Type"] = TypeMatcher(rec) + + return self.eval(self.expression.body) + + def eval(self, node): + r = self._eval(node) + verbosity = self.selector_backtrace_verbosity + log_trace = ( + (verbosity == Selector.VERBOSITY_ALL) or + (verbosity == Selector.VERBOSITY_BRANCHES and isinstance(node, (ast.Compare, ast.BoolOp))) + ) + if log_trace and HAVE_ASTOR: + source_line = astor.to_source(node) + self.selector_backtrace.append((source_line, r)) + return r + + def _eval(self, node): + if isinstance(node, ast.Num): + return node.n + elif isinstance(node, AST_NODE_S_TYPES): + return node.s + elif isinstance(node, AST_NODE_VALUE_TYPES): + return node.value + elif isinstance(node, ast.List): + return list(map(self.eval, node.elts)) + elif isinstance(node, ast.Tuple): + return tuple(map(self.eval, node.elts)) + elif isinstance(node, ast.Name): + if node.id not in self.data: + return getattr(dynamic_fieldtype, node.id) + + return self.data[node.id] + elif isinstance(node, ast.Attribute): + if node.attr.startswith('__'): + raise InvalidOperation( + "Selector {!r} contains invalid attribute: {!r}".format( + self.expression_str, node.attr)) + + obj = self.eval(node.value) + + return getattr(obj, node.attr, NONE_OBJECT) + elif isinstance(node, ast.BoolOp): + values = [] + for expr in node.values: + try: + value = self.eval(expr) + except TypeError as e: + if 'NoneType' in str(e): + value = False + else: + raise + value = bool(value) + values.append(value) + result = values.pop(0) + for value in values: + result = AST_OPERATORS[type(node.op)](result, value) + return 
result + elif isinstance(node, ast.BinOp): + left = self.eval(node.left) + right = self.eval(node.right) + if isinstance(left, NoneObject) or isinstance(right, NoneObject): + return False + return AST_OPERATORS[type(node.op)](left, right) + elif isinstance(node, ast.UnaryOp): + return AST_OPERATORS[type(node.op)](self.eval(node.operand)) + elif isinstance(node, ast.Compare): + left = self.eval(node.left) + right = self.eval(node.comparators[0]) + + # print [AST_COMPARATORS[type(node.ops[0])](getattr(self.rec, l.name), right) for l in left] + # return [AST_COMPARATORS[type(node.ops[0])](getattr(self.rec, l.name), right) for l in left] + + comptype = type(node.ops[0]) + comp = AST_COMPARATORS[comptype] + + # Special case for __contains__, where we need to first unwrap all values matching the Type query + if comptype in (ast.In, ast.NotIn) and isinstance(left, TypeMatcherInstance): + for v in left._values(): + if comp(v, right): + return True + return False + return comp(left, right) + elif isinstance(node, ast.Call): + if not isinstance(node.func, (ast.Attribute, ast.Name)): + raise InvalidOperation("Error, only ast.Attribute or ast.Name are expected") + + func_name = resolve_attr_path(node) + if not (callable(self.data.get(func_name)) or func_name in WHITELIST): + raise InvalidOperation( + "Call '{}' not allowed. No calls other then whitelisted 'global' calls allowed!".format( + func_name)) + + func = self.eval(node.func) + + args = list(map(self.eval, node.args)) + kwargs = dict((kw.arg, self.eval(kw.value)) for kw in node.keywords) + + return func(*args, **kwargs) + + elif isinstance(node, ast.comprehension): + iter = self.eval(node.iter) + return iter + + elif isinstance(node, ast.GeneratorExp): + def recursive_generator(gens): + """ + Yield all the values in the most deepest generator. + + Example: + [ord(c) for line in file for c in line] + This function would yield all c values for this expression + + Args: + gens: A list of generator/ comprehension objects + + Returns: + Generator + """ + gens = list(gens) + gen = gens.pop() + loop_index_var_name = gen.target.id + resolved_gen = self.eval(gen) + if resolved_gen is not NONE_OBJECT: + for val in resolved_gen: + self.data[loop_index_var_name] = val + if len(gens) > 0: + for subval in recursive_generator(gens): + yield subval + else: + yield val + + def generator_expr(): + """ + Embedded generator logic for ast.GeneratorExp. + + A function can't yield and return so we write nested generator function and return that. 
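+
+            For example (illustrative): a selector such as
+            `any(f == 'passwd' for f in r.filenames)` ends up here; every value of
+            `f` is bound in self.data before `node.elt` is evaluated for it.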
+ + Returns: + yields evaluated generator expression values + + """ + for gen in node.generators: + if gen.target.id in self.data: + raise InvalidOperation( + "Generator variable '{}' overwrites existing variable!".format( + gen.target.id)) + values = recursive_generator(node.generators[::-1]) + for val in values: + result = self.eval(node.elt) + yield result + return generator_expr() + + raise TypeError(node) + + +def make_selector(selector, force_compiled=False): + """Return a Selector object (either CompiledSelector or Selector).""" + ret = selector + if not selector: + ret = None + elif isinstance(selector, string_types): + ret = CompiledSelector(selector) if force_compiled else Selector(selector) + elif isinstance(selector, Selector): + if force_compiled: + ret = CompiledSelector(selector.expression_str) + return ret diff --git a/flow/record/stream.py b/flow/record/stream.py new file mode 100644 index 0000000..5723aec --- /dev/null +++ b/flow/record/stream.py @@ -0,0 +1,293 @@ +from __future__ import print_function + +import os +import sys +import struct +import logging +import datetime +from functools import lru_cache +from collections import ChainMap + +from .base import RecordDescriptor, RecordReader +from .packer import RecordPacker +from flow.record import RecordWriter +from flow.record.selector import make_selector +from flow.record.fieldtypes import fieldtype_for_value + + +log = logging.getLogger(__package__) + +RECORDSTREAM_MAGIC = b"RECORDSTREAM\n" + + +def RecordOutput(fp): + """Return a RecordPrinter if `fp` is a tty otherwise a RecordStreamWriter.""" + if hasattr(fp, "isatty") and fp.isatty(): + return RecordPrinter(fp) + return RecordStreamWriter(fp) + + +class RecordPrinter: + """Records are printed as textual representation (repr) to fp.""" + + fp = None + + def __init__(self, fp, flush=True): + self.fp = fp + self.auto_flush = flush + + def write(self, obj): + buf = repr(obj).encode() + b"\n" + self.fp.write(buf) + if self.auto_flush: + self.flush() + + def flush(self): + self.fp.flush() + + def close(self): + pass + + +class RecordStreamWriter: + """Records are written as binary (serialized) to fp.""" + + fp = None + packer = None + + def __init__(self, fp): + self.fp = fp + self.packer = RecordPacker() + self.packer.on_descriptor.add_handler(self.on_new_descriptor) + self.header_written = False + + def __del__(self): + self.close() + + def on_new_descriptor(self, descriptor): + self.write(descriptor) + + def close(self): + if self.fp and self.fp != getattr(sys.stdout, "buffer", sys.stdout): + self.fp.close() + self.fp = None + + def flush(self): + if not self.header_written: + self.writeheader() + + def write(self, obj): + if not self.header_written: + self.writeheader() + blob = self.packer.pack(obj) + self.fp.write(struct.pack(">I", len(blob))) + self.fp.write(blob) + + def writeheader(self): + self.header_written = True + self.write(RECORDSTREAM_MAGIC) + + +class RecordStreamReader: + fp = None + recordtype = None + descs = None + packer = None + + def __init__(self, fp, selector=None): + self.fp = fp + self.closed = False + self.selector = make_selector(selector) + self.packer = RecordPacker() + self.readheader() + + def readheader(self): + # Manually read the msgpack format to avoid unserializing invalid data + # we read size (4) + msgpack type (2) + msgpack bytes (recordstream magic) + header = self.fp.read(4 + 2 + len(RECORDSTREAM_MAGIC)) + if not header.endswith(RECORDSTREAM_MAGIC): + raise IOError("Unknown file format, not a RecordStream") + + def 
read(self): + d = self.fp.read(4) + if len(d) != 4: + raise EOFError() + + size = struct.unpack(">I", d)[0] + d = self.fp.read(size) + return self.packer.unpack(d) + + def close(self): + self.closed = True + + def __iter__(self): + try: + while not self.closed: + obj = self.read() + if obj == RECORDSTREAM_MAGIC: + continue + if isinstance(obj, RecordDescriptor): + self.packer.register(obj) + else: + if not self.selector or self.selector.match(obj): + yield obj + except EOFError: + pass + + +def record_stream(sources, selector=None): + """Return a Record stream generator from the given Record sources. + + Exceptions in a Record source will be caught so the stream is not interrupted. + """ + log.debug("Record stream with selector: {!r}".format(selector)) + for src in sources: + # Inform user that we are reading from stdin + if src in ("-", ""): + print("[reading from stdin]", file=sys.stderr) + + # Initial value for reader, in case of exception message + reader = "RecordReader" + try: + reader = RecordReader(src, selector=selector) + for rec in reader: + yield rec + reader.close() + except IOError as e: + log.error("{}({!r}): {}".format(reader, src, e)) + except KeyboardInterrupt: + raise + except Exception as e: # noqa: B902 + log.warning( + "Exception in {!r} for {!r}: {!r} -- skipping to next reader".format( + reader, src, e)) + continue + + +class PathTemplateWriter: + """Write records to a path on disk, path can be a template string. + + This allows for archiving records on disk based on timestamp for example. + + Default template string is: + + '{name}-{record._generated:%Y%m%dT%H}.records.gz' + + Available template fields: + + `name` defaults to "records", but can be overridden in the initializer. + `record` is the record object + `ts` is record._generated + + If the destination path already exists it will rename the existing file using the current datetime. + """ + + DEFAULT_TEMPLATE = '{name}-{record._generated:%Y%m%dT%H}.records.gz' + + def __init__(self, path_template=None, name=None): + self.path_template = path_template or self.DEFAULT_TEMPLATE + self.name = name or "records" + self.current_path = None + self.writer = None + self.stream = None + + def rotate_existing_file(self, path): + if os.path.exists(path): + now = datetime.datetime.utcnow() + src = os.path.realpath(path) + + src_dir = os.path.dirname(src) + src_fname = os.path.basename(src) + + # stamp will be part of new filename to denote rotation stamp + stamp = '{now:%Y%m%dT%H%M%S}'.format(now=now) + + # Use "records.gz" as the extension if we have this naming convention + if src_fname.endswith('.records.gz'): + fname, _ = src_fname.rsplit('.records.gz', 1) + ext = "records.gz" + else: + fname, ext = os.path.splitext(src_fname) + + # insert the rotation stamp into the new filename. 
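+            # Example (illustrative, not part of the original code): an existing
+            # 'daily.records.gz' is renamed to e.g. 'daily.20220101T120000.records.gz'
+            # (current UTC timestamp) before new records are written to the original path.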
+ dst = os.path.join(src_dir, '{fname}.{stamp}.{ext}'.format(**locals())) + log.info('RENAME {!r} -> {!r}'.format(src, dst)) + os.rename(src, dst) + + def record_stream_for_path(self, path): + if self.current_path != path: + self.current_path = path + log.info('Writing records to {!r}'.format(path)) + self.rotate_existing_file(path) + dst_dir = os.path.dirname(path) + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + rs = RecordWriter(path) + self.close() + self.writer = rs + return self.writer + + def write(self, record): + ts = record._generated or datetime.datetime.utcnow() + path = self.path_template.format(name=self.name, record=record, ts=ts) + rs = self.record_stream_for_path(path) + rs.write(record) + rs.fp.flush() + + def close(self): + if self.writer: + self.writer.close() + + +class RecordArchiver(PathTemplateWriter): + """RecordWriter that writes/archives records to a path with YYYY/mm/dd.""" + + def __init__(self, archive_path, path_template=None, name=None): + path_template = path_template or self.DEFAULT_TEMPLATE + template = os.path.join(str(archive_path), "{ts:%Y/%m/%d}", path_template) + PathTemplateWriter.__init__(self, path_template=template, name=name) + + +class RecordFieldRewriter: + """Rewrite records using a new RecordDescriptor for chosen fields and/or excluded or new record fields.""" + + def __init__(self, fields=None, exclude=None, expression=None): + self.fields = fields or [] + self.exclude = exclude or [] + self.expression = compile(expression, '', 'exec') if expression else None + + @lru_cache(maxsize=256) + def record_descriptor_for_fields(self, descriptor, fields=None, exclude=None, new_fields=None): + if not fields and not exclude and not new_fields: + return descriptor + exclude = exclude or [] + desc_fields = [] + if fields: + for fname in fields: + if fname in exclude: + continue + field = descriptor.fields.get(fname, None) + if field: + desc_fields.append((field.typename, field.name)) + else: + desc_fields = [(ftype, fname) for (ftype, fname) in descriptor.get_field_tuples() if fname not in exclude] + if new_fields: + desc_fields.extend(new_fields) + return RecordDescriptor(descriptor.name, desc_fields) + + def rewrite(self, record): + if not self.fields and not self.exclude and not self.expression: + return record + + local_dict = {} + new_fields = [] + if self.expression: + exec(self.expression, record._asdict(), local_dict) + # convert new variables to new record fields (field type is derived from value) + new_fields = [(fieldtype_for_value(val, "string"), key) for key, val in local_dict.items()] + + RewriteRecord = self.record_descriptor_for_fields( + record._desc, tuple(self.fields), tuple(self.exclude), tuple(new_fields) + ) + # give new variables precendence + return RewriteRecord.init_from_dict(ChainMap(local_dict, record._asdict())) diff --git a/flow/record/tools/__init__.py b/flow/record/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/flow/record/tools/geoip.py b/flow/record/tools/geoip.py new file mode 100644 index 0000000..3a940fa --- /dev/null +++ b/flow/record/tools/geoip.py @@ -0,0 +1,194 @@ +# Python imports +import re +import sys +import random +import argparse +import logging + +# Flow imports +from flow.record.utils import catch_sigpipe +from flow.record import ( + RecordDescriptor, + RecordWriter, + record_stream, + extend_record, +) + +# Third party imports +import maxminddb + + +logger = logging.getLogger(__name__) + +IPv4Record = RecordDescriptor( + "geo/ipv4", + [ + ("net.ipaddress", "ip"), + 
], +) + +GeoRecord = RecordDescriptor( + "maxmind/geo", + [ + ("string", "country"), + ("string", "country_code"), + ("string", "city"), + ("float", "longitude"), + ("float", "latitude"), + ], +) + +AsnRecord = RecordDescriptor( + "maxmind/asn", + [ + ("string", "asn"), + ("string", "org"), + ], +) + +DEFAULT_CITY_DB = "/usr/share/GeoIP/GeoLite2-City.mmdb" +DEFAULT_ASN_DB = "/usr/share/GeoIP/GeoLite2-ASN.mmdb" +REGEX_IPV4 = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}") + + +def georecord_for_ip(city_db, ip): + r = city_db.get(ip) if city_db else None + if not r: + return GeoRecord() + + loc_dict = r.get("location", {}) + country_dict = r.get("country", {}) + city_dict = r.get("city", {}) + + country = country_dict.get("names", {}).get("en") + country_code = country_dict.get("iso_code") + city = city_dict.get("names", {}).get("en") + lon = loc_dict.get("longitude") + lat = loc_dict.get("latitude") + + return GeoRecord( + country=country, + country_code=country_code, + city=city, + longitude=lon, + latitude=lat, + ) + + +def asnrecord_for_ip(asn_db, ip): + r = asn_db.get(ip) if asn_db else None + if not r: + return AsnRecord() + asn = r.get("autonomous_system_number", None) + org = r.get("autonomous_system_organization", None) + return AsnRecord(asn=asn, org=org) + + +def ip_records_from_text_files(files): + """Yield IPv4Records by extracting IP addresses from `files` using a regex.""" + for fname in files: + with open(fname, "r") if fname != "-" else sys.stdin as f: + for line in f: + for ip in REGEX_IPV4.findall(line): + yield IPv4Record(ip) + + +@catch_sigpipe +def main(): + parser = argparse.ArgumentParser( + description="Annotate records with GeoIP and ASN data", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "-c", "--city-db", default=DEFAULT_CITY_DB, help="path to GeoIP city database" + ) + parser.add_argument( + "-a", "--asn-db", default=DEFAULT_ASN_DB, help="path to GeoIP ASN database" + ) + parser.add_argument( + "-i", + "--ip-field", + metavar="FIELD", + default="ip", + help="the source record field to use for lookups", + ) + parser.add_argument( + "-w", + "--writer", + metavar="OUTPUT", + default="-", + help="write records to output", + ) + parser.add_argument("input", nargs="*", default=["-"], help="input files") + parser.add_argument( + "-t", + "--text", + action="store_true", + help="treats input as text and extract IPv4 Records using regex", + ) + + # Hidden options + parser.add_argument( + "-m", "--mode", type=int, default=maxminddb.MODE_AUTO, help=argparse.SUPPRESS + ) + parser.add_argument("-g", "--generate", action="store_true", help=argparse.SUPPRESS) + args = parser.parse_args() + + if args.generate: + with RecordWriter() as writer: + while True: + record = IPv4Record(random.randint(0, 0xFFFFFFFF)) + writer.write(record) + + if args.mode: + logger.warning("MODE: %u", args.mode) + + try: + city_db = maxminddb.open_database(args.city_db, args.mode) + except FileNotFoundError: + logger.warning( + "[*] Disabled Geo record annotation. (database not found: %r)", + args.city_db, + ) + city_db = None + + try: + asn_db = maxminddb.open_database(args.asn_db, args.mode) + except FileNotFoundError: + logger.warning( + "[*] Disabled ASN record annotation. (database not found: %r)", args.asn_db + ) + asn_db = None + + if not any([city_db, asn_db]) and not args.text: + print( + "[!] Both City and ASN database not available. 
Nothing to annotate, exiting..", + file=sys.stderr, + ) + return 1 + + if args.text: + # Input are text files, extract IPv4Records from text using a regex + record_iterator = ip_records_from_text_files(args.input) + else: + # Input are Record files + record_iterator = record_stream(args.input) + + with RecordWriter(args.writer) as writer: + for record in record_iterator: + ip = getattr(record, args.ip_field, None) + + annotated_records = [] + if city_db: + geo_record = georecord_for_ip(city_db, str(ip)) if ip else GeoRecord() + annotated_records.append(geo_record) + if asn_db: + asn_record = asnrecord_for_ip(asn_db, str(ip)) if ip else AsnRecord() + annotated_records.append(asn_record) + + record = extend_record(record, annotated_records) + writer.write(record) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/flow/record/tools/rdump.py b/flow/record/tools/rdump.py new file mode 100644 index 0000000..3d550e9 --- /dev/null +++ b/flow/record/tools/rdump.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python +from __future__ import print_function + +import sys +import logging + +from flow.record import RecordWriter, record_stream +from flow.record.stream import RecordFieldRewriter +from flow.record.selector import make_selector +from flow.record.utils import catch_sigpipe + +try: + from flow.record.version import version +except ImportError: + version = "unknown" + +log = logging.getLogger(__name__) + +try: + # Python 2 + import urlparse + from urllib import urlencode +except ImportError: + # Python 3 + import urllib.parse as urlparse + from urllib.parse import urlencode + + +@catch_sigpipe +def main(): + import argparse + parser = argparse.ArgumentParser( + description="Record dumper, a tool that can read, write and filter records", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument( + '--version', action='version', version="flow.record version {}".format(version)) + parser.add_argument( + 'src', metavar='SOURCE', nargs='*', default=['-'], + help='Record source') + parser.add_argument( + '-v', '--verbose', action='count', default=0, + help='Increase verbosity') + + misc = parser.add_argument_group("miscellaneous") + misc.add_argument( + '-l', '--list', action='store_true', + help='List unique Record Descriptors') + misc.add_argument( + '-n', '--no-compile', action='store_true', + help="Don't use a compiled selector (safer, but slower)") + misc.add_argument( + '--record-source', default=None, + help='Overwrite the record source field') + misc.add_argument( + '--record-classification', default=None, + help='Overwrite the record classification field') + + selection = parser.add_argument_group('selection') + selection.add_argument( + '-F', '--fields', metavar='FIELDS', + help='Fields (comma seperated) to output in dumping') + selection.add_argument( + '-X', '--exclude', metavar='FIELDS', + help='Fields (comma seperated) to exclude in dumping') + selection.add_argument( + '-s', '--selector', metavar='SELECTOR', default=None, + help='Only output records matching Selector') + + output = parser.add_argument_group('output control') + output.add_argument( + '-f', '--format', metavar='FORMAT', + help='Format string') + output.add_argument( + '-c', '--count', type=int, + help='Exit after COUNT records') + output.add_argument( + '-w', '--writer', metavar='OUTPUT', default=None, + help='Write records to output') + output.add_argument( + '-m', '--mode', default=None, choices=("csv", "json", "jsonlines", "line"), + help='Output mode') + + advanced = 
parser.add_argument_group('advanced') + advanced.add_argument( + '-E', "--exec-expression", + help="execute a (Python) expression for each record AFTER selector matching, can be used to assign new fields") + + aliases = parser.add_argument_group('aliases') + aliases.add_argument( + '-j', '--json', action='store_const', const='json', dest='mode', + default=argparse.SUPPRESS, + help='Short for --mode=json') + aliases.add_argument( + '-J', '--jsonlines', action='store_const', const='jsonlines', dest='mode', + default=argparse.SUPPRESS, + help='Short for --mode=jsonlines') + aliases.add_argument( + '-C', '--csv', action='store_const', const='csv', dest='mode', + default=argparse.SUPPRESS, + help='Short for --mode=csv') + aliases.add_argument( + "-L", "--line", action='store_const', const='line', dest='mode', + default=argparse.SUPPRESS, + help='Short for --mode=line') + + args = parser.parse_args() + + levels = [logging.WARNING, logging.INFO, logging.DEBUG] + level = levels[min(len(levels) - 1, args.verbose)] + logging.basicConfig(level=level, format="%(asctime)s %(levelname)s %(message)s") + + fields_to_exclude = args.exclude.split(",") if args.exclude else [] + fields = args.fields.split(",") if args.fields else [] + + uri = args.writer or "text://" + if not args.writer: + mode_to_uri = { + "csv": "csvfile://", + "json": "jsonfile://?indent=2", + "jsonlines": "jsonfile://", + "line": "line://", + } + uri = mode_to_uri.get(args.mode, uri) + qparams = { + "fields": args.fields, + "exclude": args.exclude, + "format_spec": args.format, + } + query = urlencode({k: v for k, v in qparams.items() if v}) + uri += "&" if urlparse.urlparse(uri).query else "?" + query + + record_field_rewriter = None + if fields or fields_to_exclude or args.exec_expression: + record_field_rewriter = RecordFieldRewriter(fields, fields_to_exclude, args.exec_expression) + + selector = make_selector(args.selector, not args.no_compile) + seen_desc = set() + count = 0 + with RecordWriter(uri) as record_writer: + for count, rec in enumerate(record_stream(args.src, selector)): + if args.count and count >= args.count: + break + + if args.record_source is not None: + rec._source = args.record_source + if args.record_classification is not None: + rec._classification = args.record_classification + if record_field_rewriter: + rec = record_field_rewriter.rewrite(rec) + + # Dump RecordDescriptors + if args.list: + desc = rec._desc + if desc.descriptor_hash not in seen_desc: + seen_desc.add(desc.descriptor_hash) + print("# {}".format(desc)) + print(desc.definition()) + print() + continue + + record_writer.write(rec) + + if args.list: + print("Processed {} records".format(count)) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/flow/record/utils.py b/flow/record/utils.py new file mode 100644 index 0000000..bffccc0 --- /dev/null +++ b/flow/record/utils.py @@ -0,0 +1,87 @@ +import os +import sys +import base64 +from functools import wraps + +_native = str +_unicode = type(u'') +_bytes = type(b'') + + +def is_stdout(fp): + return fp == getattr(sys.stdout, "buffer", sys.stdout) + + +def to_bytes(value): + """Convert a value to a byte string.""" + if value is None or isinstance(value, _bytes): + return value + if isinstance(value, _unicode): + return value.encode("utf-8") + return _bytes(value) + + +def to_str(value): + """Convert a value to a unicode string.""" + if value is None or isinstance(value, _unicode): + return value + if isinstance(value, _bytes): + return value.decode("utf-8") + return _unicode(value) + + +def 
to_native_str(value): + """Convert a value to a native `str`.""" + if value is None or isinstance(value, _native): + return value + if isinstance(value, _unicode): + # Python 2: unicode -> str + return value.encode("utf-8") + if isinstance(value, _bytes): + # Python 3: bytes -> str + return value.decode("utf-8") + return _native(value) + + +def to_base64(value): + """Convert a value to a base64 string.""" + return base64.b64encode(value).decode() + + +def catch_sigpipe(func): + """Catches KeyboardInterrupt and BrokenPipeError (OSError 22 on Windows).""" + + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except KeyboardInterrupt: + print("Aborted!", file=sys.stderr) + return 1 + except (BrokenPipeError, OSError) as e: + exc_type = type(e) + # Only catch BrokenPipeError or OSError 22 + if (exc_type is BrokenPipeError) or (exc_type is OSError and e.errno == 22): + devnull = os.open(os.devnull, os.O_WRONLY) + os.dup2(devnull, sys.stdout.fileno()) + return 1 + # Raise other exceptions + raise + + return wrapper + + +class EventHandler: + + def __init__(self): + self.handlers = [] + + def add_handler(self, callback): + self.handlers.append(callback) + + def remove_handler(self, callback): + self.handlers.remove(callback) + + def __call__(self, *args, **kwargs): + for h in self.handlers: + h(*args, **kwargs) diff --git a/flow/record/whitelist.py b/flow/record/whitelist.py new file mode 100644 index 0000000..dee0add --- /dev/null +++ b/flow/record/whitelist.py @@ -0,0 +1,40 @@ +WHITELIST = [ + "boolean", + "dynamic", + "datetime", + "filesize", + "uint16", + "uint32", + "float", + "string", + "stringlist", + "dictlist", + "unix_file_mode", + "varint", + "wstring", + "net.ipv4.Address", + "net.ipv4.Subnet", + "net.tcp.Port", + "net.udp.Port", + "uri", + "digest", + "bytes", + "record", + "net.ipaddress", + "net.ipnetwork", + "net.IPAddress", + "net.IPNetwork", +] + + +WHITELIST_TREE = {} +for field in WHITELIST: + parent = None + obj = WHITELIST_TREE + for part in field.split('.'): + if part not in obj: + obj[part] = {} + parent = obj + obj = obj[part] + + parent[part] = True diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6d6687e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = ["setuptools>=43.0.0", "wheel", "setuptools_scm[toml]>=3.4.1"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +write_to = "flow/record/version.py" + +[tool.black] +line-length = 120 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..34ae005 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[metadata] +author = Dissect Team +author_email = dissect@fox-it.com +url = https://github.com/fox-it/flow.record +license = Affero General Public License v3 +long_description = file: README.md +license_files = LICENSE, COPYRIGHT +classifiers = + Programming Language :: Python :: 3 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4b3a22f --- /dev/null +++ b/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup, find_packages + +setup( + name='flow.record', + packages=['flow.' 
+ v for v in find_packages('flow')], + install_requires=[ + 'msgpack>=0.5.2', + ], + extras_require={ + # Note: these compression libraries do not work well with pypy + 'compression': [ + 'lz4', + 'zstandard', + ], + }, + namespace_packages=['flow'], + entry_points={ + 'console_scripts': [ + 'r=flow.record.tools.r:main', + 'rdd=flow.record.tools.rdd:main', + 'rselect=flow.record.tools.rselect:main', + 'rdump=flow.record.tools.rdump:main', + 'rgeoip=flow.record.tools.geoip:main', + ], + }, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/selector_explain_example.py b/tests/selector_explain_example.py new file mode 100644 index 0000000..9520b4e --- /dev/null +++ b/tests/selector_explain_example.py @@ -0,0 +1,32 @@ +from flow.record.selector import Selector +from flow.record import RecordDescriptor + +desc = RecordDescriptor("test/samplerecord", [ + ("uint16", "value"), + ("string", "x"), +]) + + +def main(): + s_str = u"r.x == u'\\u2018Test\\u2019' or r.value == 17 or (r.value == 1337 and r.x == 'YOLO')" + print(u"Evaluating selector.... \n{}".format(s_str)) + print("\n") + s = Selector(s_str) + obj = desc(0, "Test") + obj.x = u"\u2018Test\u2019" + obj.value = 16 + val = s.explain_selector(obj) + print(val.backtrace()) + + +if __name__ == "__main__": + main() + + +""" +r.x == 'Test' or r.value == 17 -> True + r.x == 'Test' -> True + or + r.value == 17 -> False + +""" diff --git a/tests/standalone_test.py b/tests/standalone_test.py new file mode 100644 index 0000000..3d8749d --- /dev/null +++ b/tests/standalone_test.py @@ -0,0 +1,16 @@ +from __future__ import print_function + + +def main(glob): + for var, val in sorted(glob.items()): + if not var.startswith("test_"): + continue + + print("{:40s}".format(var), end="") + try: + val() + print("PASSED") + except Exception: # noqa: B902 + print("FAILED") + import traceback + traceback.print_exc() diff --git a/tests/test_compiled_selector.py b/tests/test_compiled_selector.py new file mode 100644 index 0000000..ff8995f --- /dev/null +++ b/tests/test_compiled_selector.py @@ -0,0 +1,37 @@ +from flow.record import RecordDescriptor +from flow.record.selector import CompiledSelector as Selector + + +def test_selector_func_name(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "query"), + ("string", "url"), + ]) + assert TestRecord(None, None) not in Selector("name(r) == 'foo/bar'") + assert TestRecord(None, None) in Selector("name(r) == 'test/record'") + + +def test_selector(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "query"), + ("string", "url"), + ]) + + assert TestRecord("foo", "bar") in Selector("r.query == 'foo'") + assert TestRecord(None, None) not in Selector("r.query == 'foo'") + assert TestRecord(None, None) not in Selector("name(r.query) == 'XX'") + + +def test_non_existing_field(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "query"), + ("string", "url"), + ]) + + assert TestRecord("foo", "bar") not in Selector("r.query and r.non_existing_field") + assert TestRecord("foo", "bar") in Selector("not r.non_existing_field") + assert TestRecord("foo", "bar") in Selector("r.query and r.url and not r.non_existing_field") + + +if __name__ == "__main__": + __import__("standalone_test").main(globals()) diff --git a/tests/test_fieldtype_ip.py b/tests/test_fieldtype_ip.py new file mode 100644 index 0000000..94a683f --- /dev/null +++ b/tests/test_fieldtype_ip.py @@ -0,0 +1,238 @@ +from __future__ import unicode_literals + +import pytest 
+ +from flow.record import RecordDescriptor +from flow.record import RecordPacker +from flow.record.fieldtypes import net +from flow.record.selector import Selector, CompiledSelector + + +def test_field_ipaddress(): + a = net.IPAddress("192.168.1.1") + assert a == "192.168.1.1" + + with pytest.raises(ValueError) as excinfo: + net.IPAddress("a.a.a.a") + excinfo.match(".* does not appear to be an IPv4 or IPv6 address") + + +def test_field_ipnetwork(): + a = net.IPNetwork("192.168.1.0/24") + assert a == "192.168.1.0/24" + + # Host bits set + with pytest.raises(ValueError) as excinfo: + net.IPNetwork("192.168.1.10/24") + excinfo.match(".* has host bits set") + + +def test_record_ipaddress(): + TestRecord = RecordDescriptor("test/ipaddress", [ + ("net.ipaddress", "ip"), + ]) + + r = TestRecord("127.0.0.1") + assert r.ip == "127.0.0.1" + assert r.ip != "lala.1234.bad.ip" + assert isinstance(r.ip, net.ipaddress) + assert repr(r.ip) == "net.ipaddress('127.0.0.1')" + + # ipv4 + assert TestRecord("1.1.1.1").ip == "1.1.1.1" + assert TestRecord("0.0.0.0").ip == "0.0.0.0" + assert TestRecord("192.168.0.1").ip == "192.168.0.1" + assert TestRecord("255.255.255.255").ip == "255.255.255.255" + + # ipv6 + assert TestRecord("::1").ip == "::1" + assert TestRecord("2001:4860:4860::8888").ip == "2001:4860:4860::8888" + assert TestRecord("2001:4860:4860::4444").ip == "2001:4860:4860::4444" + + # instantiate from different types + assert TestRecord(1).ip == "0.0.0.1" + assert TestRecord(0x7f0000ff).ip == "127.0.0.255" + assert TestRecord(b"\x7f\xff\xff\xff").ip == "127.255.255.255" + assert TestRecord("127.0.0.1").ip == "127.0.0.1" + + # invalid ip addresses + for invalid in ["1.1.1.256", "192.168.0.1/24", "a.b.c.d", ":::::1"]: + with pytest.raises(Exception) as excinfo: + TestRecord(invalid) + excinfo.match(r'.*does not appear to be an IPv4 or IPv6 address*') + + r = TestRecord() + assert r.ip is None + + +def test_record_ipnetwork(): + TestRecord = RecordDescriptor("test/ipnetwork", [ + ("net.ipnetwork", "subnet"), + ]) + + # ipv4 + r = TestRecord("192.168.0.0/24") + assert r.subnet == "192.168.0.0/24" + assert r.subnet != "bad.sub/net" + assert "bad.ip" not in r.subnet + assert "192.168.0.1" in r.subnet + assert "192.168.0.2/32" in r.subnet + assert "192.168.0.255" in r.subnet + assert "192.168.0.128/30" in r.subnet + assert "192.168.1.1" not in r.subnet + assert isinstance(r.subnet, net.ipnetwork) + assert repr(r.subnet) == "net.ipnetwork('192.168.0.0/24')" + + r = TestRecord("192.168.1.1/32") + assert r.subnet == "192.168.1.1" + assert r.subnet == "192.168.1.1/32" + assert "192.168.1.1" in r.subnet + assert "192.168.1.1/32" in r.subnet + + # ipv6 - https://en.wikipedia.org/wiki/IPv6_address + r = TestRecord("::1") + assert r.subnet == "::1" + assert r.subnet == "::1/128" + + r = TestRecord("::/0") + assert "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" in r.subnet + assert "::" in r.subnet + assert "::1" in r.subnet + + r = TestRecord("64:ff9b::/96") + assert "64:ff9b::0.0.0.0" in r.subnet + assert "64:ff9b::255.255.255.255" in r.subnet + + +@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector]) +def test_selector_ipaddress(PSelector): + TestRecord = RecordDescriptor("test/ipaddress", [ + ("string", "description"), + ("net.ipaddress", "ip"), + ]) + + records = [ + TestRecord("Google DNS IPv4", "8.8.8.8"), + TestRecord("Google DNS IPv4", "8.8.4.4"), + TestRecord("Google DNS IPv6", "2001:4860:4860::8888"), + TestRecord("Google DNS IPv6", "2001:4860:4860::4444"), + ] + + recs = [r for r in records if r 
in PSelector("r.ip in net.ipnetwork('8.8.0.0/16')")] + assert len(recs) == 2 + + recs = [r for r in records if r in PSelector("r.ip == '8.8.8.8'")] + assert len(recs) == 1 + + recs = [r for r in records if r in PSelector("r.ip in net.ipnetwork('2001:4860:4860::/48')")] + assert len(recs) == 2 + + record = TestRecord("Optional", None) + assert record not in PSelector("r.ip == '1.1.1.1'") + assert record in PSelector("r.ip == None") + assert record in PSelector("not r.ip") + + +@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector]) +def test_selector_ipnetwork(PSelector): + TestRecord = RecordDescriptor("test/ipnetwork", [ + ("string", "description"), + ("net.ipnetwork", "subnet"), + ]) + + records = [ + # ipv4 + TestRecord("RFC1918", "10.0.0.0/8"), + TestRecord("RFC1918", "172.16.0.0/12"), + TestRecord("RFC1918", "192.168.0.0/16"), + # ipv6 + TestRecord("Private network", "fc00::/7"), + TestRecord("Link local", "fe80::/10"), + TestRecord("Facebook IPv6 range", "2a03:2880::/32"), + ] + recs = [r for r in records if r in PSelector("'fe80::1ff:fe23:4567:890a' in r.subnet")] + assert len(recs) == 1 + + recs = [r for r in records if r in PSelector("'2a03:2880:f003:c07:face:b00c::2' in r.subnet")] + assert len(recs) == 1 + + recs = [r for r in records if r in PSelector("'192.168.1.0/24' in r.subnet")] + assert len(recs) == 1 + assert recs[0].subnet == "192.168.0.0/16" + + recs = [r for r in records if r in PSelector("'192.168.1.141' in r.subnet")] + assert len(recs) == 1 + assert recs[0].subnet == "192.168.0.0/16" + + record = TestRecord("Google", "8.0.0.0/8") + assert record in PSelector("'8.8.4.4' in r.subnet") + assert record in PSelector("'8.8.8.8/32' in r.subnet") + assert record in PSelector("'8.8.0.0/16' in r.subnet") + assert record in PSelector("'8.8.4.0/24' in r.subnet") + assert record in PSelector("'8.8.8.0/24' in r.subnet") + + record = TestRecord("Optional", None) + assert record not in PSelector("r.subnet and '1.1.1.1' in r.subnet") + assert record in PSelector("r.subnet == None") + assert record in PSelector("not r.subnet") + + +@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector]) +def test_selector_ipaddress_in_ipnetwork(PSelector): + TestRecord = RecordDescriptor("test/scandata", [ + ("net.ipaddress", "ip"), + ("uint16", "port"), + ("string", "description"), + ]) + + records = [ + TestRecord("8.8.8.8", 53, "google"), + TestRecord("1.1.1.1", 53, "cloudflare"), + TestRecord("2620:fe::9", 53, "quad9"), + TestRecord(None, None, "empty"), + ] + + for record in records: + if record in PSelector('r.ip in net.ipnetwork("8.8.0.0/16")'): + assert record.ip == "8.8.8.8" + + for record in records: + if record in PSelector('r.ip in net.ipnetwork("1.1.1.1/32")'): + assert record.ip == "1.1.1.1" + + for record in records: + if record in PSelector('r.ip in net.ipnetwork("2620:FE::/48")'): + assert record.description == "quad9" + assert record.ip == "2620:00fe:0:0:0:0:0:0009" + + +def test_pack_ipaddress(): + packer = RecordPacker() + + TestRecord = RecordDescriptor("test/ipaddress", [ + ("net.ipaddress", "ip"), + ]) + + record_in = TestRecord("10.22.99.255") + data = packer.pack(record_in) + record_out = packer.unpack(data) + assert record_in == record_out + + # ip should be encoded as dword/bytes + assert b"\x0a\x16\x63\xff" in data + + +def test_pack_ipnetwork(): + packer = RecordPacker() + + TestRecord = RecordDescriptor("test/ipnetwork", [ + ("net.ipnetwork", "subnet"), + ]) + + record_in = TestRecord("172.16.0.0/16") + data = packer.pack(record_in) + record_out 
= packer.unpack(data) + assert record_in == record_out + + # subnet should be encoded as string + assert b"172.16.0.0/16" in data diff --git a/tests/test_fieldtypes.py b/tests/test_fieldtypes.py new file mode 100644 index 0000000..2854b45 --- /dev/null +++ b/tests/test_fieldtypes.py @@ -0,0 +1,458 @@ +# coding: utf-8 + +import pytest +import datetime +import hashlib + +from flow.record import RecordDescriptor +from flow.record.fieldtypes import net +from flow.record.fieldtypes import uri +from flow.record.fieldtypes import fieldtype_for_value +import flow.record.fieldtypes + +INT64_MAX = (1 << 63) - 1 +INT32_MAX = (1 << 31) - 1 +INT16_MAX = (1 << 15) - 1 + +UINT128_MAX = (1 << 128) - 1 +UINT64_MAX = (1 << 64) - 1 +UINT32_MAX = (1 << 32) - 1 +UINT16_MAX = (1 << 16) - 1 + + +def test_uint16(): + desc = RecordDescriptor("test/uint16", [ + ("uint16", "value"), + ]) + + # valid + desc.recordType(0x0) + desc.recordType(0x1) + desc.recordType(UINT16_MAX) + + # invalid + with pytest.raises(ValueError): + desc.recordType(-1) + + with pytest.raises(ValueError): + desc.recordType(UINT16_MAX + 1) + + with pytest.raises((ValueError, OverflowError)): + desc.recordType(UINT128_MAX) + + +def test_uint32(): + TestRecord = RecordDescriptor("test/uint32", [ + ("uint32", "value"), + ]) + + # valid + TestRecord(0x0) + TestRecord(0x1) + TestRecord(UINT16_MAX) + TestRecord(UINT32_MAX) + + # invalid + with pytest.raises(ValueError): + TestRecord(-1) + + with pytest.raises(ValueError): + TestRecord(UINT32_MAX + 1) + + with pytest.raises((ValueError, OverflowError)): + TestRecord(UINT128_MAX) + + +def test_net_ipv4_address(): + TestRecord = RecordDescriptor("test/net/ipv4/address", [ + ("net.ipv4.Address", "ip"), + ]) + + TestRecord("1.1.1.1") + TestRecord("0.0.0.0") + TestRecord("192.168.0.1") + TestRecord("255.255.255.255") + + r = TestRecord(u"127.0.0.1") + + assert isinstance(r.ip, net.ipv4.Address) + + for invalid in ["1.1.1.256", "192.168.0.1/24", "a.b.c.d"]: + with pytest.raises(Exception) as excinfo: + TestRecord(invalid) + excinfo.match(r'.*illegal IP address string.*') + + r = TestRecord() + assert r.ip is None + + +def test_net_ipv4_subnet(): + TestRecord = RecordDescriptor("test/net/ipv4/subnet", [ + ("net.ipv4.Subnet", "subnet"), + ]) + + r = TestRecord("1.1.1.0/24") + assert str(r.subnet) == "1.1.1.0/24" + + assert "1.1.1.1" in r.subnet + assert "1.1.1.2" in r.subnet + + assert "1.1.2.1" not in r.subnet + # assert "1.1.1.1/32" not in r.subnet + + r = TestRecord("0.0.0.0") + r = TestRecord("192.168.0.1") + r = TestRecord("255.255.255.255") + + r = TestRecord(u"127.0.0.1") + + for invalid in ["a.b.c.d", "foo", "bar", ""]: + with pytest.raises(Exception) as excinfo: + TestRecord(invalid) + excinfo.match(r'.*illegal IP address string.*') + + for invalid in [1, 1.0, sum, dict(), list(), True]: + with pytest.raises(TypeError) as excinfo: + TestRecord(invalid) + excinfo.match(r'Subnet\(\) argument 1 must be string, not .*') + + with pytest.raises(ValueError) as excinfo: + TestRecord("192.168.0.106/28") + excinfo.match(r"Not a valid subnet '192\.168\.0\.106/28', did you mean '192\.168\.0\.96/28' ?") + + +def test_bytes(): + TestRecord = RecordDescriptor("test/string", [ + ("string", "url"), + ("bytes", "body"), + ]) + + r = TestRecord("url", b"some bytes") + assert r.body == b"some bytes" + + with pytest.raises(TypeError) as excinfo: + r = TestRecord("url", 1234) + excinfo.match(r"Value not of bytes type") + + with pytest.raises(TypeError) as excinfo: + r = TestRecord("url", u"a string") + 
excinfo.match(r"Value not of bytes type") + + b_array = bytes(bytearray(range(256))) + body = b"HTTP/1.1 200 OK\r\n\r\n" + b_array + r = TestRecord("http://www.fox-it.com", body) + assert r + assert r.url == u"http://www.fox-it.com" + assert r.body == b"HTTP/1.1 200 OK\r\n\r\n" + b_array + + # testcase when input are bytes + r = TestRecord("http://www.fox-it.com", b'HTTP/1.1 500 Error\r\n\r\nError') + assert r.body == b"HTTP/1.1 500 Error\r\n\r\nError" + + +def test_string(): + TestRecord = RecordDescriptor("test/string", [ + ("string", "name"), + ]) + + r = TestRecord("Fox-IT") + assert r.name == u"Fox-IT" + + r = TestRecord(u"Rémy") + assert r.name == u"Rémy" + + # construct from 'bytes' + r = TestRecord(b'R\xc3\xa9my') + assert r.name == u"Rémy" + + # construct from 'bytes' but with invalid unicode bytes + if isinstance(u'', str): + # Python 3 + with pytest.raises(UnicodeDecodeError): + TestRecord(b'R\xc3\xa9\xeamy') + else: + # Python 2 + with pytest.warns(RuntimeWarning): + r = TestRecord(b'R\xc3\xa9\xeamy') + assert r.name + + +def test_wstring(): + # Behaves the same as test/string, only available for backwards compatibility purposes + TestRecord = RecordDescriptor("test/wstring", [ + ("wstring", "name"), + ]) + + r = TestRecord("Fox-IT") + assert r.name == u"Fox-IT" + + +def test_typedlist(): + TestRecord = RecordDescriptor("test/typedlist", [ + ("string[]", "string_value"), + ("uint32[]", "uint32_value"), + ("uri[]", "uri_value"), + ]) + + r = TestRecord(['a', 'b', 'c'], [1, 2, 3], ["/etc/passwd", "/etc/shadow"]) + assert len(r.string_value) == 3 + assert len(r.uint32_value) == 3 + assert len(r.uri_value) == 2 + assert r.string_value[2] == 'c' + assert r.uint32_value[1] == 2 + assert all([isinstance(v, uri) for v in r.uri_value]) + assert r.uri_value[1].filename == 'shadow' + + r = TestRecord() + assert r.string_value == [] + assert r.uint32_value == [] + assert r.uri_value == [] + + with pytest.raises(ValueError): + r = TestRecord(uint32_value=['a', 'b', 'c']) + + +def test_stringlist(): + TestRecord = RecordDescriptor("test/string", [ + ("stringlist", "value"), + ]) + + r = TestRecord(['a', 'b', 'c']) + assert len(r.value) == 3 + assert r.value[2] == 'c' + + r = TestRecord([u"Rémy"]) + assert r.value[0] + + +def test_dictlist(): + TestRecord = RecordDescriptor("test/dictlist", [ + ("dictlist", "hits"), + ]) + + r = TestRecord([{"a": 1, "b": 2}, {"a": 3, "b": 4}]) + assert len(r.hits) == 2 + assert r.hits == [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + assert r.hits[0]["a"] == 1 + assert r.hits[0]["b"] == 2 + assert r.hits[1]["a"] == 3 + assert r.hits[1]["b"] == 4 + + +def test_boolean(): + TestRecord = RecordDescriptor("test/boolean", [ + ("boolean", "booltrue"), + ("boolean", "boolfalse"), + ]) + + r = TestRecord(True, False) + assert bool(r.booltrue) is True + assert bool(r.boolfalse) is False + + r = TestRecord(1, 0) + assert bool(r.booltrue) is True + assert bool(r.boolfalse) is False + + assert str(r.booltrue) == "True" + assert str(r.boolfalse) == "False" + + assert repr(r.booltrue) == "True" + assert repr(r.boolfalse) == "False" + + with pytest.raises(ValueError): + r = TestRecord(2, -1) + + with pytest.raises(ValueError): + r = TestRecord('True', 'False') + + +def test_float(): + TestRecord = RecordDescriptor("test/float", [ + ("float", "value"), + ]) + + # initialize via float + r = TestRecord(1.3337) + assert r.value == 1.3337 + + # initialize via string + r = TestRecord("1.3337") + assert r.value == 1.3337 + + # initialize via int + r = TestRecord("1337") + assert r.value 
== 1337.0 + + # negative float + r = TestRecord(-12345) + assert r.value == -12345 + + # invalid float + with pytest.raises(ValueError): + r = TestRecord("abc") + + +def test_uri_type(): + TestRecord = RecordDescriptor("test/uri", [ + ("uri", "path"), + ]) + + r = TestRecord("http://www.google.com/a.bin") + assert r.path.filename == "a.bin" + assert r.path.dirname == "/" + assert r.path.hostname == "www.google.com" + assert r.path.protocol == "http" + assert r.path.protocol == r.path.scheme + assert r.path.path == "/a.bin" + + r = TestRecord("http://username:password@example.com/path/file.txt?query=1") + assert r.path.filename == "file.txt" + assert r.path.dirname == "/path" + assert r.path.args == "query=1" + assert r.path.username == "username" + assert r.path.password == "password" + assert r.path.protocol == "http" + assert r.path.hostname == "example.com" + + r = TestRecord(uri.from_windows(r"c:\windows\program files\Fox-IT B.V\flow.exe")) + assert r.path.filename == "flow.exe" + + r = TestRecord() + r.path = uri.normalize(r"c:\Users\Fox-IT\Downloads\autoruns.exe") + assert r.path.filename == "autoruns.exe" + assert r.path.dirname == uri.normalize(r"\Users\Fox-IT\Downloads") + assert r.path.dirname == "/Users/Fox-IT/Downloads" + + r = TestRecord() + r.path = "/usr/local/bin/sshd" + assert r.path.filename == "sshd" + assert r.path.dirname == "/usr/local/bin" + + +def test_datetime(): + TestRecord = RecordDescriptor("test/datetime", [ + ("datetime", "ts"), + ]) + + now = datetime.datetime.utcnow() + r = TestRecord(now) + assert r.ts == now + + r = TestRecord(u"2018-03-22T15:15:23") + assert r.ts == datetime.datetime(2018, 3, 22, 15, 15, 23) + + r = TestRecord(u"2018-03-22T15:15:23.000000") + assert r.ts == datetime.datetime(2018, 3, 22, 15, 15, 23) + + r = TestRecord(u"2018-03-22T15:15:23.123456") + assert r.ts == datetime.datetime(2018, 3, 22, 15, 15, 23, 123456) + + dt = datetime.datetime(2018, 3, 22, 15, 15, 23, 123456) + dt_str = dt.isoformat() + r = TestRecord(dt_str) + assert r.ts == dt + + r = TestRecord(1521731723) + assert r.ts == datetime.datetime(2018, 3, 22, 15, 15, 23) + + +def test_digest(): + TestRecord = RecordDescriptor("test/digest", [ + ("digest", "digest"), + ]) + + md5 = hashlib.md5(b"hello").hexdigest() + sha1 = hashlib.sha1(b"hello").hexdigest() + sha256 = hashlib.sha256(b"hello").hexdigest() + + record = TestRecord() + assert isinstance(record.digest, flow.record.fieldtypes.digest) + + record = TestRecord((md5, sha1, sha256)) + assert record.digest.md5 == "5d41402abc4b2a76b9719d911017c592" + assert record.digest.sha1 == "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d" + assert record.digest.sha256 == "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + + record = TestRecord(("5d41402abc4b2a76b9719d911017c592", None, None)) + assert record.digest.md5 == "5d41402abc4b2a76b9719d911017c592" + assert record.digest.sha1 is None + assert record.digest.sha256 is None + + record = TestRecord() + record.digest = (md5, sha1, sha256) + assert record.digest.md5 == md5 + assert record.digest.sha1 == sha1 + assert record.digest.sha256 == sha256 + + with pytest.raises(TypeError) as excinfo: + record = TestRecord(("a", sha1, sha256)) + excinfo.match(r'.*Invalid MD5.*Odd-length string') + + with pytest.raises(TypeError) as excinfo: + record = TestRecord(("aa", sha1, sha256)) + excinfo.match(r'.*Invalid MD5.*Incorrect hash length') + + with pytest.raises(TypeError) as excinfo: + record = TestRecord((md5, "aa", sha256)) + excinfo.match(r'.*Invalid SHA1.*') + + with 
pytest.raises(TypeError) as excinfo: + record = TestRecord((md5, sha1, "aa")) + excinfo.match(r'.*Invalid SHA256.*') + + record = TestRecord() + assert record.digest is not None + assert record.digest.md5 is None + assert record.digest.sha1 is None + assert record.digest.sha256 is None + with pytest.raises(TypeError) as excinfo: + record.digest.md5 = "INVALID MD5" + excinfo.match(r'.*Invalid MD5.*') + + +def test_dynamic(): + TestRecord = RecordDescriptor("test/dynamic", [ + ("dynamic", "value"), + ]) + + r = TestRecord(b"bytes") + assert r.value == b"bytes" + assert isinstance(r.value, flow.record.fieldtypes.bytes) + + r = TestRecord(u"string") + assert r.value == u"string" + assert isinstance(r.value, flow.record.fieldtypes.string) + + r = TestRecord(123) + assert r.value == 123 + assert isinstance(r.value, flow.record.fieldtypes.varint) + + r = TestRecord(True) + assert r.value + assert isinstance(r.value, flow.record.fieldtypes.boolean) + + r = TestRecord([1, 2, 3]) + assert r.value == [1, 2, 3] + assert isinstance(r.value, flow.record.fieldtypes.stringlist) + + now = datetime.datetime.utcnow() + r = TestRecord(now) + assert r.value == now + assert isinstance(r.value, flow.record.fieldtypes.datetime) + + +def test_fieldtype_for_value(): + assert fieldtype_for_value(True) == "boolean" + assert fieldtype_for_value(False) == "boolean" + assert fieldtype_for_value(1337) == "varint" + assert fieldtype_for_value(1.337) == "float" + assert fieldtype_for_value(b"\r\n") == "bytes" + assert fieldtype_for_value("hello world") == "string" + assert fieldtype_for_value(datetime.datetime.now()) == "datetime" + assert fieldtype_for_value([1, 2, 3, 4, 5]) == "string" + assert fieldtype_for_value([1, 2, 3, 4, 5], None) is None + assert fieldtype_for_value(object(), None) is None + + +if __name__ == "__main__": + __import__("standalone_test").main(globals()) diff --git a/tests/test_json_packer.py b/tests/test_json_packer.py new file mode 100644 index 0000000..cfce228 --- /dev/null +++ b/tests/test_json_packer.py @@ -0,0 +1,25 @@ +from __future__ import print_function +from datetime import datetime +from flow.record import JsonRecordPacker, RecordDescriptor + + +def test_record_in_record(): + packer = JsonRecordPacker() + dt = datetime.utcnow() + + RecordA = RecordDescriptor("test/record_a", [ + ("datetime", "some_dt"), + ]) + RecordB = RecordDescriptor("test/record_b", [ + ("record", "record"), + ("datetime", "some_dt"), + ]) + + record_a = RecordA(dt) + record_b = RecordB(record_a, dt) + + data_record_b = packer.pack(record_b) + record_b_unpacked = packer.unpack(data_record_b) + + assert record_b == record_b_unpacked + assert record_a == record_b_unpacked.record diff --git a/tests/test_json_record_adapter.py b/tests/test_json_record_adapter.py new file mode 100644 index 0000000..2b6b11a --- /dev/null +++ b/tests/test_json_record_adapter.py @@ -0,0 +1,71 @@ +import json +import datetime +from flow.record import RecordDescriptor, RecordWriter, RecordReader + + +def generate_records(count=100): + TestRecordEmbedded = RecordDescriptor("test/embedded_record", [ + ("datetime", "dt"), + ]) + TestRecord = RecordDescriptor("test/adapter", [ + ("uint32", "number"), + ("record", "record"), + ]) + + for i in range(count): + embedded = TestRecordEmbedded(datetime.datetime.utcnow()) + yield TestRecord(number=i, record=embedded) + + +def test_json_adapter(tmpdir): + json_file = tmpdir.join("records.json") + record_adapter_path = "jsonfile://{}".format(json_file) + writer = RecordWriter(record_adapter_path) + 
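# Illustrative sketch (assumptions: flow.record is installed and writable temp
# space exists; the "example/simple" descriptor below is made up for this
# sketch and is not part of the patch). It shows the same jsonfile:// adapter
# round trip that this test exercises, in isolation:
import os
import tempfile

from flow.record import RecordDescriptor, RecordReader, RecordWriter

Simple = RecordDescriptor("example/simple", [("uint32", "number")])
out_path = os.path.join(tempfile.mkdtemp(), "example.jsonl")

with RecordWriter("jsonfile://{}".format(out_path)) as sketch_writer:
    for n in range(3):
        sketch_writer.write(Simple(number=n))  # records serialized as JSON lines

with RecordReader("jsonfile://{}".format(out_path)) as sketch_reader:
    assert [rec.number for rec in sketch_reader] == [0, 1, 2]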
nr_records = 1337 + + for record in generate_records(nr_records): + writer.write(record) + writer.flush() + + nr_received_records = 0 + reader = RecordReader(record_adapter_path) + for record in reader: + nr_received_records += 1 + + assert nr_records == nr_received_records + + +def test_json_adapter_contextmanager(tmpdir): + json_file = tmpdir.join("records.json") + record_adapter_path = "jsonfile://{}".format(json_file) + with RecordWriter(record_adapter_path) as writer: + nr_records = 1337 + for record in generate_records(nr_records): + writer.write(record) + + nr_received_records = 0 + with RecordReader(record_adapter_path) as reader: + for record in reader: + nr_received_records += 1 + + assert nr_records == nr_received_records + + +def test_json_adapter_jsonlines(tmpdir): + json_file = tmpdir.join("data.jsonl") + + items = [ + {'some_float': 1.5, 'some_string': 'hello world', 'some_int': 1337, 'some_bool': True}, + {'some_float': 2.7, 'some_string': 'goodbye world', 'some_int': 12345, 'some_bool': False}, + ] + with open(json_file, "w") as fout: + for row in items: + fout.write(json.dumps(row) + "\n") + + record_adapter_path = "jsonfile://{}".format(json_file) + reader = RecordReader(record_adapter_path) + for index, record in enumerate(reader): + assert record.some_float == items[index]["some_float"] + assert record.some_string == items[index]["some_string"] + assert record.some_int == items[index]["some_int"] + assert record.some_bool == items[index]["some_bool"] diff --git a/tests/test_packer.py b/tests/test_packer.py new file mode 100644 index 0000000..4c5ffb2 --- /dev/null +++ b/tests/test_packer.py @@ -0,0 +1,216 @@ +import datetime + +from flow.record import fieldtypes +from flow.record import RecordDescriptor +from flow.record import RecordPacker +from flow.record.packer import RECORD_PACK_EXT_TYPE +from flow.record.fieldtypes import uri + + +def test_uri_packing(): + packer = RecordPacker() + + TestRecord = RecordDescriptor("test/uri", [ + ("uri", "path"), + ]) + + # construct with an url + record = TestRecord("http://www.google.com/evil.bin") + data = packer.pack(record) + record = packer.unpack(data) + assert record.path == "http://www.google.com/evil.bin" + assert record.path.filename == "evil.bin" + assert record.path.dirname == "/" + + # construct from uri() -> for windows=True + path = uri.from_windows(r"c:\Program Files\Fox-IT\flow is awesome.exe") + record = TestRecord(path) + data = packer.pack(record) + record = packer.unpack(data) + assert record.path == "c:/Program Files/Fox-IT/flow is awesome.exe" + assert record.path.filename == "flow is awesome.exe" + assert record.path.dirname == "/Program Files/Fox-IT" + + # construct using uri.from_windows() + path = uri.from_windows(r"c:\Users\Hello World\foo.bar.exe") + record = TestRecord(path) + data = packer.pack(record) + record = packer.unpack(data) + assert record.path == "c:/Users/Hello World/foo.bar.exe" + assert record.path.filename == "foo.bar.exe" + assert record.path.dirname == "/Users/Hello World" + + +def test_typedlist_packer(): + packer = RecordPacker() + TestRecord = RecordDescriptor("test/typedlist", [ + ("string[]", "string_value"), + ("uint32[]", "uint32_value"), + ("uri[]", "uri_value"), + ]) + + r1 = TestRecord(['a', 'b', 'c'], [1, 2, 3], ["/etc/passwd", "/etc/shadow"]) + data = packer.pack(r1) + r2 = packer.unpack(data) + + assert len(r1.string_value) == 3 + assert len(r1.uint32_value) == 3 + assert len(r1.uri_value) == 2 + assert r1.string_value[2] == 'c' + assert r1.uint32_value[1] == 2 + assert 
all([isinstance(v, uri) for v in r1.uri_value]) + assert r1.uri_value[1].filename == 'shadow' + + assert len(r2.string_value) == 3 + assert len(r2.uint32_value) == 3 + assert len(r2.uri_value) == 2 + assert r2.string_value[2] == 'c' + assert r2.uint32_value[1] == 2 + assert all([isinstance(v, uri) for v in r2.uri_value]) + assert r2.uri_value[1].filename == 'shadow' + + +def test_dictlist_packer(): + packer = RecordPacker() + TestRecord = RecordDescriptor("test/dictlist", [ + ("dictlist", "hits"), + ]) + + r1 = TestRecord([{"a": 1, "b": 2}, {"a": 3, "b": 4}]) + data = packer.pack(r1) + r2 = packer.unpack(data) + + assert len(r1.hits) == 2 + assert r1.hits == [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + assert r1.hits[0]["a"] == 1 + assert r1.hits[0]["b"] == 2 + assert r1.hits[1]["a"] == 3 + assert r1.hits[1]["b"] == 4 + + assert len(r2.hits) == 2 + assert r2.hits == [{"a": 1, "b": 2}, {"a": 3, "b": 4}] + assert r2.hits[0]["a"] == 1 + assert r2.hits[0]["b"] == 2 + assert r2.hits[1]["a"] == 3 + assert r2.hits[1]["b"] == 4 + + +def test_dynamic_packer(): + packer = RecordPacker() + TestRecord = RecordDescriptor("test/dynamic", [ + ("dynamic", "value"), + ]) + + t = TestRecord(123) + data = packer.pack(t) + r = packer.unpack(data) + + assert r.value == 123 + assert isinstance(r.value, fieldtypes.varint) + + t = TestRecord(b"bytes") + data = packer.pack(t) + r = packer.unpack(data) + + assert r.value == b"bytes" + assert isinstance(r.value, fieldtypes.bytes) + + t = TestRecord(u"string") + data = packer.pack(t) + r = packer.unpack(data) + + assert r.value == u"string" + assert isinstance(r.value, fieldtypes.string) + + t = TestRecord(True) + data = packer.pack(t) + r = packer.unpack(data) + + assert r.value + assert isinstance(r.value, fieldtypes.boolean) + + t = TestRecord([1, True, b"b", u"u"]) + data = packer.pack(t) + r = packer.unpack(data) + + assert r.value == [1, True, b"b", u"u"] + assert isinstance(r.value, fieldtypes.stringlist) + + now = datetime.datetime.utcnow() + t = TestRecord(now) + data = packer.pack(t) + r = packer.unpack(data) + + assert r.value == now + assert isinstance(r.value, fieldtypes.datetime) + + +def test_pack_record_desc(): + packer = RecordPacker() + TestRecord = RecordDescriptor("test/pack", [ + ("string", "a"), + ]) + ext_type = packer.pack_obj(TestRecord) + assert ext_type.code == RECORD_PACK_EXT_TYPE + assert ext_type.data == b"\x92\x02\x92\xa9test/pack\x91\x92\xa6string\xa1a" + desc = packer.unpack_obj(ext_type.code, ext_type.data) + assert desc.name == TestRecord.name + assert desc.fields.keys() == TestRecord.fields.keys() + assert desc._pack() == TestRecord._pack() + + +def test_pack_digest(): + packer = RecordPacker() + TestRecord = RecordDescriptor("test/digest", [ + ("digest", "digest"), + ]) + record = TestRecord(("d41d8cd98f00b204e9800998ecf8427e", None, None)) + data = packer.pack(record) + record = packer.unpack(data) + assert record.digest.md5 == "d41d8cd98f00b204e9800998ecf8427e" + assert record.digest.sha1 is None + assert record.digest.sha256 is None + + +def test_record_in_record(): + packer = RecordPacker() + dt = datetime.datetime.utcnow() + + RecordA = RecordDescriptor("test/record_a", [ + ("datetime", "some_dt"), + ]) + RecordB = RecordDescriptor("test/record_b", [ + ("record", "record"), + ("datetime", "some_dt"), + ]) + + record_a = RecordA(dt) + record_b = RecordB(record_a, dt) + + data_record_b = packer.pack(record_b) + record_b_unpacked = packer.unpack(data_record_b) + + assert record_b == record_b_unpacked + assert record_a == 
record_b_unpacked.record + + +def test_record_array(): + packer = RecordPacker() + + EmbeddedRecord = RecordDescriptor("test/record_a", [ + ("string", "some_field"), + ]) + ParentRecord = RecordDescriptor("test/record_b", [ + ("record[]", "subrecords"), + ]) + + parent = ParentRecord() + for i in range(3): + emb_record = EmbeddedRecord( + some_field="embedded record {}".format(i)) + parent.subrecords.append(emb_record) + + data_record_parent = packer.pack(parent) + parent_unpacked = packer.unpack(data_record_parent) + + assert parent == parent_unpacked diff --git a/tests/test_rdump.py b/tests/test_rdump.py new file mode 100644 index 0000000..b941b18 --- /dev/null +++ b/tests/test_rdump.py @@ -0,0 +1,178 @@ +import json +import base64 +import hashlib +import subprocess + +from flow.record import RecordDescriptor +from flow.record import RecordWriter, RecordReader + + +def test_rdump_pipe(tmp_path): + TestRecord = RecordDescriptor("test/record", [ + ("varint", "count"), + ("string", "foo"), + ]) + + path = tmp_path / "test.records" + writer = RecordWriter(path) + + for i in range(10): + writer.write(TestRecord(count=i, foo="bar")) + writer.close() + + # validate input + args = ["rdump", str(path)] + res = subprocess.Popen(args, stdout=subprocess.PIPE) + stdout, stderr = res.communicate() + assert len(stdout.splitlines()) == 10 + + # rdump test.records | wc -l + p1 = subprocess.Popen(["rdump", str(path)], stdout=subprocess.PIPE) + p2 = subprocess.Popen(["wc", "-l"], stdin=p1.stdout, stdout=subprocess.PIPE) + stdout, stderr = p2.communicate() + assert stdout.strip() == b"10" + + # (binary) rdump test.records -w - | rdump -s 'r.count == 5' + p1 = subprocess.Popen(["rdump", str(path), "-w", "-"], stdout=subprocess.PIPE) + p2 = subprocess.Popen( + ["rdump", "-s", "r.count == 5"], stdin=p1.stdout, stdout=subprocess.PIPE, + ) + stdout, stderr = p2.communicate() + assert stdout.strip() in (b"", b"") + + # (printer) rdump test.records | rdump -s 'r.count == 5' + p1 = subprocess.Popen(["rdump", str(path)], stdout=subprocess.PIPE) + p2 = subprocess.Popen( + ["rdump", "-s", "r.count == 5"], + stdin=p1.stdout, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = p2.communicate() + assert stdout.strip() == b"" + assert b"Unknown file format, not a RecordStream" in stderr.strip() + + # rdump test.records -w - | rdump -s 'r.count in (1, 3, 9)' -w filtered.records + path2 = tmp_path / "filtered.records" + p1 = subprocess.Popen(["rdump", str(path), "-w", "-"], stdout=subprocess.PIPE) + p2 = subprocess.Popen( + ["rdump", "-s", "r.count in (1, 3, 9)", "-w", str(path2)], stdin=p1.stdout, + ) + stdout, stderr = p2.communicate() + + reader = RecordReader(path2) + records = list(reader) + assert len(records) == 3 + assert {r.count for r in records} == {1, 3, 9} + + +def test_rdump_format_template(tmp_path): + TestRecord = RecordDescriptor("test/record", [ + ("varint", "count"), + ("string", "foo"), + ]) + + path = tmp_path / "test.records" + writer = RecordWriter(path) + + # generate some test records + for i in range(10): + writer.write(TestRecord(count=i, foo="bar")) + writer.close() + + # validate output with -f + args = ["rdump", str(path), "-f", "TEST: {count},{foo}"] + print(args) + res = subprocess.Popen(args, stdout=subprocess.PIPE) + stdout, stderr = res.communicate() + for i, line in enumerate(stdout.decode().splitlines()): + assert line == "TEST: {i},bar".format(i=i) + + +def test_rdump_json(tmp_path): + TestRecord = RecordDescriptor("test/record", [ + ("varint", "count"), + 
("string", "foo"), + ("bytes", "data"), + ("net.ipaddress", "ip"), + ("net.ipnetwork", "subnet"), + ("digest", "digest"), + ]) + + record_path = tmp_path / "test.records" + writer = RecordWriter(record_path) + + # generate some test records + for i in range(10): + data = str(i).encode() + md5 = hashlib.md5(data).hexdigest() + sha1 = hashlib.sha1(data).hexdigest() + sha256 = hashlib.sha256(data).hexdigest() + writer.write( + TestRecord( + count=i, + foo="bar" * i, + data=b"\x00\x01\x02\x03--" + data, + ip=u"172.16.0.{}".format(i), + subnet=u"192.168.{}.0/24".format(i), + digest=(md5, sha1, sha256), + )) + writer.close() + + # dump records as JSON lines + args = ["rdump", str(record_path), "--jsonlines"] + process = subprocess.Popen(args, stdout=subprocess.PIPE) + stdout, stderr = process.communicate() + + assert process.returncode == 0 + + # Basic validations in stdout + for i in range(10): + assert base64.b64encode("\x00\x01\x02\x03--{}".format(i).encode()) in stdout + assert u"192.168.{}.0/24".format(i).encode() in stdout + assert u"172.16.0.{}".format(i).encode() in stdout + assert ("bar" * i).encode() in stdout + + # Load json using json.loads() and validate key values + for i, line in enumerate(stdout.splitlines()): + json_dict = json.loads(line) + assert json_dict + if i == 0: + assert "_type" in json_dict + assert json_dict["_type"] == "recorddescriptor" + else: + count = i - 1 # fix offset as first line is the recorddescriptor information + data = str(count).encode() + md5 = hashlib.md5(data).hexdigest() + sha1 = hashlib.sha1(data).hexdigest() + sha256 = hashlib.sha256(data).hexdigest() + assert json_dict["count"] == count + assert json_dict["foo"] == "bar" * count + assert json_dict["data"] == base64.b64encode("\x00\x01\x02\x03--{}".format(count).encode()).decode() + assert json_dict["ip"] == u"172.16.0.{}".format(count) + assert json_dict["subnet"] == u"192.168.{}.0/24".format(count) + assert json_dict["digest"]["md5"] == md5 + assert json_dict["digest"]["sha1"] == sha1 + assert json_dict["digest"]["sha256"] == sha256 + + # Write jsonlines to file + path = tmp_path / "records.jsonl" + path.write_bytes(stdout) + json_path = "jsonfile://{}".format(path) + + # Read records from json and original records file and validate + for path in (json_path, record_path): + with RecordReader(path) as reader: + for i, record in enumerate(reader): + data = str(i).encode() + md5 = hashlib.md5(data).hexdigest() + sha1 = hashlib.sha1(data).hexdigest() + sha256 = hashlib.sha256(data).hexdigest() + assert record.count == i + assert record.ip == u"172.16.0.{}".format(i) + assert record.subnet == u"192.168.{}.0/24".format(i) + assert record.data == b"\x00\x01\x02\x03--" + data + assert record.digest.md5 == md5 + assert record.digest.sha1 == sha1 + assert record.digest.sha256 == sha256 + assert record.foo == "bar" * i diff --git a/tests/test_record.py b/tests/test_record.py new file mode 100644 index 0000000..d22a100 --- /dev/null +++ b/tests/test_record.py @@ -0,0 +1,613 @@ +import sys +import pytest +from flow.record import RECORD_VERSION +from flow.record import RecordDescriptor, RecordDescriptorError +from flow.record import RecordPacker +from flow.record import RecordWriter, RecordReader, RecordPrinter +from flow.record import Record, GroupedRecord +from flow.record import record_stream, extend_record +from flow.record import fieldtypes +from flow.record.stream import RecordFieldRewriter + +from . 
import utils_inspect as inspect + + +def test_record_creation(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ]) + + # No arguments defaults to None + r = TestRecord() + assert r.url is None + assert r.query is None + + # Keyword arguments + r = TestRecord(url="foo", query="bar") + assert r.url == "foo" + assert r.query == "bar" + + # Positional arguments + r = TestRecord("foo", "bar") + assert r.url == "foo" + assert r.query == "bar" + + # Single keyword argument + r = TestRecord(query="foo") + assert r.query == "foo" + assert r.url is None + + +def test_record_version(tmpdir): + path = "jsonfile://{}".format(tmpdir.join("test.jsonl").strpath) + writer = RecordWriter(path) + packer = RecordPacker() + TestRecord = RecordDescriptor("test/record", [ + ("string", "hello"), + ("string", "world"), + ]) + + r1 = TestRecord(hello="hello", world="world") + writer.write(r1) + data = packer.pack(r1) + u1 = packer.unpack(data) + print(repr(u1._desc)) + + assert u1.hello == r1.hello + assert u1.world == r1.world + + # change the order + TestRecord = RecordDescriptor("test/record", [ + ("string", "world"), + ("string", "hello"), + ]) + r2 = TestRecord(hello="hello", world="world") + writer.write(r2) + data = packer.pack(r2) + u2 = packer.unpack(data) + + assert u2.hello == r2.hello + assert u2.world == r2.world + print(repr(u2._desc)) + + # change fieldtypes + TestRecord = RecordDescriptor("test/record", [ + ("varint", "world"), + ("string", "hello"), + ]) + r3 = TestRecord(hello="hello", world=42) + writer.write(r3) + data = packer.pack(r3) + u3 = packer.unpack(data) + + writer.flush() + + assert u3._desc.identifier == r3._desc.identifier + assert u1._desc.identifier != u3._desc.identifier + assert u2._desc.identifier != u3._desc.identifier + assert u3.hello == r3.hello + assert u3.world == r3.world + + reader = RecordReader(path) + rec = [r for r in reader] + assert len(rec) == 3 + assert u3._desc.identifier == rec[2]._desc.identifier + assert u1._desc.identifier != rec[2]._desc.identifier + assert u2._desc.identifier != rec[2]._desc.identifier + assert u3.hello == rec[2].hello + assert u3.world == rec[2].world + + +def test_grouped_record(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "hello"), + ("string", "world"), + ("uint32", "count"), + ]) + WQMetaRecord = RecordDescriptor("wq/meta", [ + ("string", "assignee"), + ("string", "profile"), + ("string", "hello"), + ]) + + test_record = TestRecord("a", "b", 12345) + meta_record = WQMetaRecord("me", "this is a test", "other hello") + + grouped = GroupedRecord("grouped/wq", [test_record, meta_record]) + assert grouped.hello == "a" + assert grouped.world == "b" + assert grouped.count == 12345 + assert grouped.assignee == "me" + assert grouped.profile == "this is a test" + + grouped.profile = "omg" + grouped.hello = "new value" + assert grouped.hello == "new value" + assert grouped.profile == "omg" + assert grouped.records[0].hello == "new value" + assert grouped.records[1].hello == "other hello" + + grouped.records[1].hello = "testing" + assert grouped.hello != "testing" + assert grouped.hello == "new value" + assert grouped.records[1].hello == "testing" + + assert len(grouped.records) == 2 + + # test grouped._asdict + rdict = grouped._asdict() + assert set(["hello", "world", "count", "assignee", "profile", "hello"]) <= set(rdict) + + rdict = grouped._asdict(fields=["profile", "count", "_generated"]) + assert set(["profile", "count", "_generated"]) == set(rdict) + assert 
rdict["profile"] == "omg" + assert rdict["count"] == 12345 + + +def test_grouped_records_packing(tmpdir): + RecordA = RecordDescriptor("test/a", [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ]) + RecordB = RecordDescriptor("test/b", [ + ("string", "b_string"), + ("string", "common"), + ("uint32", "b_count"), + ]) + a = RecordA("hello", "world", 12345, _source="TheBadInternet", _classification="CLASSIFIED") + b = RecordB("good", "bye", 54321, _source="TheGoodInternet", _classification="TLP.WHITE") + assert isinstance(a, Record) + assert not isinstance(a, GroupedRecord) + + grouped = GroupedRecord("grouped/ab", [a, b]) + assert isinstance(grouped, (Record, GroupedRecord)) + assert [(f.typename, f.name) for f in grouped._desc.fields.values()] == [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ("string", "b_string"), + ("uint32", "b_count"), + ] + + path = tmpdir.join("grouped.records").strpath + writer = RecordWriter(path) + writer.write(grouped) + writer.write(grouped) + writer.write(grouped) + writer.write(grouped) + writer.write(grouped) + writer.flush() + + reader = RecordReader(path) + record = next(iter(reader)) + + # grouped record tests + assert isinstance(record, Record) + assert isinstance(record, GroupedRecord) + assert record.common == "world" # first 'key' has precendence + assert record.name == "grouped/ab" + assert record.a_string == "hello" + assert record.a_count == 12345 + assert record.b_count == 54321 + assert record.b_string == "good" + assert record._source == "TheBadInternet" + assert record._classification == "CLASSIFIED" + + # access 'common' on second record directly + assert record.records[1].common == "bye" + + # access raw records directly + assert len(record.records) == 2 + assert record.records[0]._desc.name == "test/a" + assert record.records[1]._desc.name == "test/b" + + # test using selectors + reader = RecordReader(path, selector="r.a_count == 12345") + assert len(list(iter(reader))) == 5 + + reader = RecordReader(path, selector="r.common == 'bye'") + assert len(list(iter(reader))) == 0 + reader = RecordReader(path, selector="r.common == 'world'") + assert len(list(iter(reader))) == 5 + + +def test_record_reserved_fieldname(): + with pytest.raises(RecordDescriptorError): + RecordDescriptor("test/a", [ + ("string", "_classification"), + ("string", "_source"), + ("uint32", "_generated"), + ]) + + +def test_record_printer_stdout(capsys): + Record = RecordDescriptor("test/a", [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ]) + record = Record("hello", "world", 10) + + # fake capsys to be a tty. 
+ def isatty(): + return True + capsys._capture.out.tmpfile.isatty = isatty + + writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout)) + writer.write(record) + + out, err = capsys.readouterr() + modifier = '' if isinstance(u'', str) else 'u' + expected = "\n".format(u=modifier) + assert out == expected + + +def test_record_field_limit(): + count = 1337 + fields = [('uint32', 'field_{}'.format(i)) for i in range(count)] + values = dict([('field_{}'.format(i), i) for i in range(count)]) + + Record = RecordDescriptor("test/limit", fields) + record = Record(**values) + + for i in range(count): + assert getattr(record, 'field_{}'.format(i)) == i + + # test kwarg init + record = Record(field_404=12345) + assert record.field_404 == 12345 + assert record.field_0 is None + + # test arg init + record = Record(200, 302, 404) + assert record.field_0 == 200 + assert record.field_1 == 302 + assert record.field_2 == 404 + assert record.field_404 is None + + # test arg + kwarg init + record = Record(200, 302, 404, field_502=502) + assert record.field_0 == 200 + assert record.field_1 == 302 + assert record.field_2 == 404 + assert record.field_3 is None + assert record.field_502 == 502 + + +def test_record_internal_version(): + Record = RecordDescriptor("test/a", [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ]) + + record = Record("hello", "world", 10) + assert record._version == RECORD_VERSION + + record = Record("hello", "world", 10, _version=1337) + assert record._version == RECORD_VERSION + + +def test_record_reserved_keyword(): + Record = RecordDescriptor("test/a", [ + ("string", "from"), + ("string", "and"), + ("uint32", "or"), + ("uint32", "normal"), + ]) + + init = Record.recordType.__init__ + sig = inspect.signature(init) + params = list(sig.parameters.values()) + assert init.__code__.co_argcount == 1 + assert len(params) == 3 + assert params[1].name == 'args' + assert params[1].kind == params[1].VAR_POSITIONAL + assert params[2].name == 'kwargs' + assert params[2].kind == params[2].VAR_KEYWORD + + r = Record('hello', 'world', 1337, 10) + assert getattr(r, 'from') == 'hello' + assert getattr(r, 'and') == 'world' + assert getattr(r, 'or') == 1337 + assert r.normal == 10 + + r = Record('some', 'missing', normal=5) + assert getattr(r, 'from') == 'some' + assert getattr(r, 'and') == 'missing' + assert getattr(r, 'or') is None + assert r.normal == 5 + + r = Record('from_value', **{'and': 'dict', 'or': 7331, 'normal': 3}) + assert getattr(r, 'from') == 'from_value' + assert getattr(r, 'and') == 'dict' + assert getattr(r, 'or') == 7331 + assert r.normal == 3 + + Record = RecordDescriptor("test/a", [ + ("uint32", "normal"), + ]) + + init = Record.recordType.__init__ + sig = inspect.signature(init) + params = list(sig.parameters.values()) + assert init.__code__.co_argcount == 6 + assert len(params) == 6 + assert params[1].name == 'normal' + assert params[1].kind == params[1].POSITIONAL_OR_KEYWORD + assert params[1].default is None + assert params[2].name == '_source' + assert params[2].kind == params[2].POSITIONAL_OR_KEYWORD + assert params[2].default is None + assert params[3].name == '_classification' + assert params[3].kind == params[3].POSITIONAL_OR_KEYWORD + assert params[3].default is None + assert params[4].name == '_generated' + assert params[4].kind == params[4].POSITIONAL_OR_KEYWORD + assert params[4].default is None + assert params[5].name == '_version' + assert params[5].kind == params[5].POSITIONAL_OR_KEYWORD + assert params[5].default is None + + 
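# Note on the two signature checks above: "from", "and" and "or" are Python
# keywords, so the generated __init__ cannot declare them as named parameters
# and appears to fall back to a generic (*args, **kwargs) signature, which is
# what the co_argcount == 1 check verifies; values for such fields are still
# reachable via getattr(), as the assertions show. With only valid identifiers
# the descriptor exposes real parameters again: co_argcount == 6 covers self,
# the "normal" field and the reserved _source/_classification/_generated/
# _version slots. Fields that merely shadow "self"/"cls" are exercised next.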
Record = RecordDescriptor("test/a", [ + ("uint32", "self"), + ("uint32", "cls"), + ]) + r = Record(1, 2) + assert r.self == 1 + assert r.cls == 2 + + +def test_record_stream(tmp_path): + Record = RecordDescriptor("test/counter", [ + ("uint32", "counter"), + ("string", "tag"), + ]) + + datasets = [ + tmp_path / "dataset1.records", + tmp_path / "dataset2.records.gz", + ] + + for ds in datasets: + writer = RecordWriter(str(ds)) + for i in range(100): + writer.write(Record(i, tag=ds.name)) + writer.close() + + datasets = [str(ds) for ds in datasets] + assert len(list(record_stream(datasets))) == len(datasets) * 100 + assert len(list(record_stream(datasets, "r.counter == 42"))) == len(datasets) + + +def test_record_replace(): + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "index"), + ("string", "foo"), + ]) + + t = TestRecord(1, "hello") + assert t.index == 1 + assert t.foo == "hello" + + t2 = t._replace(foo="bar", index=1337) + assert t2.foo == "bar" + assert t2.index == 1337 + + t3 = t._replace() + assert t3.index == 1 + assert t3.foo == "hello" + assert t3._source == t._source + assert t3._generated == t._generated + assert t3._version == t._version + + t4 = t2._replace(foo="test", _source="pytest") + assert t4.index == 1337 + assert t4.foo == "test" + assert t4._source == "pytest" + assert t4._generated == t2._generated + + with pytest.raises(ValueError) as excinfo: + t._replace(foobar="keyword does not exist") + excinfo.match(".*Got unexpected field names:.*foobar.*") + + +def test_record_init_from_record(): + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "index"), + ("string", "foo"), + ]) + + t = TestRecord(1, "hello") + assert t.index == 1 + assert t.foo == "hello" + + TestRecord2 = TestRecord.extend([ + ("string", "bar"), + ("uint32", "test"), + ]) + t2 = TestRecord2.init_from_record(t) + assert t2.index == 1 + assert t2.foo == "hello" + assert t2.bar is None + assert t2.test is None + + t2.bar = "bar" + t2.test = 3 + assert t2.bar == "bar" + assert t2.test == 3 + + TestRecord3 = RecordDescriptor("test/record3", [ + ("string", "test"), + ("uint32", "count"), + ]) + with pytest.raises(TypeError): + t3 = TestRecord3.init_from_record(t2, raise_unknown=True) + + # explicit raise_unknown=False + t3 = TestRecord3.init_from_record(t2, raise_unknown=False) + assert t3.test == "3" + assert t3.count is None + + # default should not raise either + t3 = TestRecord3.init_from_record(t2) + assert t3.test == "3" + assert t3.count is None + + +def test_record_asdict(): + Record = RecordDescriptor("test/a", [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ]) + record = Record("hello", "world", 1337) + rdict = record._asdict() + assert rdict.get("a_string") == "hello" + assert rdict.get("common") == "world" + assert rdict.get("a_count") == 1337 + assert set(rdict) == set(["a_string", "common", "a_count", "_source", "_generated", "_version", "_classification"]) + + rdict = record._asdict(fields=["common", "_source", "a_string"]) + assert set(rdict) == set(["a_string", "common", "_source"]) + + rdict = record._asdict(exclude=["a_count", "_source", "_generated", "_version"]) + assert set(rdict) == set(["a_string", "common", "_classification"]) + + rdict = record._asdict(fields=["common", "_source", "a_string"], exclude=["common"]) + assert set(rdict) == set(["a_string", "_source"]) + + +def test_recordfield_rewriter_expression(): + rewriter = RecordFieldRewriter(expression="upper_a = a_string.upper(); count_times_10 = a_count * 10") + Record = 
RecordDescriptor("test/a", [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ]) + record = Record("hello", "world", 1337) + new_record = rewriter.rewrite(record) + assert new_record.a_string == "hello" + assert new_record.common == "world" + assert new_record.a_count == 1337 + assert new_record.upper_a == "HELLO" + assert new_record.count_times_10 == 1337 * 10 + + +def test_recordfield_rewriter_fields(): + rewriter = RecordFieldRewriter(fields=["a_count"]) + Record = RecordDescriptor("test/a", [ + ("string", "a_string"), + ("string", "common"), + ("uint32", "a_count"), + ]) + record = Record("hello", "world", 1337) + new_record = rewriter.rewrite(record) + assert hasattr(new_record, "a_count") + assert not hasattr(new_record, "a_string") + assert not hasattr(new_record, "common") + + +def test_extend_record(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ]) + FooRecord = RecordDescriptor("test/foo", [ + ("varint", "foo"), + ("bytes", "query"), + ("bytes", "bar"), + ]) + HelloRecord = RecordDescriptor("test/hello", [ + ("string", "hello"), + ("string", "world"), + ("string", "url"), + ]) + + a = TestRecord("http://flow.record", "myquery") + b = FooRecord(12345, b"FOO", b"BAR") + c = HelloRecord("hello", "world", "http://hello.world") + + new = extend_record(a, [b, c]) + assert new._desc == RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ("varint", "foo"), + ("bytes", "bar"), + ("string", "hello"), + ("string", "world"), + ]) + assert new.url == "http://flow.record" + assert new.query == "myquery" + assert new.foo == 12345 + assert new.bar == b"BAR" + assert new.hello == "hello" + assert new.world == "world" + + new = extend_record(a, [b, c], replace=True) + assert new._desc == RecordDescriptor("test/record", [ + ("string", "url"), + ("bytes", "query"), + ("varint", "foo"), + ("bytes", "bar"), + ("string", "hello"), + ("string", "world"), + ]) + assert new.url == "http://hello.world" + assert new.query == b"FOO" + assert new.foo == 12345 + assert new.bar == b"BAR" + assert new.hello == "hello" + assert new.world == "world" + + +def test_extend_record_with_replace(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "ip"), + ("uint16", "port"), + ("string", "data"), + ("string", "note"), + ]) + ReplaceRecord = RecordDescriptor("test/foo", [ + ("net.ipaddress", "ip"), + ("net.tcp.Port", "port"), + ("bytes", "data"), + ("string", "location"), + ]) + + a = TestRecord("10.13.13.17", 80, "HTTP/1.1 200 OK\r\n", "webserver") + b = ReplaceRecord( + ip=a.ip, + port=a.port, + data=a.data.encode(), + location="DMZ", + ) + new = extend_record(a, [b], replace=False) + assert new.ip == "10.13.13.17" + assert new.port == 80 + assert new.data == "HTTP/1.1 200 OK\r\n" + assert new.note == "webserver" + assert new.location == "DMZ" + assert isinstance(new.ip, str) + assert isinstance(new.port, int) + assert isinstance(new.data, str) + assert isinstance(new.note, str) + assert isinstance(new.location, str) + assert new._desc.name == "test/record" + assert " len(before) + assert len(before) == 3 + assert len(after) == 6 + + +def test_record_archiver(tmpdir): + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "id"), + ]) + + records = [ + TestRecord(id=1, _generated=datetime.datetime(2017, 12, 6, 22, 10)), + TestRecord(id=2, _generated=datetime.datetime(2017, 12, 6, 23, 59)), + TestRecord(id=3, _generated=datetime.datetime(2017, 12, 7, 00, 00)), + ] + + p = tmpdir.mkdir("test") + + 
writer = RecordArchiver(p, name="archive-test") + for rec in records: + writer.write(rec) + writer.close() + + assert p.join("2017/12/06").check(dir=1) + assert p.join("2017/12/07").check(dir=1) + + assert p.join("2017/12/06/archive-test-20171206T22.records.gz").check(file=1) + assert p.join("2017/12/06/archive-test-20171206T23.records.gz").check(file=1) + assert p.join("2017/12/07/archive-test-20171207T00.records.gz").check(file=1) + + # test archiving + before = p.join("2017/12/06").listdir() + writer = RecordArchiver(p, name="archive-test") + for rec in records: + writer.write(rec) + writer.close() + after = p.join("2017/12/06").listdir() + + assert set(before).issubset(set(after)) + assert len(after) > len(before) + assert len(before) == 2 + assert len(after) == 4 + + +def test_record_writer_stdout(): + writer = RecordWriter() + assert writer.fp == getattr(sys.stdout, "buffer", sys.stdout) + + writer = RecordWriter(None) + assert writer.fp == getattr(sys.stdout, "buffer", sys.stdout) + + writer = RecordWriter("") + assert writer.fp == getattr(sys.stdout, "buffer", sys.stdout) + + # We cannot test RecordReader() because it will read from stdin during init + # reader = RecordReader() + # assert reader.fp == sys.stdin + + +def test_record_adapter_archive(tmpdir): + # archive some records, using "testing" as name + writer = RecordWriter("archive://{}?name=testing".format(tmpdir)) + dt = datetime.datetime.utcnow() + count = 0 + for rec in generate_records(): + writer.write(rec) + count += 1 + writer.close() + + # defaults to always archive by /YEAR/MONTH/DAY/ dir structure + outdir = tmpdir.join("{ts:%Y/%m/%d}".format(ts=dt)) + assert len(outdir.listdir()) + + # read the archived records and test filename and counts + count2 = 0 + for fname in outdir.listdir(): + assert fname.basename.startswith("testing-") + for rec in RecordReader(str(fname)): + count2 += 1 + assert count == count2 + + +def test_record_pathlib(tmp_path): + # Test support for Pathlib/PathLike objects + writer = RecordWriter(tmp_path / "test.records") + for rec in generate_records(100): + writer.write(rec) + writer.close() + + reader = RecordReader(tmp_path / "test.records") + assert len([rec for rec in reader]) == 100 + assert not isinstance(tmp_path / "test.records", str) + + +def test_record_pathlib_contextmanager(tmp_path): + with RecordWriter(tmp_path / "test.records") as writer: + for rec in generate_records(100): + writer.write(rec) + + with RecordReader(tmp_path / "test.records") as reader: + assert len([rec for rec in reader]) == 100 + assert not isinstance(tmp_path / "test.records", str) + + +def test_record_pathlib_contextmanager_double_close(tmp_path): + with RecordWriter(tmp_path / "test.records") as writer: + for rec in generate_records(100): + writer.write(rec) + writer.close() + + with RecordReader(tmp_path / "test.records") as reader: + assert len([rec for rec in reader]) == 100 + reader.close() + + +def test_record_invalid_recordstream(tmp_path): + path = str(tmp_path / "invalid_records") + with open(path, "wb") as f: + f.write(b"INVALID RECORD STREAM FILE") + + with pytest.raises(IOError): + with RecordReader(path) as reader: + for r in reader: + assert(r) + + +@pytest.mark.parametrize("adapter,contains", [ + ("csvfile", (b"5,hello,world", b"count,foo,bar,")), + ("jsonfile", (b'"count": 5', )), + ("text", (b"count=5", )), + ("line", (b"count = 5", b"--[ RECORD 5 ]--")), +]) +def test_record_adapter(adapter, contains, tmp_path): + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "count"), + 
("string", "foo"), + ("string", "bar"), + ]) + + # construct the RecordWriter with uri + path = tmp_path / "output" + uri = "{adapter}://{path!s}".format(adapter=adapter, path=path) + + # test parametrized contains + with RecordWriter(uri) as writer: + for i in range(10): + rec = TestRecord(count=i, foo="hello", bar="world") + writer.write(rec) + for pattern in contains: + assert pattern in path.read_bytes() + + # test include (excludes everything else except in include) + with RecordWriter("{}?fields=count".format(uri)) as writer: + for i in range(10): + rec = TestRecord(count=i, foo="hello", bar="world") + writer.write(rec) + + # test exclude + with RecordWriter("{}?exclude=count".format(uri)) as writer: + for i in range(10): + rec = TestRecord(count=i, foo="hello", bar="world") + writer.write(rec) + + +def test_text_record_adapter(capsys): + TestRecordWithFooBar = RecordDescriptor("test/record", [ + ("string", "name"), + ("string", "foo"), + ("string", "bar"), + ]) + TestRecordWithoutFooBar = RecordDescriptor("test/record2", [ + ("string", "name"), + ]) + format_spec = "Hello {name}, {foo} is {bar}!" + with RecordWriter(f"text://?format_spec={format_spec}") as writer: + # Format string with existing variables + rec = TestRecordWithFooBar(name="world", foo="foo", bar="bar") + writer.write(rec) + out, err = capsys.readouterr() + assert "Hello world, foo is bar!\n" == out + + # Format string with non-existing variables + rec = TestRecordWithoutFooBar(name="planet") + writer.write(rec) + out, err = capsys.readouterr() + assert "Hello planet, {foo} is {bar}!\n" == out + + +def test_recordstream_header(tmp_path): + # Create and delete a RecordWriter, with nothing happening + p = tmp_path / "out.records" + writer = RecordWriter(p) + del(writer) + assert p.read_bytes() == b"" + + # RecordWriter via context manager, always flushes and closes afterwards + p = tmp_path / "out2.records" + with RecordWriter(p) as writer: + pass + assert p.read_bytes() == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" + + # Manual create of RecordWriter with no records and close (no flush) + p = tmp_path / "out3.records" + writer = RecordWriter(p) + writer.close() + assert p.read_bytes() == b"" + + # Manual RecordWriter with no records but flush and close + p = tmp_path / "out3.records" + writer = RecordWriter(p) + writer.flush() + writer.close() + assert p.read_bytes() == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" + + # Manual RecordWriter with some records written, we flush to ensure output due to buffering + p = tmp_path / "out4.records" + writer = RecordWriter(p) + writer.write(next(generate_records())) + writer.flush() + del(writer) + assert p.read_bytes().startswith(b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n") + + +def test_recordstream_header_stdout(capsysbinary): + with RecordWriter() as writer: + pass + out, err = capsysbinary.readouterr() + assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" + + writer = RecordWriter() + del(writer) + out, err = capsysbinary.readouterr() + assert out == b"" + + writer = RecordWriter() + writer.close() + out, err = capsysbinary.readouterr() + assert out == b"" + + writer = RecordWriter() + writer.flush() + writer.close() + out, err = capsysbinary.readouterr() + assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" diff --git a/tests/test_record_descriptor.py b/tests/test_record_descriptor.py new file mode 100644 index 0000000..e9fde2b --- /dev/null +++ b/tests/test_record_descriptor.py @@ -0,0 +1,142 @@ +import struct +import hashlib + +from flow.record import RecordDescriptor +from 
flow.record import RecordField + + +def test_record_descriptor(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ("varint", "status"), + ]) + + # Get fields of type string + fields = TestRecord.getfields("string") + assert isinstance(fields, list) + assert len(fields) == 2 + assert isinstance(fields[0], RecordField) + assert fields[0].typename == "string" + assert fields[0].name == "url" + + # Get fields as tuples + fields = TestRecord.get_field_tuples() + assert isinstance(fields, tuple) + assert len(fields) == 3 + assert isinstance(fields[0], tuple) + assert fields[0][0] == "string" + assert fields[0][1] == "url" + + +def test_record_descriptor_clone(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ("varint", "status"), + ]) + + # Clone record descriptor + OtherRecord = RecordDescriptor("other/record", TestRecord) + + assert TestRecord.name == "test/record" + assert OtherRecord.name == "other/record" + assert TestRecord.descriptor_hash != OtherRecord.descriptor_hash + assert TestRecord.get_field_tuples() == OtherRecord.get_field_tuples() + + +def test_record_descriptor_extend(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ]) + + # Add field + ExtendedRecord = TestRecord.extend([("varint", "status")]) + + assert TestRecord.name == "test/record" + assert ExtendedRecord.name == "test/record" + assert TestRecord.descriptor_hash != ExtendedRecord.descriptor_hash + assert len(TestRecord.get_field_tuples()) == 2 + assert len(ExtendedRecord.get_field_tuples()) == 3 + + +def test_record_descriptor_hash_cache(): + # Get initial cache stats + TestRecord1 = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ]) + info = RecordDescriptor.calc_descriptor_hash.cache_info() + + # Create same descriptor, check cache hit increase + TestRecord2 = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ]) + info2 = RecordDescriptor.calc_descriptor_hash.cache_info() + assert info2.hits == info.hits + 1 + assert info.misses == info2.misses + assert TestRecord1.descriptor_hash == TestRecord2.descriptor_hash + + # Create different descriptor, check for cache miss increase + TestRecord3 = RecordDescriptor("test/record", [ + ("string", "url"), + ("string", "query"), + ("boolean", "test"), + ]) + info3 = RecordDescriptor.calc_descriptor_hash.cache_info() + assert info2.hits == info.hits + 1 + assert info3.misses == info.misses + 1 + assert TestRecord2.descriptor_hash != TestRecord3.descriptor_hash + + +def test_record_descriptor_hashing(): + """ Test if hashing is still consistent to keep compatibility """ + TestRecord = RecordDescriptor("test/hash", [ + ("boolean", "one"), + ("string", "two"), + ]) + + # known good values from flow.record version 1.4.1 + desc_hash = 1395243447 + desc_bytes = b"test/hashonebooleantwostring" + + # calculate + hash_digest = struct.unpack(">L", hashlib.sha256(desc_bytes).digest()[:4])[0] + assert desc_hash == hash_digest + + # verify current implementation + assert TestRecord.descriptor_hash == hash_digest + + +def test_record_descriptor_hash_eq(): + """ Tests __hash__() on RecordDescriptor """ + TestRecordSame1 = RecordDescriptor("test/same", [ + ("boolean", "one"), + ("string", "two"), + ]) + + TestRecordSame2 = RecordDescriptor("test/same", [ + ("boolean", "one"), + ("string", "two"), + ]) + + TestRecordDifferentName = RecordDescriptor("test/different", [ + ("boolean", "one"), + 
("string", "two"), + ]) + + TestRecordDifferentFields = RecordDescriptor("test/different", [ + ("varint", "one"), + ("float", "two"), + ]) + + # __hash__ + assert hash(TestRecordSame1) == hash(TestRecordSame2) + assert hash(TestRecordSame1) != hash(TestRecordDifferentName) + + # __eq__ + assert TestRecordSame1 == TestRecordSame2 + assert TestRecordSame1 != TestRecordDifferentName + assert TestRecordDifferentName != TestRecordDifferentFields diff --git a/tests/test_regression.py b/tests/test_regression.py new file mode 100644 index 0000000..d1c9ea4 --- /dev/null +++ b/tests/test_regression.py @@ -0,0 +1,376 @@ +import pytest +import codecs +import os +import datetime +import sys + +import msgpack + +from flow.record import ( + base, + whitelist, + fieldtypes, + Record, + GroupedRecord, + RecordDescriptor, + RecordPacker, + RECORD_VERSION, + RecordReader, + RecordWriter, +) +from flow.record.base import is_valid_field_name +from flow.record.packer import RECORD_PACK_EXT_TYPE, RECORD_PACK_TYPE_RECORD +from flow.record.selector import Selector, CompiledSelector + + +def test_datetime_serialization(): + packer = RecordPacker() + + now = datetime.datetime.utcnow() + + for tz in ["UTC", "Europe/Amsterdam"]: + os.environ["TZ"] = tz + + descriptor = RecordDescriptor(""" +test/datetime + datetime datetime; +""") + + record = descriptor.recordType(datetime=now) + data = packer.pack(record) + r = packer.unpack(data) + + assert r.datetime == now + + +def test_long_int_serialization(): + packer = RecordPacker() + + long_types = RecordDescriptor(""" +test/long_types + varint long_type; + varint int_type; + varint long_type_neg; + varint int_type_neg; + varint max_int_as_long; + """) + + l = 1239812398217398127398217389217389217398271398217321 # noqa: E741 + i = 888888 + lneg = -3239812398217398127398217389217389217398271398217321 + ineg = -988888 + max_int_as_long = sys.maxsize + + record = long_types(l, i, lneg, ineg, max_int_as_long) + data = packer.pack(record) + r = packer.unpack(data) + + assert r.long_type == l + assert r.int_type == i + assert r.long_type_neg == lneg + assert r.int_type_neg == ineg + assert r.max_int_as_long == max_int_as_long + + +def test_unicode_serialization(): + packer = RecordPacker() + + descriptor = RecordDescriptor(""" +test/unicode + string text; +""") + + puny_domains = [b'xn--s7y.co', b'xn--80ak6aa92e.com', b'xn--pple-43d.com'] + + for p in puny_domains: + domain = codecs.decode(p, "idna") + record = descriptor.recordType(text=domain) + d = packer.pack(record) + record2 = packer.unpack(d) + + assert record.text == record2.text + assert record.text == domain + + +def test_pack_long_int_serialization(): + packer = RecordPacker() + # test if 'long int' that fit in the 'int' type would be packed as int internally + + max_neg_int = -0x8000000000000000 + d = packer.pack([1234, 123456, max_neg_int, sys.maxsize]) + assert d == b'\x94\xcd\x04\xd2\xce\x00\x01\xe2@\xd3\x80\x00\x00\x00\x00\x00\x00\x00\xcf\x7f\xff\xff\xff\xff\xff\xff\xff' # noqa: E501 + + +def test_non_existing_field(): + # RecordDescriptor that is used to test locally in the Broker client + TestRecord = RecordDescriptor("test/record", [ + ("string", "text"), + ]) + x = TestRecord(text="Fox-IT, For a More Secure Society") + + # r.content does not exist in the RecordDescriptor + assert Selector('lower("Fox-IT") in lower(r.content)').match(x) is False + assert Selector('"Fox-IT" in r.content').match(x) is False + # because the field does not exist, it will still evaluate to False even for negative matches + assert 
Selector('"Fox-IT" not in r.content').match(x) is False + assert Selector('"Fox-IT" in r.content').match(x) is False + assert Selector('"Fox-IT" != r.content').match(x) is False + assert Selector('"Fox-IT" == r.content').match(x) is False + assert Selector('r.content == "Fox-IT, For a More Secure Society"').match(x) is False + assert Selector('r.content != "Fox-IT, For a More Secure Society"').match(x) is False + assert Selector('r.content in "Fox-IT, For a More Secure Society!"').match(x) is False + assert Selector('r.content not in "Fox-IT, For a More Secure Society!"').match(x) is False + + # r.text exist in the RecordDescriptor + assert Selector('"fox-it" in lower(r.text)').match(x) + assert Selector('r.text in "Fox-IT, For a More Secure Society!!"').match(x) + assert Selector('r.text == "Fox-IT, For a More Secure Society"').match(x) + assert Selector('r.text != "Fox-IT"').match(x) + assert Selector('lower("SECURE") in lower(r.text)').match(x) + assert Selector('"f0x-1t" not in lower(r.text)').match(x) + assert Selector('lower("NOT SECURE") not in lower(r.text)').match(x) + + +def test_set_field_type(): + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "value"), + ]) + + r = TestRecord(1) + + assert isinstance(r.value, fieldtypes.uint32) + r.value = 2 + assert isinstance(r.value, fieldtypes.uint32) + + with pytest.raises(ValueError): + r.value = 'lalala' + r.value = 2 + + r = TestRecord() + assert r.value is None + r.value = 1234 + assert r.value == 1234 + with pytest.raises(TypeError): + r.value = [1, 2, 3, 4, 5] + + +def test_packer_unpacker_none_values(): + """Tests packing and unpacking of Empty records (default values of None).""" + packer = RecordPacker() + + # construct field types from all available fieldtypes + field_tuples = [] + for typename in whitelist.WHITELIST: + fieldname = "field_{}".format(typename.replace(".", "_").lower()) + field_tuples.append((typename, fieldname)) + + # create a TestRecord descriptor containing all the fieldtypes + TestRecord = RecordDescriptor("test/empty_record", field_tuples) + + # initialize an Empty record and serialize/deserialize + record = TestRecord() + data = packer.pack(record) + r = packer.unpack(data) + assert isinstance(r, Record) + + +def test_fieldname_regression(): + TestRecord = RecordDescriptor("test/uri_typed", [ + ("string", "fieldname"), + ]) + rec = TestRecord('omg regression') + + assert rec in Selector("r.fieldname == 'omg regression'") + + with pytest.raises(AttributeError): + assert rec not in Selector("fieldname == 'omg regression'") + + +def test_version_field_regression(): + packer = RecordPacker() + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "value"), + ]) + + r = TestRecord(1) + + assert r.__slots__[-1] == '_version' + + r._version = 256 + data = packer.pack(r) + with pytest.warns(RuntimeWarning) as record: + packer.unpack(data) + + assert len(record) == 1 + assert record[0].message.args[0].startswith("Got old style record with no version information") + + r._version = RECORD_VERSION + 1 if RECORD_VERSION < 255 else RECORD_VERSION - 1 + data = packer.pack(r) + with pytest.warns(RuntimeWarning) as record: + packer.unpack(data) + + assert len(record) == 1 + assert record[0].message.args[0].startswith("Got other version record") + + +def test_reserved_field_count_regression(): + del base.RESERVED_FIELDS['_version'] + base.RESERVED_FIELDS['_extra'] = 'varint' + base.RESERVED_FIELDS['_version'] = 'varint' + + TestRecordExtra = RecordDescriptor("test/record", [ + ("uint32", "value"), + ]) + + 
del base.RESERVED_FIELDS['_extra'] + + TestRecordBase = RecordDescriptor("test/record", [ + ("uint32", "value"), + ]) + + packer = RecordPacker() + r = TestRecordExtra(1, _extra=1337) + + assert r.value == 1 + assert r._extra == 1337 + + data = packer.pack(r) + packer.register(TestRecordBase) + + unpacked = packer.unpack(data) + + with pytest.raises(AttributeError): + unpacked._extra + + assert unpacked.value == 1 + assert unpacked._version == 1 + + +def test_no_version_field_regression(): + # Emulate old style record + packer = RecordPacker() + TestRecord = RecordDescriptor("test/record", [ + ("uint32", "value"), + ]) + packer.register(TestRecord) + + r = TestRecord(1) + + packed = r._pack() + mod = (packed[0], packed[1][:-1]) # Strip version field + rdata = packer.pack((RECORD_PACK_TYPE_RECORD, mod)) + data = packer.pack(msgpack.ExtType(RECORD_PACK_EXT_TYPE, rdata)) + + with pytest.warns(RuntimeWarning) as record: + unpacked = packer.unpack(data) + + assert len(record) == 1 + assert record[0].message.args[0].startswith("Got old style record with no version information") + + assert unpacked.value == 1 + assert unpacked._version == 1 # Version field implicitly added + + +def test_mixed_case_name(): + assert is_valid_field_name("Test") + assert is_valid_field_name("test") + assert is_valid_field_name("TEST") + + TestRecord = RecordDescriptor("Test/Record", [ + ("uint32", "Value"), + ]) + + r = TestRecord(1) + assert r.Value == 1 + + +def test_multi_grouped_record_serialization(tmp_path): + TestRecord = RecordDescriptor("Test/Record", [ + ("net.ipv4.Address", "ip"), + ]) + GeoRecord = RecordDescriptor("geoip/country", [ + ("string", "country"), + ("string", "city"), + ]) + ASNRecord = RecordDescriptor("geoip/asn", [ + ("string", "asn"), + ("string", "isp"), + ]) + + test_rec = TestRecord("1.3.3.7") + geo_rec = GeoRecord(country="Netherlands", city="Delft") + + grouped_rec = GroupedRecord("grouped/geoip", [test_rec, geo_rec]) + asn_rec = ASNRecord(asn="1337", isp="Cyberspace") + record = GroupedRecord("grouped/geo/asn", [grouped_rec, asn_rec]) + + assert record.ip == "1.3.3.7" + assert record.country == "Netherlands" + assert record.city == "Delft" + assert record.asn == "1337" + assert record.isp == "Cyberspace" + + writer = RecordWriter(tmp_path / "out.record") + writer.write(record) + writer.close() + + reader = RecordReader(tmp_path / "out.record") + records = list(reader) + assert len(records) == 1 + record = records[0] + assert record.ip == "1.3.3.7" + assert record.country == "Netherlands" + assert record.city == "Delft" + assert record.asn == "1337" + assert record.isp == "Cyberspace" + + +@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector]) +def test_ast_unicode_literals(PSelector): + TestRecord = RecordDescriptor("Test/Record", []) + assert TestRecord() in PSelector("get_type('string literal') == get_type(u'hello')") + assert TestRecord() in PSelector("get_type('not bytes') != get_type(b'hello')") + + +def test_grouped_replace(): + TestRecord = RecordDescriptor("test/adapter", [ + ("uint32", "number"), + ]) + OtherRecord = RecordDescriptor("test/other", [ + ("string", "other"), + ]) + + # Constructing grouped record normally + record = TestRecord(number=1) + other_record = OtherRecord("foobar") + grouped_record = GroupedRecord("grouped/original", [record, other_record]) + assert(grouped_record._source is None) + assert(grouped_record.number == 1) + assert(grouped_record.other == "foobar") + + # Constructing grouped record normally (using a replaced record) + 
replaced_record = record._replace(_source="newsource") + grouped_record = GroupedRecord("grouped/replaced", [replaced_record, other_record]) + assert(grouped_record._source == "newsource") + assert(grouped_record.number == 1) + assert(grouped_record.other == "foobar") + + # Test GroupedRecord replace + replaced_grouped_record = grouped_record._replace(number=100) + assert(replaced_grouped_record.number == 100) + assert(replaced_grouped_record.other == "foobar") + + # Test with multiple replacements + replaced_grouped_record = grouped_record._replace(number=200, other="a string", _source="testcase") + assert(replaced_grouped_record.number == 200) + assert(replaced_grouped_record.other == "a string") + assert(replaced_grouped_record._source == "testcase") + + # Replacement with non existing field should raise a ValueError + with pytest.raises(ValueError) as excinfo: + grouped_record._replace(number=100, other="changed", non_existing_field="oops") + excinfo.match(".*Got unexpected field names:.*non_existing_field.*") + + +if __name__ == "__main__": + __import__("standalone_test").main(globals()) diff --git a/tests/test_selector.py b/tests/test_selector.py new file mode 100644 index 0000000..8a9fda1 --- /dev/null +++ b/tests/test_selector.py @@ -0,0 +1,504 @@ +from datetime import datetime + +import pytest + +from flow.record import RecordDescriptor +from flow.record.selector import CompiledSelector, InvalidOperation, Selector + + +def test_selector_func_name(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "query"), + ("string", "url"), + ]) + assert TestRecord(None, None) not in Selector("name(r) == 'foo/bar'") + assert TestRecord(None, None) in Selector("name(r) == 'test/record'") + + +def test_selector(): + TestRecord = RecordDescriptor("test/record", [ + ("string", "query"), + ("string", "url"), + ]) + TestRecord2 = RecordDescriptor("test/record2", [ + ("string", "key"), + ("string", "content"), + ]) + + assert TestRecord("foo", "bar") in Selector("r.query == 'foo'") + assert TestRecord(None, None) not in Selector("r.query == 'foo'") + assert TestRecord(None, None) not in Selector("name(r.query) == 'XX'") + + with pytest.raises(InvalidOperation): + assert TestRecord(None, None) not in Selector("r.__class__ == 'str'") + + s = Selector("lower(upper(r.content)) == 'xx'") + assert TestRecord("XX", "XX") not in s + assert TestRecord2("XX", "XX") in s + + assert TestRecord(None, "BAR") in Selector( + "lower(r.query) == 'test' or lower(r.adsadsa) == 't' or lower(r.url) == 'bar'") + + with pytest.raises(InvalidOperation): + assert TestRecord() in Selector("invalid_func(r.invalid_field, 1337) or r.id == 4") + + +def test_selector_meta_query_true(): + source = "internal/flow.record.test" + + desc = RecordDescriptor("test/record", [ + ("string", "value"), + ]) + rec = desc("value", _source=source) + assert rec in Selector("r._source == '{}'".format(source)) + + +def test_selector_meta_query_false(): + source = "internal/flow.record.test" + + desc = RecordDescriptor("test/record", [ + ("string", "value"), + ]) + rec = desc("value", _source=source + "nope") + assert (rec in Selector("r._source == '{}'".format(source))) is False + + +def test_selector_basic_query_true(): + md5hash = "My MD5 hash!" + + desc = RecordDescriptor("test/md5_hash", [ + ("string", "md5"), + ]) + rec = desc(md5hash) + assert rec in Selector("r.md5 == '{}'".format(md5hash)) + + +def test_selector_basic_query_false(): + md5hash = "My MD5 hash!" 
+ + desc = RecordDescriptor("test/md5_hash", [ + ("string", "md5"), + ]) + rec = desc(md5hash + "nope") + assert (rec in Selector("r.md5 == '{}'".format(md5hash))) is False + + +def test_selector_non_existing_field(): + md5hash = "My MD5 hash!" + + desc = RecordDescriptor("test/md5_hash", [ + ("string", "md5"), + ]) + rec = desc(md5hash) + assert (rec in Selector("r.non_existing_field == 1337")) is False + + +# [MS] Disabled, list types? +# def test_selector_string_in_array(): +# obj = Expando() +# obj.filenames = ['record_mitchel_keystrokes.exe', 'python.exe', 'chrome.exe'] + +# s = Selector("'{}' in r.filenames".format(obj.filenames[0])) +# assert (obj in s) is True + + +def test_selector_string_contains(): + desc = RecordDescriptor("test/filetype", [ + ("string", "filetype"), + ]) + rec = desc('PE32 executable (GUI) Intel 80386, for MS Windows') + + assert rec in Selector("'PE' in r.filetype") + + +def test_selector_not_in_operator(): + desc = RecordDescriptor("test/md5_hash", [ + ("string", "filetype"), + ]) + rec = desc('PE32 executable (GUI) Intel 80386, for MS Windows') + + assert rec in Selector("'ELF' not in r.filetype") + + +def test_selector_or_operator(): + desc = RecordDescriptor("test/filetype", [ + ("string", "filetype"), + ]) + rec = desc('PE32 executable (GUI) Intel 80386, for MS Windows') + + assert rec in Selector("'PE32' in r.filetype or 'PE64' in r.xxxx") + + +def test_selector_and_operator(): + desc = RecordDescriptor("test/filetype", [ + ("string", "filetype"), + ("string", "xxxx"), + ]) + + rec = desc('PE32 executable (GUI) Intel 80386, for MS Windows', 'PE32 executable (GUI) Intel 80386, for MS Windows') + + assert rec in Selector("'PE32' in r.filetype and 'PE32' in r.xxxx") + + +def test_selector_in_function(): + desc = RecordDescriptor("test/filetype", [ + ("string", "filetype"), + ]) + rec = desc('PE32 executable (GUI) Intel 80386, for MS Windows') + + assert rec in Selector("'pe' in lower(r.filetype)") + + +def test_selector_function_call_whitelisting(): + TestRecord = RecordDescriptor("test/filetype", [ + ("string", "filetype"), + ]) + rec = TestRecord('PE32 executable (GUI) Intel 80386, for MS Windows') + + # We allow explicitly exposed functions + assert rec in Selector("'pe32' in lower(r.filetype)") + # But functions on types are not + with pytest.raises(Exception) as excinfo: + rec in Selector("'pe' in r.filetype.lower()") + + assert rec in Selector("'EXECUTABLE' in upper(r.filetype)") + with pytest.raises(Exception) as excinfo: + rec in Selector("'EXECUTABLE' in r.filetype.upper()") + + IPRecord = RecordDescriptor("test/address", [ + ("net.ipv4.Address", "ip"), + ]) + rec = IPRecord("192.168.1.1") + assert rec in Selector("r.ip in net.ipv4.Subnet('192.168.1.0/24')") + assert rec not in Selector("r.non_existing_field in net.ipv4.Subnet('192.168.1.0/24')") + + # We call net.ipv4 instead of net.ipv4.Subnet, which should fail + with pytest.raises(Exception) as excinfo: + assert rec in Selector("r.ip in net.ipv4('192.168.1.0/24')") + excinfo.match("Call 'net.ipv4' not allowed. 
No calls other then whitelisted 'global' calls allowed!") + + +def test_selector_subnet(): + desc = RecordDescriptor("test/ip", [ + ("net.ipv4.Address", "ip"), + ]) + rec = desc('192.168.10.1') + + assert rec in Selector("r.ip in net.ipv4.Subnet('192.168.10.1/32')") + assert rec in Selector("r.ip in net.ipv4.Subnet('192.168.10.0/24')") + assert rec in Selector("r.ip in net.ipv4.Subnet('192.168.0.0/16')") + assert rec in Selector("r.ip in net.ipv4.Subnet('192.0.0.0/8')") + assert rec in Selector("r.ip in net.ipv4.Subnet('192.168.10.1')") + assert rec in Selector("r.ip not in net.ipv4.Subnet('10.0.0.0/8')") + + +def test_field_equals(): + desc = RecordDescriptor("test/record", [ + ("string", "mailfrom"), + ("string", "mailto"), + ("string", "foo"), + ]) + rec = desc("hello@world.com", "foo@bar.com", "testing") + assert rec in CompiledSelector("field_equals(r, ['mailfrom', 'mailto'], ['hello@world.com',])") + assert rec in CompiledSelector("field_equals(r, ['mailfrom', 'mailto'], ['hElLo@WoRlD.com',])") + assert rec not in CompiledSelector("field_equals(r, ['mailfrom', 'mailto'], ['hElLo@WoRlD.com',], nocase=False)") + assert rec not in CompiledSelector("field_equals(r, ['mailfrom', 'mailto'], ['hello',])") + + +def test_field_contains(): + desc = RecordDescriptor("test/record", [ + ("string", "mailfrom"), + ("string", "mailto"), + ("string", "foo"), + ]) + rec = desc("hello@world.com", "foo@bar.com", "testing") + rec2 = desc("hello@world.com", "foo@bar.com") + + assert rec in CompiledSelector("field_contains(r, ['mailfrom', 'mailto'], ['foo@bar.com', 'test@fox-it.com'])") + assert rec in CompiledSelector("field_contains(r, ['mailfrom', 'mailto'], ['FOO', 'HELLO'])") + assert rec in Selector("field_contains(r, ['mailfrom', 'mailto'], ['FOO', 'HELLO'])") + assert rec2 not in CompiledSelector("field_contains(r, ['testing'], ['TEST@fox-it.com'])") + + +def test_field_contains_word_boundary(): + desc = RecordDescriptor("test/record", [ + ("string", "mailfrom"), + ("string", "mailto"), + ("string", "foo"), + ("string", "content"), + ]) + rec = desc("hello@world.com", "foo@bar.com", "testing", "This is a testing string") + rec2 = desc("helloworld@world.com", "foo@bar.com") + rec3 = desc(None, None) + rec4 = desc(None, None, "hello@world.com") + rec5 = desc() + assert rec in Selector( + "field_contains(r, ['mailfrom', 'mailto'], ['hello'], word_boundary=True)") + assert rec not in Selector( + "field_contains(r, ['mailfrom', 'mailto'], ['hello.'], word_boundary=True)") # Check regex escaping... 
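+    # The needle should be treated literally (regex-escaped), so the '.' in 'hello.' does not match the '@' in 'hello@world.com'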
+ assert rec not in Selector( + "field_contains(r, ['mailfrom', 'mailto'], ['HELLO'], nocase=False, word_boundary=True)") + assert rec2 not in Selector( + "field_contains(r, ['mailfrom', 'mailto'], ['hello'], word_boundary=True)") + assert rec2 not in Selector( + "field_contains(r, ['mailfrom', 'mailto', 'nonexistingfield'], ['hello'], word_boundary=True)") + assert rec3 not in Selector( + "field_contains(r, ['mailfrom', 'mailto'], ['hello'], word_boundary=True)") + assert rec4 in Selector( + "field_contains(r, ['mailfrom', 'mailto', 'foo'], ['hello'], word_boundary=True)") + assert rec5 not in Selector( + "field_contains(r, ['mailfrom', 'mailto', 'foo'], ['hello'], word_boundary=True)") + + assert rec not in Selector("field_contains(r, ['content'], ['sting'], word_boundary=True)") + assert rec in Selector("field_contains(r, ['content'], ['testing'], word_boundary=True)") + + +def test_field_regex(): + desc = RecordDescriptor("test/record", [ + ("string", "mailfrom"), + ("string", "mailto"), + ("string", "foo"), + ]) + rec = desc("hello@world.com", "foo@bar.com", "testing") + + assert rec in Selector(r"field_regex(r, ['mailfrom', 'mailto'], r'.+@.+\.com')") + assert rec in CompiledSelector(r"field_regex(r, ['mailfrom', 'mailto'], r'.+@.+\.com')") + assert rec not in Selector("field_regex(r, ['mailfrom', 'mailto'], r'.+@fox-it.com')") + assert rec not in CompiledSelector("field_regex(r, ['mailfrom', 'mailto'], r'.+@fox-it.com')") + + +def test_selector_uri(): + TestRecord = RecordDescriptor("test/uri", [ + ("uri", "uri"), + ]) + rec = TestRecord('http://www.google.com/evil.bin') + assert rec in Selector("r.uri.filename in ['evil.bin', 'foo.bar']") + + +def test_selector_typed(): + TestRecord = RecordDescriptor("test/uri_typed", [ + ("uri", "urifield1"), + ("uri", "urifield2"), + ("string", "stringfield"), + ]) + rec = TestRecord('helloworld.exe', 'another.bin', 'Fox-IT') + assert rec in Selector("Type.uri.filename == 'helloworld.exe'") + assert rec in CompiledSelector("Type.uri.filename == 'helloworld.exe'") + assert rec in Selector("Type.uri.filename != 'howdyworld.exe'") + assert rec in CompiledSelector("Type.uri.filename != 'howdyworld.exe'") + assert rec in Selector("'another' in Type.uri.filename") + assert rec in CompiledSelector("'another' in Type.uri.filename") + assert rec in Selector("field_contains(r, Type.uri.filename, ['hello'])") + assert rec in CompiledSelector("field_contains(r, Type.uri.filename, ['hello'])") + assert rec in Selector("field_equals(r, Type.uri.filename, ['another.bin'])") + assert rec in CompiledSelector("field_equals(r, Type.uri.filename, ['another.bin'])") + assert rec in Selector(r"field_regex(r, Type.uri.filename, r'hello\w{5}.exe')") + assert rec in CompiledSelector(r"field_regex(r, Type.uri.filename, r'hello\w{5}.exe')") + + # Test TypeMatcher reuse + assert rec in Selector("Type.uri.filename == 'helloworld.exe' or Type.uri.filename == 'another.bin'") + assert rec in CompiledSelector("Type.uri.filename == 'helloworld.exe' or Type.uri.filename == 'another.bin'") + + assert rec in Selector("Type.string == 'Fox-IT'") + assert rec in CompiledSelector("Type.string == 'Fox-IT'") + assert rec in Selector("field_equals(r, Type.string, ['Fox-IT'])") + assert rec in CompiledSelector("field_equals(r, Type.string, ['Fox-IT'])") + assert rec in Selector("field_contains(r, Type.string, ['Fox'])") + assert rec in CompiledSelector("field_contains(r, Type.string, ['Fox'])") + assert rec in Selector(r"field_regex(r, Type.string, r'Fox-\w{2}')") + assert rec in 
CompiledSelector(r"field_regex(r, Type.string, r'Fox-\w{2}')") + + assert rec not in Selector("Type.filename == 'lalala'") + assert rec not in CompiledSelector("Type.filename == 'lalala'") + assert rec not in Selector("Type.uri.filename == 'lalala'") + assert rec not in CompiledSelector("Type.uri.filename == 'lalala'") + assert rec not in Selector("field_contains(r, Type.uri.filename, ['nope'])") + assert rec not in CompiledSelector("field_contains(r, Type.uri.filename, ['nope'])") + assert rec not in Selector("field_equals(r, Type.uri.filename, ['nope'])") + assert rec not in CompiledSelector("field_equals(r, Type.uri.filename, ['nope'])") + assert rec not in Selector("field_regex(r, Type.uri.filename, 'nope')") + assert rec not in CompiledSelector("field_regex(r, Type.uri.filename, 'nope')") + + TestNamespaceRecord = RecordDescriptor("test/ip", [ + ("net.ipv4.Address", "ip"), + ]) + rec = TestNamespaceRecord('192.168.10.1') + + # This will only work in "normal" selectors, because we need to override the behaviour + # of the __contains__ operator to unwrap the requested values + assert rec in Selector("Type.net.ipv4.Address in net.ipv4.Subnet('192.168.10.1/32')") + assert rec in Selector("Type.net.ipv4.Address in net.ipv4.Subnet('192.168.10.0/24')") + assert rec in Selector("Type.net.ipv4.Address in net.ipv4.Subnet('192.168.0.0/16')") + assert rec in Selector("Type.net.ipv4.Address in net.ipv4.Subnet('192.0.0.0/8')") + assert rec in Selector("Type.net.ipv4.Address in net.ipv4.Subnet('192.168.10.1')") + assert rec in Selector("Type.net.ipv4.Address not in net.ipv4.Subnet('10.0.0.0/8')") + + with pytest.raises(InvalidOperation): + assert rec in Selector("Type.uri.filename.__class__ == 'invalid'") + + +def test_selector_unicode(): + TestRecord = RecordDescriptor("test/string", [ + ("string", "name"), + ]) + rec = TestRecord("Jack O'Neill") + assert rec not in Selector("field_contains(r, ['name'], [u'Jack O\u2019Neill'])") + + rec = TestRecord(u"jack o\u2019neill") + assert rec in Selector("field_contains(r, ['name'], [u'Jack O\u2019Neill'])") + + +def test_record_in_records(): + RecordA = RecordDescriptor("test/record_a", [ + ("datetime", "some_dt"), + ("string", "field"), + ]) + RecordB = RecordDescriptor("test/record_b", [ + ("record", "record"), + ("datetime", "some_dt"), + ]) + RecordC = RecordDescriptor("test/record_c", [ + ("record[]", "records"), + ]) + RecordD = RecordDescriptor("test/record_d", [ + ("string[]", "stringlist"), + ]) + + test_str = "this is a test" + dt = datetime.utcnow() + record_a = RecordA( + some_dt=dt, + field=test_str) + record_b = RecordB( + record=record_a, + some_dt=dt) + + subrecords = [] + record_d = None + for i in range(10): + record_d = RecordD( + stringlist=["aap", "noot", "mies", "Subrecord {}".format(i)]) + subrecords.append(record_d) + + subrecords.append(record_a) + record_c = RecordC( + records=subrecords) + + subrecords.append(None) + record_c_with_none_values = RecordC( + records=subrecords) + + assert record_b in Selector("r.record.field == '{}'".format(test_str)) + assert record_b in Selector("Type.string == '{}'".format(test_str)) + assert record_c in Selector("Type.string == '{}'".format(test_str)) + assert record_d in Selector("any(s == 'Subrecord 9' for s in r.stringlist)") + assert record_c in Selector("any(s == 'Subrecord 9' for e in r.records for s in e.stringlist)") + assert record_c_with_none_values in Selector( + "any(s == 'Subrecord 9' for e in r.records for s in e.stringlist)") + assert record_d not in Selector("any(s == 'Subrecord 
9' for s in r.nonexistingfield)")
+
+
+@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
+def test_non_existing_field(PSelector):
+    TestRecord = RecordDescriptor("test/record", [
+        ("string", "query"),
+        ("string", "url"),
+    ])
+
+    assert TestRecord("foo", "bar") not in PSelector("r.query and r.non_existing_field")
+    assert TestRecord("foo", "bar") in PSelector("not r.non_existing_field")
+    assert TestRecord("foo", "bar") in PSelector("r.query and r.url and not r.non_existing_field")
+
+
+@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
+def test_selector_modulo(PSelector):
+    TestRecord = RecordDescriptor("test/record", [
+        ("varint", "counter"),
+    ])
+
+    records = []
+    for i in range(300):
+        records.append(TestRecord(i))
+
+    selected = [rec for rec in records if rec in PSelector("r.counter % 10 == 0")]
+    assert len(selected) == 30
+
+    for rec in records:
+        sel = PSelector("r.counter % 10 == 0")
+        if rec.counter % 10 == 0:
+            assert rec in sel
+        else:
+            assert rec not in sel
+
+
+@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
+def test_selector_bit_and(PSelector):
+    TestRecord = RecordDescriptor("test/record", [
+        ("varint", "counter"),
+    ])
+
+    records = []
+    for i in range(300):
+        records.append(TestRecord(i))
+
+    for rec in records:
+        sel = PSelector("(r.counter & 0x0F) == 1")
+        if rec.counter & 0x0F == 1:
+            assert rec in sel
+        else:
+            assert rec not in sel
+
+
+@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
+def test_selector_bit_or(PSelector):
+    TestRecord = RecordDescriptor("test/record", [
+        ("varint", "counter"),
+    ])
+
+    records = []
+    for i in range(300):
+        records.append(TestRecord(i))
+
+    for rec in records:
+        sel = PSelector("(r.counter | 0x10) == 0x11")
+        if rec.counter | 0x10 == 0x11:
+            assert rec in sel
+        else:
+            assert rec not in sel
+
+
+@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
+def test_selector_modulo_non_existing_field(PSelector):
+    TestRecord = RecordDescriptor("test/record", [
+        ("varint", "counter"),
+    ])
+
+    records = []
+    for i in range(300):
+        records.append(TestRecord(i))
+
+    sel = PSelector("r.counter % 10 == 0")
+    for rec in records:
+        if rec.counter % 10 == 0:
+            assert rec in sel
+        else:
+            assert rec not in sel
+
+    # Test with non-existing fields
+    # using has_field() ensures that this works with CompiledSelector and Selector
+    sel = PSelector("has_field(r, 'counterz') and r.counterz % 10 == 0")
+    for rec in records:
+        if hasattr(rec, "counterz") and rec.counterz % 10 == 0:
+            assert rec in sel
+        else:
+            assert rec not in sel
+
+    # non-existing field but without the precheck (this does not work with CompiledSelector)
+    if PSelector is Selector:
+        sel = PSelector("r.counterz % 10 == 0")
+        for rec in records:
+            assert rec not in sel
+
+
+if __name__ == "__main__":
+    __import__("standalone_test").main(globals())
diff --git a/tests/test_splunk_adapter.py b/tests/test_splunk_adapter.py
new file mode 100644
index 0000000..38c910b
--- /dev/null
+++ b/tests/test_splunk_adapter.py
@@ -0,0 +1,112 @@
+from unittest import mock
+
+from flow.record import RecordDescriptor
+import flow.record.adapter.splunk
+from flow.record.adapter.splunk import splunkify
+
+
+def test_splunkify_reserved_field():
+
+    with mock.patch.object(
+        flow.record.adapter.splunk,
+        "RESERVED_SPLUNK_FIELDS",
+        set(["foo"])
+    ):
+        test_record_descriptor = RecordDescriptor(
+            "test/record",
+            [("string", "foo")]
+        )
+
+        test_record = test_record_descriptor(foo="bar")
+
+        output = 
splunkify(test_record) + assert output == 'type="test/record" rdtag=None rd_foo="bar"' + + +def test_splunkify_normal_field(): + + with mock.patch.object( + flow.record.adapter.splunk, + "RESERVED_SPLUNK_FIELDS", + set() + ): + test_record_descriptor = RecordDescriptor( + "test/record", + [("string", "foo")] + ) + + test_record = test_record_descriptor(foo="bar") + + output = splunkify(test_record) + assert output == 'type="test/record" rdtag=None foo="bar"' + + +def test_splunkify_rdtag_field(): + + with mock.patch.object( + flow.record.adapter.splunk, + "RESERVED_SPLUNK_FIELDS", + set() + ): + test_record_descriptor = RecordDescriptor( + "test/record", + ) + + test_record = test_record_descriptor() + + output = splunkify(test_record, tag="bar") + assert output == 'type="test/record" rdtag="bar"' + + +def test_splunkify_none_field(): + + with mock.patch.object( + flow.record.adapter.splunk, + "RESERVED_SPLUNK_FIELDS", + set() + ): + test_record_descriptor = RecordDescriptor( + "test/record", + [("string", "foo")] + ) + + test_record = test_record_descriptor() + + output = splunkify(test_record) + assert output == 'type="test/record" rdtag=None foo=None' + + +def test_splunkify_byte_field(): + + with mock.patch.object( + flow.record.adapter.splunk, + "RESERVED_SPLUNK_FIELDS", + set() + ): + test_record_descriptor = RecordDescriptor( + "test/record", + [("bytes", "foo")] + ) + + test_record = test_record_descriptor(foo=b"bar") + + output = splunkify(test_record) + assert output == 'type="test/record" rdtag=None foo="YmFy"' + + +def test_splunkify_backslash_quote_field(): + + with mock.patch.object( + flow.record.adapter.splunk, + "RESERVED_SPLUNK_FIELDS", + set() + ): + test_record_descriptor = RecordDescriptor( + "test/record", + [("string", "foo")] + ) + + test_record = test_record_descriptor(foo=b"\\\"") + + output = splunkify(test_record) + assert output == 'type="test/record" rdtag=None foo="\\\\\\""' diff --git a/tests/utils_inspect.py b/tests/utils_inspect.py new file mode 100644 index 0000000..4427491 --- /dev/null +++ b/tests/utils_inspect.py @@ -0,0 +1,58 @@ +""" +Backport of `inspect.signature` for Python 2. 
+
+Based on: https://github.com/python/cpython/blob/3.7/Lib/inspect.py
+"""
+
+import inspect
+import collections
+
+
+class _empty:
+    pass
+
+
+class Parameter:
+    POSITIONAL_ONLY = 0
+    POSITIONAL_OR_KEYWORD = 1
+    VAR_POSITIONAL = 2
+    KEYWORD_ONLY = 3
+    VAR_KEYWORD = 4
+
+    empty = _empty
+
+    def __init__(self, name, kind, default=_empty):
+        self.name = name
+        self.kind = kind
+        self.default = default
+
+
+class Signature:
+    empty = _empty
+
+    def __init__(self, parameters=None):
+        self.parameters = parameters
+
+
+def signature(obj):
+    try:
+        # Python 3
+        return inspect.signature(obj)
+    except AttributeError:
+        # Python 2
+        spec = inspect.getargspec(obj)
+
+        # Create parameter objects which are compatible with python 3 objects
+        parameters = collections.OrderedDict()
+        for i in range(0, len(spec.args)):
+            arg = spec.args[i]
+            default = _empty
+            if spec.defaults and (len(spec.args) - i <= len(spec.defaults)):
+                default = spec.defaults[i - len(spec.args)]
+            parameters[arg] = Parameter(name=arg, default=default, kind=Parameter.POSITIONAL_OR_KEYWORD)
+        if spec.varargs:
+            parameters[spec.varargs] = Parameter(name=spec.varargs, kind=Parameter.VAR_POSITIONAL)
+        if spec.keywords:
+            parameters[spec.keywords] = Parameter(name=spec.keywords, kind=Parameter.VAR_KEYWORD)
+
+        return Signature(parameters=parameters)
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..7293d76
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,58 @@
+[tox]
+envlist = lint, py3, pypy3
+# This version of tox will autoprovision itself and the requirements defined in
+# requires if they are not available on the host system.
+minversion = 3.8.0
+# This version of virtualenv installs a pip version of at least 19.0.1 in its
+# venvs.
+# Requiring minimally this version of virtualenv to be available prevents the
+# need of having to explicitly specify a pip>=19.0 dependency in every testenv.
+# pip>=19.0 is needed to ensure the sdist built by tox (which is built
+# according to PEP 517 and PEP 518 by tox versions >= 3.4.0) is also installed
+# properly (according to PEP 517 and PEP 518 by pip>=19.0) in the virtualenvs.
+# If the dependency is not available on the host system, and the installed tox
+# version is >= 3.3.0, tox will self bootstrap an environment with the proper
+# versions (including the version of tox itself).
+requires = virtualenv>=16.3.0
+isolated_build = true
+# Putting the dist dir in the project directory instead of in the {toxworkdir},
+# makes the sdist more easily accessible and prevents the need of rebuilding it
+# for the [testenv:build] target.
+distdir = {toxinidir}/dist
+
+[testenv]
+deps =
+    pytest
+    pytest-cov
+    coverage
+commands =
+# Capturing output will fail on pypy, possibly due to this issue: https://github.com/pytest-dev/pytest/issues/5502
+    pytest --basetemp="{envtmpdir}" {posargs:--color=yes --capture=no --cov=flow --cov-report=term-missing -v tests}
+    coverage report
+    coverage xml
+
+[testenv:lint]
+# Force the Python version here, so linting will be done with the correct
+# Python version. There should be no difference between the CPython and pypy
+# implementations, so we pick one.
+basepython = python3
+deps =
+    flake8
+commands =
+    flake8 flow tests setup.py
+
+[testenv:build]
+# Force the Python version here, so building will be done with the correct
+# Python version. As the distributions are pure Python, there should be no
+# difference between the CPython and pypy implementations, so we pick one.
+basepython = python3
+deps =
+commands =
+    pip wheel --no-deps -w ./dist . 
+ +[flake8] +max-line-length = 120 +extend-ignore = + # See https://github.com/PyCQA/pycodestyle/issues/373 + E203, +statistics = True