From 8cc7130c3e30e19747968e8761af9f9aa7eab82a Mon Sep 17 00:00:00 2001
From: colinleach <colin.leach@comcast.net>
Date: Sat, 8 Jul 2023 10:59:58 -0700
Subject: [PATCH] vector-filtering concept

---
 concepts/vector-filtering/.meta/config.json |  5 ++
 concepts/vector-filtering/about.md          | 94 +++++++++++++++++++++
 concepts/vector-filtering/introduction.md   | 60 +++++++++++++
 concepts/vector-filtering/links.json        |  6 ++
 config.json                                 |  5 ++
 5 files changed, 170 insertions(+)
 create mode 100644 concepts/vector-filtering/.meta/config.json
 create mode 100644 concepts/vector-filtering/about.md
 create mode 100644 concepts/vector-filtering/introduction.md
 create mode 100644 concepts/vector-filtering/links.json

diff --git a/concepts/vector-filtering/.meta/config.json b/concepts/vector-filtering/.meta/config.json
new file mode 100644
index 00000000..3553bd49
--- /dev/null
+++ b/concepts/vector-filtering/.meta/config.json
@@ -0,0 +1,5 @@
+{
+  "authors": ["colinleach"],
+  "contributors": [],
+  "blurb": "R has powerful and flexible ways to filter out the desired elements from a vector."
+}
\ No newline at end of file
diff --git a/concepts/vector-filtering/about.md b/concepts/vector-filtering/about.md
new file mode 100644
index 00000000..b8659667
--- /dev/null
+++ b/concepts/vector-filtering/about.md
@@ -0,0 +1,94 @@
+# About
+
+We saw in the `vectors` concept that a vector can be used in a conditional expression, giving a vector of booleans. This in turn can be used in functions such as `all()` and `any()`.
+
+```R
+> v <- c(4, 7, 10)
+> v >= 6
+[1] FALSE  TRUE  TRUE
+> all(v > 6)
+[1] FALSE    # not all elements match this condition
+> any(v > 6)
+[1] TRUE     # at least one element matches
+```
+
+The technique is much more powerful than this.
+
+## Vector subsets
+
+Selected elements of a vector can be pulled out with an index number or a vector of indices:
+
+```R
+> v <- 5:10
+> v
+[1]  5  6  7  8  9 10
+> v[3] 
+[1] 7
+> v[c(2, 4)]
+[1] 6 8
+```
+
+Alternatively, use a vector of booleans to filter the original vector, returning a subset of entries matched to a `TRUE` value:
+
+```R
+> v <- 1:3
+> bools <- c(FALSE, TRUE, TRUE)
+> v[bools]
+[1] 2 3
+```
+
+It is a small step from there to generating the boolean vector with a conditional expression:
+
+```R
+> v[v >= 2]
+[1] 2 3
+```
+
+These conditional expressions can be arbitrarily complex, involving multiple vectors or functions:
+
+```R
+> v <- 1:10
+> w <- 10:1
+> v[v > w]
+[1]  6  7  8  9 10
+```
+
+## Recycling
+
+For simplicity, the examples above all compared vectors of the same length.
+What happens if the lengths are mismatched?
+
+The answer is something powerful if used deliberately but confusing if unexpected.
+
+An example:
+
+```R
+> v <- 1:6
+> cond <- c(TRUE, FALSE)
+> v[cond]
+[1] 1 3 5
+```
+
+What happened there?
+We indexed a length-6 vector with a length-2 boolean vector, but got a length-3 vector as a result.
+
+If a vector is somehow "too short" for the expression it is used in, R will repeat it until the desired length is reached; this is called "recycling".
+This turned our `c(TRUE, FALSE)` vector into `c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE)`, and `v[cond]` then returned the elements of `v` at the `TRUE` positions: in this case, the odd numbers.
+
+So what is happening in the next example?
+
+```R
+> v <- 1:3
+> 0.5 * v
+[1] 0.5 1.0 1.5
+> 2 + v
+[1] 3 4 5
+```
+
+It looks at first like scalar multiplication and (a slightly odd) scalar addition, but there are no scalars in R.
+
+Instead, the numbers are recycled to get a vector matching the length of `v`, then the arithmetic operation is applied element-wise using `c(0.5, 0.5, 0.5)` or `c(2, 2, 2)`.
+
+This is very, very useful, but make sure it is what you intend.
+A slight mismatch in vector lengths (e.g. 99 vs 100) can cause confusing bugs.
+Some authors recommend that recycling should only be used when one of the vectors has length 1.
diff --git a/concepts/vector-filtering/introduction.md b/concepts/vector-filtering/introduction.md
new file mode 100644
index 00000000..9e785423
--- /dev/null
+++ b/concepts/vector-filtering/introduction.md
@@ -0,0 +1,60 @@
+# Introduction
+
+We saw in the `vectors` concept that a vector can be used in a conditional expression, giving a vector of booleans. This in turn can be used in functions such as `all()` and `any()`.
+
+```R
+> v <- c(4, 7, 10)
+> v >= 6
+[1] FALSE  TRUE  TRUE
+> all(v > 6)
+[1] FALSE    # not all elements match this condition
+> any(v > 6)
+[1] TRUE     # at least one element matches
+```
+
+The technique is much more powerful than this.
+
+## Vector subsets
+
+Selected elements of a vector can be pulled out with an index number or a vector of indices:
+
+```R
+> v <- 5:10
+> v
+[1]  5  6  7  8  9 10
+> v[3] 
+[1] 7
+> v[c(2, 4)]
+[1] 6 8
+```
+
+Alternatively, use a vector of booleans to filter the original vector, returning a subset of entries matched to a `TRUE` value:
+
+```R
+> v <- 1:3
+> bools <- c(FALSE, TRUE, TRUE)
+> v[bools]
+[1] 2 3
+```
+
+It is a small step from there to generating the boolean vector with a conditional expression:
+
+```R
+> v[v >= 2]
+[1] 2 3
+```
+
+## Finding indices
+
+The `which()` function takes a boolean vector and returns the indices of the elements that are `TRUE`.
+
+```R
+> v <- c(2, 7, 9)
+> v
+[1] 2 7 9
+> v[v > 5]  # returns values
+[1] 7 9
+
+> which(v > 5) # returns indices
+[1] 2 3
+```
diff --git a/concepts/vector-filtering/links.json b/concepts/vector-filtering/links.json
new file mode 100644
index 00000000..9d89c11b
--- /dev/null
+++ b/concepts/vector-filtering/links.json
@@ -0,0 +1,6 @@
+[
+  {
+    "url": "https://intro2r.com/vectors.html#extracting-elements",
+    "description": "Introduction to R: Extracting Elements"
+  }
+]
diff --git a/config.json b/config.json
index 312e341d..9f6b70e2 100644
--- a/config.json
+++ b/config.json
@@ -540,6 +540,11 @@
       "uuid": "d75c1a77-9733-45b0-ae21-2b4f0f313ef4",
       "slug": "basics",
       "name": "Basics"
+    },
+    {
+      "uuid": "18919f40-a3fe-45f3-b181-84ec82bfa785",
+      "slug": "vector-filtering",
+      "name": "Vector Filtering"
     }
   ],
   "key_features": [