From 840bb32ccce6ba90fea7804b9faa979e81086782 Mon Sep 17 00:00:00 2001 From: timb Date: Mon, 11 Mar 2024 22:25:17 +0000 Subject: [PATCH] Updated vignette --- R/classify.R | 2 +- src/cpp_functions.cpp | 2 +- vignettes/clubpro.Rmd | 56 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/R/classify.R b/R/classify.R index 9c879e5..106fa2a 100644 --- a/R/classify.R +++ b/R/classify.R @@ -26,7 +26,7 @@ classify <- function(obs, target, imprecision, normalise_cols, display_progress) binary_matrix <- dichotemise_matrix(conformed_mat) matches <- 0 - for (i in seq_len(nrow(binary_matrix))) { + for (i in seq_len(dim(binary_matrix)[1])) { predicted_classification[i] <- paste0(levels(target)[which(binary_matrix[i, ] == 1)], collapse = "|") if (sum(binary_matrix[i, ]) == 1) { diff --git a/src/cpp_functions.cpp b/src/cpp_functions.cpp index 2bba58d..1b4ab5a 100644 --- a/src/cpp_functions.cpp +++ b/src/cpp_functions.cpp @@ -38,7 +38,7 @@ arma::mat to_indicator_matrix(arma::vec v) { arma::mat normalise_columns(arma::mat A) { for (size_t j {0}; j < A.n_cols; j++) { auto colfactor = std::sqrt(arma::accu(arma::square(A.col(j)))); - for (size_t i {}; i < A.n_rows; i++) { + for (size_t i {0}; i < A.n_rows; i++) { A(i, j) = colfactor == 0.0 ? 0.0 : A(i, j) / colfactor; } } diff --git a/vignettes/clubpro.Rmd b/vignettes/clubpro.Rmd index c5bfed2..a8bc409 100644 --- a/vignettes/clubpro.Rmd +++ b/vignettes/clubpro.Rmd @@ -14,55 +14,97 @@ knitr::opts_chunk$set( ) ``` +## Background + +`clubpro` is an implementation of a subset of the methods described in Grice (2011) for classification using binary procrustes rotation. Binary procrustes rotation can be used to quantify how well observed data can be classified into known categories. A high degree of classification accuracy indicates that the ordering of the observed data is well explained by the categories used. 
+ +## Set up + +Once installed, load `clubpro`: + ```{r setup} library(clubpro) ``` +The plots provided by `clubpro` use the current palette. You can check which colours are used by default by calling the `palette()` function. Or, define your own palette by passing a vector of colours to `palette()`. + ```{r set_palette} palette(c("#0073C2", "#EFC000", "#868686")) ``` +## Classifying jellyfish location by size + +Hand et al. (1994) provide data on `width` and `length` in mm of jellyfish caught at two `location`s in New South Wales, Australia. + +To quantify how well jellyfish `width` can be classified according to location, binary procrustes rotation can be performed with `clubpro` by passing a formula and a `data.frame` to the `club()` function. + ```{r model_jellyfish} mod <- club(width ~ location, data = jellyfish) ``` +The two most important statistics returned by the `club()` function are the percentage of correct classifications (PCC), and the chance-value. The PCC is the percentage of observations in the data which are classified into the correct category. + +```{r pcc_jellyfish} +pcc(mod) +``` + +The chance-value is computed using a randomisation test to determine how frequently a PCC at least as high as that computed for the observed ordering of data is found from random reorderings of the data. + +```{r cval_jellyfish} +cval(mod) +``` + +More detailed results can be returned using the `summary()` function. + ```{r summary_jelyfish} summary(mod) ``` +The classification of the observed data by the binary procrustes rotation algorithm can be visualised by plotting the model object using the `plot()` function. + ```{r plot_jellyfish} plot(mod) ``` +For each observation, a classification strength index (CSI) between 0 and 1 is returned. A value of 1 indicates that an observed value was matched perfectly by the rotation, whereas lower CSI values indicate that observations were matched less well. 
The CSI values can be accessed using the `csi()` function, or visualised by plotting the object returned by a call to the `csi()` function. + ```{r plot_csi, fig.width=6, fig.height=8} mod_csi <- csi(mod) plot(mod_csi) ``` +The predicted categories determined by the model can be tabulated using the `predict()` function. In this case, of the 22 jellyfish caught at `Dangar Island`, 17 were classified as having come from `Dangar Island` and 5 were classified as having come from `Salamander Bay`. Of the 24 jellyfish caught at `Salamander Bay`, 2 were classified as having come from `Dangar Island` and 22 were correctly classified as having come from `Salamander Bay`. + ```{r predict_jellyfish} predict(mod) ``` +These predictions can be visualised as a mosaic plot by plotting the object returned by the `predict()` function. + ```{r plot_predictions} plot(predict(mod)) ``` +Similarly, the accuracy of classifications can be tabulated as a function of category using the `accuracy()` function. + ```{r accuracy_jellyfish} accuracy(mod) ``` +As with predicted categories, prediction accuracy can also be plotted in the form of a mosaic plot using `plot(accuracy())`. + ```{r plot_accuracy} plot(accuracy(mod)) ``` -```{r pcc_jellyfish} -pcc(mod) -``` - -```{r cval_jellyfish} -cval(mod) -``` +The calculation of the chance-value as the frequency of occurrence of a PCC from randomly reordered data at least as high as the PCC of the observed data ordering can be visualised by plotting the output of the `pcc_replicates()` function. Calling the `plot()` function on the output of `pcc_replicates()` produces a histogram of the PCCs resulting from all random orderings of the data, along with the observed PCC as a dashed vertical line. ```{r plot_cval_dist} plot(pcc_replicates(mod)) ``` + +## References + +Grice, J. W. (2011). Observation oriented modeling: Analysis of cause in the behavioral sciences. Academic Press. + +Hand, D. J., Daly, F., Lunn, A. D., McConway, K. J. and Ostrowski, E. 
(1994). A Handbook of Small Data Sets. Chapman & Hall. \ No newline at end of file