## ui.R ##
library(shiny)
library(shinydashboard)

# Define the dashboard UI for the corpus explorer
dash_header <- dashboardHeader(
  title = "Exploravec(tor)"
)
corpus_tf_idf_tab <- tabItem(
  tabName = "corpus_tf_idf",
  h2("TF-IDF of the corpus documents"),
  p("This view lists every document in the corpus together with its top 10 tokens by TF-IDF (terms that appear relatively frequently in that individual document but rarely across the larger corpus, suggesting that they are distinctive to that document)."),
  box(dataTableOutput("document_metadata"),
      title = "Document TF-IDF",
      width = 12)
)
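# Illustrative sketch (not part of this UI): one way the server side might
# compute the top-10 TF-IDF tokens per document, using tidytext/dplyr. The
# `tokens` data frame and its `doc_id`/`word` columns are assumptions for the
# example, not names taken from this repository.
#
#   library(dplyr)
#   library(tidytext)
#   document_tf_idf <- tokens %>%
#     count(doc_id, word) %>%                # term counts per document
#     bind_tf_idf(word, doc_id, n) %>%       # add tf, idf, tf_idf columns
#     group_by(doc_id) %>%
#     slice_max(tf_idf, n = 10) %>%          # keep the 10 most distinctive terms
#     ungroup()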
termsovertime_tab <- tabItem(
  tabName = "termsovertime",
  h2("Terms over time"),
  p("Add tokens to the box below to chart what percentage of documents in the corpus contain them at different points in time. Currently, the documents are clustered into half-year buckets."),
  list(
    box(
      selectizeInput("wordchart_tokens", choices = NULL, selected = NULL, multiple = TRUE, label = "Pick tokens to plot over time"),
      p("Begin typing to generate token suggestions. Click on a token and press \"Delete\" to remove it from the list.")
    ),
    box(
      plotOutput("termsovertime_chart"),
      width = 12,
      height = 650
    ),
    box(
      dataTableOutput("termsovertime_metadata"),
      title = "Documents with these tokens",
      width = 12
    )
  )
)
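# Illustrative sketch (not part of this UI): the chart above could be fed by
# computing, per half-year bucket, the share of documents containing each
# selected token. The `documents`/`tokens` data frames and the `doc_id`,
# `date`, and `word` columns are assumptions, not this repo's actual code.
#
#   library(dplyr)
#   library(lubridate)
#   docs_per_bucket <- documents %>%
#     mutate(bucket = floor_date(date, "halfyear")) %>%
#     count(bucket, name = "n_docs")
#   term_share <- tokens %>%
#     filter(word %in% input$wordchart_tokens) %>%
#     left_join(documents, by = "doc_id") %>%
#     mutate(bucket = floor_date(date, "halfyear")) %>%
#     distinct(word, bucket, doc_id) %>%          # count each document once
#     count(word, bucket, name = "n_with_term") %>%
#     left_join(docs_per_bucket, by = "bucket") %>%
#     mutate(pct_docs = n_with_term / n_docs)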
yearly_tfidf_tab <- tabItem(
  tabName = "yearly_tfidf",
  h2("TF-IDF by year"),
  p("This experiment runs TF-IDF again, but on entire years, to find which words are distinctive to particular years.",
    strong("I am very dubious about this as an approach, but it was a cheap thing to try before diving into topic modeling...")),
  list(
    box(
      dataTableOutput("yearly_tf_idf_table")
    )
  )
)
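# Illustrative sketch (not part of this UI): yearly TF-IDF amounts to treating
# each year as one "document". Same assumed `tokens` frame and packages as the
# per-document sketch above, plus an assumed `year` column.
#
#   yearly_tf_idf <- tokens %>%
#     count(year, word) %>%
#     tidytext::bind_tf_idf(word, year, n) %>%
#     group_by(year) %>%
#     slice_max(tf_idf, n = 10) %>%
#     ungroup()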
topic_model_tab <- tabItem(
  tabName = "topic_models",
  h2("Topic Models"),
  p("An LDA model that attempts to find latent clusters of terms ('topics') that tend to co-occur in documents. Documents may belong to multiple topics."),
  p("N.b. these models need to be pre-computed; presently I only have 5- and 10-topic models for the 'JSTOR Artificial Intelligence' corpus."),
  selectInput("n_topics", label = "Number of topics", choices = c(5, 10, 15, 20), selected = 5, multiple = FALSE),
  uiOutput("tm_html")
)
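# Illustrative sketch (not part of this UI): the pre-computed models mentioned
# above could be built with the topicmodels package from a document-term
# matrix. The `corpus_dtm` object and the output paths are assumptions.
#
#   library(topicmodels)
#   for (k in c(5, 10)) {
#     lda_model <- LDA(corpus_dtm, k = k, control = list(seed = 1234))
#     saveRDS(lda_model, sprintf("models/lda_%d_topics.rds", k))
#   }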
corpus_selector <- selectInput("corpus_menu", choices = NULL, selected = 1, multiple = FALSE, label = "Corpus")
corpus_inclusive <- selectizeInput("corpus_include", choices = NULL, selected = "", multiple = TRUE, label = "Must include terms")
corpus_exclusive <- selectizeInput("corpus_exclude", choices = NULL, selected = "", multiple = TRUE, label = "Must not include terms")
corpus_data <- div(
  p("Number of docs: ", textOutput("corpus_size", inline = TRUE))
)
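# Illustrative sketch (not part of this UI): with `choices = NULL` above, the
# inputs are presumably populated from the server. The `available_corpora` and
# `vocabulary` objects are assumptions about server.R, not code from this repo.
#
#   # in server.R:
#   updateSelectInput(session, "corpus_menu", choices = available_corpora)
#   updateSelectizeInput(session, "corpus_include", choices = vocabulary, server = TRUE)
#   updateSelectizeInput(session, "corpus_exclude", choices = vocabulary, server = TRUE)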
dash_sidebar <- dashboardSidebar(
  corpus_selector,
  corpus_inclusive,
  corpus_exclusive,
  corpus_data,
  sidebarMenu(
    menuItem("Historical Term Frequency", tabName = "termsovertime", icon = icon("chart-line")),
    menuItem("TF-IDF", tabName = "corpus_tf_idf", icon = icon("sort-amount-down")),
    menuItem("Annual TF-IDF", tabName = "yearly_tfidf", icon = icon("calendar-check")),
    menuItem("Topic Models", tabName = "topic_models", icon = icon("object-group"))
  )
)
dash_body <- dashboardBody(
  tabItems(
    termsovertime_tab,
    corpus_tf_idf_tab,
    yearly_tfidf_tab,
    topic_model_tab
  )
)
dashboardPage(
  dash_header,
  dash_sidebar,
  dash_body,
  title = "AI and Ethics Corpus Explorer"
)