-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapriori.R
125 lines (115 loc) · 3.91 KB
/
apriori.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
library(arules)
library(data.table)
library(stringr)
library(gtools)
# Test #1 data
data("Groceries")
itemset=Groceries
itemset=itemset[1:20]
#Test#2 data
A=c("M","O","N","K","E","Y")
B=c("D","O","N","K","E","Y")
C=c("M","A","K","E")
D=c("M","U","C","K","Y")
E=c("C","O","O","K","I","E")
it = list(A,B,C,D,E)
itemset=it
##############################################################################################################
ap=apriori(itemset, parameter=list(support=0.09,target="frequent itemsets"))
arules::inspect(ap)
fi=apr(itemset,0.09,0.8)
fi
apr<-function(itemset,s=0.6,c=0.8){
# --------------------------------------------------------------
# Apriori algorithm impementation for Frequent sets and Association rule
# --------------------------------------------------------------
FI.temp=NULL
item.list=as(itemset, "list") # transform the input to a list
print(s)
min.supp=s*length(item.list)
min.conf=c
item.list=lapply(1:length(item.list),function(x) unique(item.list[[x]]))
tran.item.n=data.frame(l=lengths(item.list)) # get number of items in each transaction
tran.item.n$RN=as.numeric(rownames(tran.item.n))
temp=tran.item.n[order(tran.item.n$l),] #sort the transaction id aka index acc. to the number of items
max.items=max(tran.item.n$l) #get the maximum number of items present in any single transaction in the candidate itemset
#Get unique items
a=unlist(item.list)
item=unique(a)
n=length(item) #number of unique items
c=1
#sort the items in transactions
item.list=lapply(1:length(item.list),function(x) unique(sort(item.list[[x]])))
item.list.ord=lapply(temp$RN,function(x) item.list[[x]])
z=unlist(lapply(item, function(x) sum(str_count(a, x))))
##### Initialization
L1=data.frame(itemset=item,freq=z) #get the frequency from the k=1 candidate itemset
Lk=L1[L1$freq>=min.supp,] #get the frequent itemsets by filtering the frq by min-support.
row.names(Lk)=NULL
FI.temp[[c]]=Lk
for (k in 2:max.items){
ix=which(lengths(item.list.ord)>=k) #get the itemsets with k or more items
temp.list=lapply(ix,function(x) item.list.ord[[x]])
#SELFJOIN candidate list
titems=unique(unlist(Lk[,-ncol(Lk)]))
n=length(titems)
sj=as.data.frame(combinations(n,k,titems))
#get frequency of each itemset
z=NULL
for (i in 1:nrow(sj)){
b=unlist(lapply(temp.list,function(x) sum(x%in%unlist(sj[i,]))==k)) #check in the transaction for the candidate itemsets
z=append(z,sum(b))
}
Ltemp=sj
Ltemp$freq=z
#Check min Support criteria
Lk=Ltemp[Ltemp$freq>=min.supp,]
row.names(Lk)=NULL
if (nrow(Lk)>0){
c=c+1
print(c)
print(Lk)
FI.temp[[c]]=Lk
if (nrow(Lk)==1)
break
}
else
break
}
#Get rules
if (length(FI.temp) >1){
rules.temp=NULL;arule=data.frame(NA,NA,NA);r=0;colnames(arule)=c('ant','desc','conf')
for (i in 2:length(FI.temp)){
s=FI.temp[[i-1]]
a=FI.temp[[i]]
for (j in 1:nrow(a)){
if (i > 1)
rules.temp=permutations(ncol(a)-1,ncol(a)-1,unlist(a[j,-ncol(a)]))
else
rules.temp=a[,-ncol(a)]
for (k in 1:nrow(rules.temp)){
ant=c(rules.temp[k,1:ncol(rules.temp)-1])
des=c(rules.temp[k,ncol(rules.temp)])
t=ncol(s)-1
for (l in 1:nrow(s)){
if (sum(ant%in%unlist(s[l,1:t]))==t){
supp=s[l,ncol(s)];
break
}
}
conf=a[j,"freq"]/supp;
if (conf >= min.conf){
r=r+1
arule[r,1]=toString(sort(c(ant)))
arule$desc[r]=des
arule$conf[r]=conf
}
}
}
}
arule =unique(arule)
print("Association Rules")
print(arule)
}
return(FI.temp)
}