-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsample.py
executable file
·167 lines (141 loc) · 5.18 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#! /usr/bin/env python
#
# sample.py
#
# Copyright (c) 2017 Junpei Kawamoto
#
# This file is part of rgmining-tripadvisor-dataset.
#
# rgmining-tripadvisor-dataset is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# rgmining-tripadvisor-dataset is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rgmining-tripadvisor-dataset. If not, see <http://www.gnu.org/licenses/>.
#
"""Evaluate a review graph mining algorithm with the Trip Advisor dataset.
"""
# pylint: disable=invalid-name
from __future__ import absolute_import, division
import logging
from logging import getLogger
import sys
import dsargparse
import tripadvisor
# Module-level logger named after this module; used below to report which
# optional algorithm packages could not be imported.
LOGGER = getLogger(__name__)
#--------------------------
# Loading algorithms
#--------------------------
# Registry filled in by the try/except/else sections that follow. Each key is
# an algorithm name accepted as the `method` command-line argument, and each
# value is a callable that run() invokes with the parsed `--param` keyword
# arguments to create the graph object for that algorithm.
ALGORITHMS = {}
"""Dictionary of graph loading functions associated with installed algorithms.
"""
# Load and register RIA.
try:
    import ria
except ImportError:
    LOGGER.info("rgmining-ria is not installed.")
else:
    def ignore_args(func):
        """Return a wrapper of `func` which discards every argument it receives.

        run() creates a graph with ``ALGORITHMS[method](**kwargs)``, so the
        wrapper must swallow keyword arguments as well as positional ones;
        otherwise passing any ``--param`` to one of the wrapped algorithms
        raises TypeError instead of being ignored.

        Args:
          func: callable which takes no arguments.

        Returns:
          A callable accepting any positional and keyword arguments and
          returning ``func()``.
        """
        def _(*_args, **_kwargs):
            """Call the wrapped function without any argument."""
            return func()
        return _

    # ria_graph receives the user supplied parameters as they are; the other
    # three factories are wrapped so that given parameters are ignored.
    ALGORITHMS["ria"] = ria.ria_graph
    ALGORITHMS["one"] = ignore_args(ria.one_graph)
    ALGORITHMS["onesum"] = ignore_args(ria.one_sum_graph)
    ALGORITHMS["mra"] = ignore_args(ria.mra_graph)
# Register RSD under the name "rsd" when its package can be imported;
# otherwise only leave a note in the log and keep going.
try:
    import rsd  # pylint: disable=wrong-import-position
except ImportError:
    LOGGER.info("rgmining-rsd is not installed.")
else:
    ALGORITHMS.update(rsd=rsd.ReviewGraph)
# Register Fraud Eagle under the name "feagle" when its package can be
# imported; otherwise only leave a note in the log and keep going.
try:
    import fraud_eagle  # pylint: disable=wrong-import-position
except ImportError:
    LOGGER.info("rgmining-fraud-eagle is not installed.")
else:
    ALGORITHMS.update(feagle=fraud_eagle.ReviewGraph)
# Register FRAUDAR under the name "fraudar" when its package can be imported;
# otherwise only leave a note in the log and keep going.
try:
    import fraudar  # pylint: disable=wrong-import-position
except ImportError:
    LOGGER.info("rgmining-fraudar is not installed.")
else:
    def create_fraudar_graph(nblock=1):
        """Create a review graph defined in the FRAUDAR package.

        Args:
          nblock: value handed to ``fraudar.ReviewGraph`` after conversion
            with ``int()``. run() parses every parameter as a float, hence
            the conversion (default: 1).

        Returns:
          A new ``fraudar.ReviewGraph`` object.
        """
        block_count = int(nblock)
        return fraudar.ReviewGraph(block_count)

    ALGORITHMS["fraudar"] = create_fraudar_graph
#--------------------------
def run(method, loop, threshold, output, param):
    """Run a given algorithm with the Trip Advisor dataset.

    Runs a given algorithm and outputs anomalous scores and summaries after
    each iteration finishes. The function ends when a given number of loops
    has run or the update of one iteration becomes smaller than a given
    threshold.

    Some algorithms require a set of parameters. For example, feagle requires
    parameter `epsilon`. Argument `param` specifies those parameters, and
    if you want to set 0.1 to the `epsilon`, pass `epsilon=0.1` via the
    argument.

    Args:
      method: name of algorithm; it must be a key of ALGORITHMS.
      loop: the maximum number of iterations. Methods whose name starts with
        "one" always run exactly one iteration regardless of this value.
      threshold: threshold to judge an update is negligible.
      output: writable object where the output will be written.
      param: list of key and value pair which are connected with "=".
        Every value is converted to a float.

    Raises:
      KeyError: if `method` is not registered in ALGORITHMS.
      ValueError: if an item of `param` does not contain "=" or its value
        cannot be converted to a float.
    """
    kwargs = {}
    for item in param:
        # partition() splits at the first "=" only and tells us whether the
        # separator was found, so a malformed item is reported explicitly
        # instead of failing with an opaque tuple-unpacking error.
        key, sep, value = item.partition("=")
        if not sep:
            raise ValueError(
                "parameter must be given as key=value: {0!r}".format(item))
        kwargs[key] = float(value)

    g = ALGORITHMS[method](**kwargs)
    tripadvisor.load(g)
    tripadvisor.print_state(g, 0, output)

    # Updates
    logging.info("Start iterations.")
    iterations = 1 if method.startswith("one") else loop
    # range() is used instead of xrange(): xrange does not exist on Python 3,
    # and the loop count is small enough that it makes no difference on
    # Python 2.
    for i in range(iterations):
        diff = g.update()
        # update() may return None; in that case convergence is never assumed.
        if diff is not None and diff < threshold:
            break

        # Current summary
        logging.info("Iteration %d ends. (diff=%s)", i + 1, diff)
        tripadvisor.print_state(g, i + 1, output)

    # Print final state.
    tripadvisor.print_state(g, "final", output)
def main():
    """The main function.
    """
    # NOTE(review): the docstring above is deliberately left untouched.
    # dsargparse appears to build the command-line help from the docstring of
    # the function passed as `main`, so editing it would change the --help
    # output. Confirm against the dsargparse documentation before rewording.

    # Nothing to run if none of the optional algorithm packages is installed.
    if not ALGORITHMS:
        logging.error("No algorithms are installed.")
        sys.exit(1)

    parser = dsargparse.ArgumentParser(main=main)
    parser.add_argument("method", choices=sorted(ALGORITHMS))
    parser.add_argument(
        "--output", default=sys.stdout,
        type=dsargparse.FileType("w"),  # pylint: disable=no-member
        help="file path to store results (Default: stdout).")
    parser.add_argument("--loop", type=int, default=20)
    # The default must be written as 1e-3. In Python `10^-3` is a bitwise XOR
    # which evaluates to -9, and with such a threshold the convergence check
    # `diff < threshold` in run() would practically never stop the iteration.
    parser.add_argument("--threshold", type=float, default=1e-3)
    parser.add_argument(
        "--param", action="append", default=[],
        help=(
            "key and value pair which are connected with '='.\n"
            "This option can be set multiply."))

    # The destination names of the parsed arguments (method, output, loop,
    # threshold, param) match the parameter names of run().
    run(**vars(parser.parse_args()))
if __name__ == "__main__":
    # Log messages of level INFO and above go to stderr; stdout is the
    # default destination of the results (see --output in main()).
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    try:
        main()
    except KeyboardInterrupt:
        # Interrupted by the user: stop quietly without a traceback.
        pass
    except Exception:  # pylint: disable=broad-except
        # Any other failure is written to the log together with its traceback.
        logging.exception("Untracked exception occurred.")
    finally:
        # Flush and close the logging handlers in every case.
        logging.shutdown()