-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbanditProcess.js
85 lines (62 loc) · 3.21 KB
/
banditProcess.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
'use-strict';
import PuppeteerUtil from "./utils/puppeteerUtil";
import CrawlerUtil from './utils/crawlerUtil';
import HtmlUtil from './utils/htmlUtil';
import { GaussianNB } from 'ml-naivebayes';
import EpsilonGreedy from './epsilonGreedy';
import FeaturesConst from './consts/featuares';
import TextUtil from "./utils/textUtil";
import logger from './core/logger/app-logger'
/**
 * Drives a crawl in which the next node to visit is picked from a queue by an
 * epsilon-greedy bandit (one arm per queued node).
 */
export default class BanditProcess {
  /**
   * Crawls `node`, then keeps pulling nodes out of `queue` (chosen by
   * `epsilonGreedyAlg`) and crawling them until the queue is empty or
   * `CrawlerUtil.checkItensComplete(itens)` no longer returns `false`.
   *
   * The browser behind `puppeteer` is always closed before this method
   * settles, on success and on failure alike, so a crash in the middle of the
   * crawl does not leave a browser process behind.
   *
   * @param {object} node - First node to crawl. Must expose
   *   `initializeFeatures()`, `updateRewardNodes()` and `getLevel()`.
   * @param {object|null} [puppeteer=null] - Wrapper exposing `getFirstPage()`
   *   and `getBrowser()`. When null/undefined one is created through
   *   `PuppeteerUtil.createPuppetterInstance()`.
   * @param {Array<object>} queue - Nodes waiting to be crawled. Entries must
   *   expose `getSource()`, `getLevel()`, `getMaxReward()` and
   *   `getSourcesParents()`.
   * @param {object} criterion - Forwarded to `CrawlerUtil`; its `name` is used
   *   for item identification and logging.
   * @param {*} evaluation - Forwarded untouched to `CrawlerUtil`.
   * @param {Array} [elementsAccessed=[]] - Forwarded to, and replaced by the
   *   result of, `CrawlerUtil.crawlerNode`.
   * @param {*} [itens=null] - Items collected so far; replaced by the result
   *   of each crawl. If still `null` at the end,
   *   `CrawlerUtil.identificationItens` is called once to fill it.
   * @param {object} epsilonGreedyAlg - Bandit exposing `updateNumArms(n)`,
   *   `update(index, reward)`, `chooseArm()` and the `values` / `counts`
   *   arrays, which are kept aligned with `queue`.
   * @param {number} [actuallyIndex=0] - Unused here; kept for backward
   *   compatibility with existing callers.
   * @param {number} [contNodeNumber=1] - Starting value of the visited-node
   *   counter; incremented once per node taken from the queue.
   * @param {boolean} [withOutSearchKeyWord=false] - Forwarded to
   *   `CrawlerUtil.crawlerNode`.
   * @returns {Promise<{itens: *, contNodeNumber: number}>} The collected items
   *   and the final value of the visited-node counter.
   * @throws Re-throws any error raised outside the guarded
   *   `CrawlerUtil.crawlerNode` call, after the browser has been closed.
   */
  static async initilize(node, puppeteer = null, queue, criterion, evaluation, elementsAccessed = [], itens = null, epsilonGreedyAlg, actuallyIndex = 0, contNodeNumber = 1, withOutSearchKeyWord = false) {
    if (puppeteer == null) {
      puppeteer = await PuppeteerUtil.createPuppetterInstance();
    }

    let currentNode = node;
    let visitedCount = contNodeNumber;

    try {
      // Iterative form of what used to be tail recursion: one pass per node.
      for (;;) {
        const page = puppeteer.getFirstPage();
        const lengthQueueBefore = queue.length;
        currentNode.initializeFeatures();

        try {
          const nodeCrawledResult = await CrawlerUtil.crawlerNode(criterion, evaluation, currentNode, page, puppeteer, elementsAccessed, itens, queue, withOutSearchKeyWord);
          queue = nodeCrawledResult.queue;
          currentNode = nodeCrawledResult.node;
          elementsAccessed = nodeCrawledResult.elementsAccessed;
          itens = nodeCrawledResult.itens;
        } catch (e) {
          // A failed crawl of one node is not fatal: keep the previous state
          // and carry on with whatever is still queued.
          logger.warn("Click error: ", e);
        }

        for (const edge of queue) {
          console.log("queue nodes: ****:", edge.getSource().value, ' level: ', edge.getLevel());
        }

        const hasPendingWork = queue.length > 0 && CrawlerUtil.checkItensComplete(itens) === false;
        if (!hasPendingWork) {
          if (itens === null) {
            itens = await CrawlerUtil.identificationItens(criterion.name, page, itens, page, evaluation, currentNode);
          }
          break;
        }

        epsilonGreedyAlg.updateNumArms(queue.length);
        currentNode.updateRewardNodes();

        // Only feed rewards back when this crawl actually enqueued new nodes.
        if (currentNode.getLevel() > 1 && lengthQueueBefore < queue.length) {
          for (let i = 0; i < queue.length; i++) {
            const maxReward = queue[i].getMaxReward();
            if (maxReward > 0) {
              epsilonGreedyAlg.update(i, maxReward);
            }
          }
        }

        const index = epsilonGreedyAlg.chooseArm();
        console.log("values ======================== ", epsilonGreedyAlg.values);
        console.log("index ======================== ", index);

        // Remove the chosen arm everywhere so queue[i] keeps matching
        // values[i] / counts[i].
        const newNode = queue[index];
        queue.splice(index, 1);
        epsilonGreedyAlg.values.splice(index, 1);
        epsilonGreedyAlg.counts.splice(index, 1);

        if (newNode.getLevel() > 0 && !HtmlUtil.isUrl(newNode.getSource().getValue())) {
          // Best effort: a navigation may or may not be in flight, so a
          // rejection (e.g. timeout) here is deliberately ignored.
          await page.waitForNavigation().catch(e => void e);
          await PuppeteerUtil.accessParent(page, newNode.getSourcesParents());
        }

        // The recursive version re-applied the parameter defaults on every
        // hop; keep that so an `undefined` coming back from the crawler is
        // normalised exactly as before.
        if (elementsAccessed === undefined) {
          elementsAccessed = [];
        }
        if (itens === undefined) {
          itens = null;
        }

        currentNode = newNode;
        visitedCount += 1;
      }
    } finally {
      // Runs on success and on failure so the browser is never leaked.
      logger.info("Close Puppeteer ...");
      await puppeteer.getBrowser().close();
    }

    logger.info("Returning Criterion: " + criterion.name);
    return { "itens": itens, "contNodeNumber": visitedCount };
  }
}