-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimageLinkFinder.js
139 lines (106 loc) · 3.22 KB
/
imageLinkFinder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Module setup. The two sides of an unresolved merge conflict are combined
// here: the cheerio import from one side (HTML parsing below calls `$.load`)
// and the CORS middleware from the other.
var request = require('request')
var $ = require('cheerio')
var url=require('url');
var sleep = require('sleep');
// NOTE(review): assigned without `var`, so this creates a global; left as-is
// in case something else relies on it.
expect = require('expect.js')
var neo4j = require('neo4j')

// Maps a page URL to the URL of the largest image found on that page.
var biggestImages = {};
var targetUrl = 'http://www.teamtalk.com/liverpool'
var express = require('express');
var app = express();

// Allow cross-origin callers to reach every route.
app.all('/*', function(req, res, next) {
  res.header("Access-Control-Allow-Origin", "*");
  res.header("Access-Control-Allow-Headers", "X-Requested-With");
  next();
});

// Scan one known page at startup.
getBiggestImage("http://lifehacker.com/5330687/items-you-can-get-great-deals-on-in-a-recession");
// GET /setUrls: replies with a fixed success status. The call that would scan
// targetUrl is currently disabled, so this endpoint does no other work.
function handleSetUrls(req, res) {
  var reply = {"status" : "success" };
  res.send(reply);
}

// GET /biggestImages: replies with the current biggestImages map.
function handleBiggestImages(req, res) {
  res.send(biggestImages);
}

app.get('/setUrls', handleSetUrls);
app.get('/biggestImages', handleBiggestImages);
/**
 * Finds the largest image referenced by a web page and records it.
 *
 * Fetches the page, collects the `src` of every <img> element, requests each
 * image and compares the Content-Length headers of the responses. The URL with
 * the largest Content-Length is stored in the module-level `biggestImages` map
 * under `urlToFindBiggestImage`. Nothing is stored when the page cannot be
 * fetched or when no image request produces a response.
 *
 * The result is recorded as soon as every image request has completed, or
 * after 5 seconds using whatever has arrived by then, whichever comes first.
 *
 * @param {string} urlToFindBiggestImage - absolute URL of the page to scan.
 * @returns {undefined} the result is delivered through `biggestImages`.
 */
function getBiggestImage(urlToFindBiggestImage) {
  var MAX_WAIT_MS = 5000;
  var candidates = [];
  var pending = 0;
  var finished = false;
  var timer = null;

  // Largest Content-Length first.
  function bySizeDescending(a, b) {
    return b.length - a.length;
  }

  // Records the winner exactly once, whether triggered by completion or by the timer.
  function finish() {
    if (finished) {
      return;
    }
    finished = true;
    if (timer !== null) {
      clearTimeout(timer);
    }
    if (candidates.length < 1) {
      return;
    }
    candidates.sort(bySizeDescending);
    console.log(JSON.stringify(candidates));
    biggestImages[urlToFindBiggestImage] = candidates[0].url;
  }

  // Requests one image and records its reported size.
  function measureImage(imageUrl) {
    request(imageUrl, function (err, res) {
      if (res == null) {
        console.log("null response from: " + imageUrl);
      } else {
        var size = parseInt(res.headers['content-length'], 10);
        candidates.push({
          // A missing or malformed header counts as size 0 so the sort stays well-defined.
          "length" : isNaN(size) ? 0 : size,
          "url" : imageUrl
        });
      }
      pending -= 1;
      if (pending === 0) {
        finish();
      }
    });
  }

  function gotHTML(err, resp, html) {
    if (err) {
      console.error(err);
      finish();
      return;
    }
    var imageURLs = [];
    var parsedHTML = $.load(html);
    parsedHTML('img').each(function (i, img) {
      var src = parsedHTML(img).attr('src');
      if (!src) {
        // The <img> has no usable src attribute (possibly set by script or
        // another attribute; not verified here).
        console.log("Can't find image: " + urlToFindBiggestImage);
        return;
      }
      // Resolve against the page URL so root-relative, path-relative and
      // protocol-relative sources all become absolute, requestable URLs.
      imageURLs.push(url.resolve(urlToFindBiggestImage, src));
    });
    if (imageURLs.length === 0) {
      finish();
      return;
    }
    // Set the counter before issuing any request so an early callback cannot
    // see it reach zero prematurely.
    pending = imageURLs.length;
    for (var i = 0; i < imageURLs.length; i++) {
      measureImage(imageURLs[i]);
    }
  }

  // Arm the cap first so finish() can always cancel it.
  timer = setTimeout(finish, MAX_WAIT_MS);
  request(urlToFindBiggestImage, gotHTML);
}
// Seed the scan from Neo4j: every node reachable from node 28974 that has a
// title contributes its url, and each url is passed to getBiggestImage.
var db = new neo4j.GraphDatabase('http://netgear.rohidekar.com:7474');
db.query('start n=node(28974) match n-->c where has(c.title) return c.url as url,c.title as title ', function(err, result) {
  if (err) {
    // Throwing from this asynchronous callback would terminate the process and
    // take the HTTP server down with it; report the failure and carry on.
    console.error(err);
    return;
  }
  if (result == null) {
    return;
  }
  for (var j = 0; j < result.length; j++) {
    var row = result[j];
    // Skip empty rows and rows without a url to scan.
    if (row == null || row.url == null) {
      continue;
    }
    getBiggestImage(row.url);
  }
});
// Start the HTTP server. The port lives in one variable so the log line always
// reports the port that is actually being listened on.
var port = 3000;
app.listen(port);
console.log('Listening on port http://localhost:' + port);