// Program.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
namespace GoogleSearchImageDownloader
{
/// <summary>
/// Console entry point: requests a Google image search page for a term, pulls the
/// quoted values that follow each <c>"ou"</c> key out of the returned HTML, and
/// downloads every such URL to disk on its own background task.
/// </summary>
class Program
{
    private static HttpClient myHTTPClient; // never use USING with httpclient, this should be application life scope
    private const string baseGoogleImageSearchURL = "https://www.google.com/search?source=lnms&tbm=isch&sa=X&q=";
    // Key the parser scans for; the quoted value that follows it is treated as an image URL.
    private const string imageUrlKey = "\"ou\"";
    private static string pathToSaveFiles;
    private static string imagePrefix;
    private static string imageSearchTerm;

    /// <summary>
    /// Validates the command line, configures the shared HTTP client, collects the
    /// image URLs for the search term and waits for all downloads to finish.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c>: directory to save images to; <c>args[1]</c>: search term;
    /// <c>args[2]</c>: prefix used in the saved file names.
    /// </param>
    static void Main(string[] args)
    {
        if (args.Length < 3)
        {
            Console.WriteLine("Please provide three arguments, the first is the path to save images to, the second is the google search term and the last is the image prefix.");
            return;
        }

        Console.WriteLine("");
        Console.WriteLine("");

        try
        {
            pathToSaveFiles = args[0];
            imageSearchTerm = args[1];
            imagePrefix = args[2];

            // Create the destination once up front; otherwise every single save would
            // fail with DirectoryNotFoundException when the folder is missing.
            Directory.CreateDirectory(pathToSaveFiles);

            HttpClientHandler httpHandler = new HttpClientHandler();
            // NOTE(review): the callback below accepts every server certificate, i.e. TLS
            // validation is disabled for all requests made through the shared client.
            // The original comment says this is needed only for dev/staging - confirm
            // and remove it if it is not required.
            httpHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
            httpHandler.ServerCertificateCustomValidationCallback = (request, cert, certChain, policyErrors) => true;

            myHTTPClient = new HttpClient(httpHandler);
            // headers
            myHTTPClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (X11; Linux i686; rv:64.0) Gecko/20100101 Firefox/64.0");

            Console.WriteLine("Grabbing Google search results and parsing out URLs...");
            List<string> urls = GetUrls(GetHtmlCode(imageSearchTerm));

            Console.WriteLine("Spinning up background threads to download images...");
            Task[] tasks = new Task[urls.Count];
            for (int i = 0; i < urls.Count; i++)
            {
                // Declared inside the loop body so each task captures its own instance.
                ImageObject imageToDownload = new ImageObject { ImageNumber = i, ImageURL = urls[i] };
                tasks[i] = Task.Factory.StartNew(
                    () => DownloadImage(imageToDownload),
                    CancellationToken.None,
                    TaskCreationOptions.LongRunning,
                    TaskScheduler.Default);
            }

            Task.WaitAll(tasks);
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }
    }

    /// <summary>
    /// Downloads one image and writes it to
    /// <c>{pathToSaveFiles}/{imagePrefix}_glImage_{ImageNumber}.jpg</c>.
    /// Failures are reported on the console and never propagate to the caller.
    /// </summary>
    /// <param name="tmpImgObj">URL and sequence number of the image to fetch.</param>
    private static void DownloadImage(ImageObject tmpImgObj)
    {
        try
        {
            byte[] image = GetImage(tmpImgObj.ImageURL);
            if (image == null)
            {
                // Nothing was received, so do not claim the image was downloaded.
                Console.WriteLine($"An error occured downloading or saving the image {tmpImgObj.ImageNumber}");
                return;
            }

            string fileName = $"{imagePrefix}_glImage_{tmpImgObj.ImageNumber}.jpg";
            // Path.Combine inserts the platform's separator instead of a hard-coded backslash.
            File.WriteAllBytes(Path.Combine(pathToSaveFiles, fileName), image);
            Console.WriteLine($"Downloaded image {tmpImgObj.ImageNumber}...");
        }
        catch (Exception e)
        {
            Console.WriteLine($"An error occured downloading or saving the image {tmpImgObj.ImageNumber}: {e.GetBaseException().Message}");
        }
    }

    /// <summary>
    /// Requests the image search page for <paramref name="searchString"/> and returns its body.
    /// </summary>
    /// <param name="searchString">Raw (unencoded) search term.</param>
    /// <returns>The response body, or an empty string when the request fails.</returns>
    private static string GetHtmlCode(string searchString)
    {
        try
        {
            // These are default headers on the shared client, so they also apply to the
            // image requests issued afterwards.
            var acceptHeaders = myHTTPClient.DefaultRequestHeaders.Accept;
            acceptHeaders.Clear();
            acceptHeaders.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("text/html"));
            acceptHeaders.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/xhtml+xml"));
            acceptHeaders.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("*/*"));

            // Escape the term so characters such as '&', '#' or '+' stay part of the
            // query value instead of altering the request.
            Uri searchUri = new Uri(baseGoogleImageSearchURL + Uri.EscapeDataString(searchString ?? string.Empty));

            using (HttpResponseMessage response = myHTTPClient.GetAsync(searchUri).Result)
            {
                return response.Content.ReadAsStringAsync().Result;
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e);
        }

        return "";
    }

    /// <summary>
    /// Extracts the quoted value that follows every <c>"ou"</c> key in <paramref name="html"/>.
    /// </summary>
    /// <param name="html">Markup to scan; may be null or empty.</param>
    /// <returns>
    /// The values found, in document order. Never null: an empty list is returned when
    /// nothing is found, and scanning stops at the first incomplete entry while keeping
    /// the values collected so far.
    /// </returns>
    private static List<string> GetUrls(string html)
    {
        var urls = new List<string>();
        if (string.IsNullOrEmpty(html))
        {
            return urls;
        }

        int keyIndex = html.IndexOf(imageUrlKey, StringComparison.Ordinal);
        while (keyIndex >= 0)
        {
            // Opening quote of the value that follows the key.
            int openQuote = html.IndexOf('"', keyIndex + imageUrlKey.Length);
            if (openQuote < 0)
            {
                break; // key without a value: markup is truncated
            }

            int valueStart = openQuote + 1;
            int closeQuote = html.IndexOf('"', valueStart);
            if (closeQuote < 0)
            {
                break; // unterminated value: markup is truncated
            }

            urls.Add(html.Substring(valueStart, closeQuote - valueStart));
            keyIndex = html.IndexOf(imageUrlKey, closeQuote, StringComparison.Ordinal);
        }

        return urls;
    }

    /// <summary>
    /// Fetches the bytes at <paramref name="url"/> using the shared HTTP client.
    /// </summary>
    /// <param name="url">Absolute URL of the image.</param>
    /// <returns>
    /// The response body, or null when the status code is not a success code or the
    /// request throws (the failure is reported on the console).
    /// </returns>
    private static byte[] GetImage(string url)
    {
        try
        {
            using (HttpResponseMessage response = myHTTPClient.GetAsync(new Uri(url)).Result)
            {
                return response.IsSuccessStatusCode
                    ? response.Content.ReadAsByteArrayAsync().Result
                    : null;
            }
        }
        catch (Exception e)
        {
            // .Result wraps failures in AggregateException; surface the underlying cause.
            Console.WriteLine("An error occured attempting to download the image... " + e.GetBaseException().Message);
        }

        return null;
    }
}
/// <summary>
/// Describes one image to download: the address it is fetched from and the
/// sequence number used when naming the saved file.
/// </summary>
public class ImageObject
{
    /// <summary>Sequence number of the image; used in console messages and in the saved file name.</summary>
    public int ImageNumber { get; set; }

    /// <summary>Address the image is requested from.</summary>
    public string ImageURL { get; set; }
}
}