-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.go
337 lines (287 loc) · 10.2 KB
/
app.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
package main
import (
"context"
"flag"
"fmt"
"io"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/chromedp/chromedp"
"github.com/schollz/progressbar/v3"
)
// createFolder ensures that the directory folder exists, creating it together
// with any missing parent directories.
//
// It returns nil when the directory already exists. It returns an error when
// the directory cannot be created, for example because a regular file already
// occupies the path.
func createFolder(folder string) error {
	// MkdirAll is a no-op for an existing directory, so no separate os.Stat
	// probe is needed; a stat-then-mkdir sequence would also race with any
	// concurrent creator of the same path.
	if err := os.MkdirAll(folder, os.ModePerm); err != nil {
		return fmt.Errorf("failed to create folder: %w", err)
	}
	return nil
}
// imageHTTPClient is shared by every download so that connections are reused
// and each request is bounded by a timeout.
var imageHTTPClient = &http.Client{Timeout: 60 * time.Second}

// downloadImage fetches url and stores the response body in folder under the
// sequential name "<query><counter><extension>".
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, or when the file cannot be created or written. A file that
// was only partially written is removed again so that no truncated image is
// left on disk.
func downloadImage(url, folder, query string, counter int, extension string) error {
	resp, err := imageHTTPClient.Get(url)
	if err != nil {
		return fmt.Errorf("failed to download image: %w", err)
	}
	defer resp.Body.Close()

	// Without this check an HTML error page would be saved as an image.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("failed to download image: unexpected status %s", resp.Status)
	}

	// Create a file with a sequential name.
	fileName := filepath.Join(folder, fmt.Sprintf("%s%d%s", query, counter, extension))
	out, err := os.Create(fileName)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}

	if _, err := io.Copy(out, resp.Body); err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		_ = out.Close()
		_ = os.Remove(fileName)
		return fmt.Errorf("failed to save image: %w", err)
	}
	// Close can report a deferred write failure, so it is checked explicitly.
	if err := out.Close(); err != nil {
		_ = os.Remove(fileName)
		return fmt.Errorf("failed to save image: %w", err)
	}
	return nil
}
// SearchYandexImages searches for images on Yandex using chromedp and returns the image URLs
func searchYandexImages(ctx context.Context, query string) ([]string, error) {
var links []string
searchURL := fmt.Sprintf("https://yandex.com/images/search?text=%s", strings.Replace(query, " ", "+", -1))
// Run tasks to load the Yandex image search page and extract image URLs from <a> tags
err := chromedp.Run(ctx,
// Navigate to Yandex image search
chromedp.Navigate(searchURL),
chromedp.Sleep(2*time.Second),
)
logError(err)
for i := 0; i < 5; i++ {
err = chromedp.Run(ctx,
chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
chromedp.Sleep(500*time.Millisecond),
)
logError(err)
}
err = chromedp.Run(ctx,
chromedp.Evaluate(`Array.from(document.querySelectorAll('a.Link.ContentImage-Cover')).map(a => a.href)`, &links),
)
if err != nil {
logError(err)
return nil, fmt.Errorf("failed to fetch Yandex image links: %v", err)
}
// Parse img_url parameter from the href attribute to get the actual image URLs
imageURLs := parseYandexImageURLs(links)
return imageURLs, nil
}
// logError writes err to the standard logger and does nothing when err is nil.
func logError(err error) {
	if err == nil {
		return
	}
	log.Print(err)
}
// parseYandexImageURLs returns the value of the img_url query parameter of
// every link, in input order. Links that fail to parse, or that carry no
// non-empty img_url parameter, are skipped.
func parseYandexImageURLs(links []string) []string {
	var extracted []string
	for _, href := range links {
		parsed, parseErr := url.Parse(href)
		if parseErr == nil {
			if target := parsed.Query().Get("img_url"); target != "" {
				extracted = append(extracted, target)
			}
		}
	}
	return extracted
}
// SearchGoogleImages searches for images on Google using chromedp and returns the image URLs
func searchGoogleImages(ctx context.Context, query string) ([]string, error) {
var imageURLs []string
searchURL := fmt.Sprintf("https://www.google.com/search?q=%s&tbm=isch&udm=2", strings.Replace(query, " ", "+", -1))
// Run tasks to load the Google image search page, scroll, and extract full-size image URLs
err := chromedp.Run(ctx,
// Navigate to Google image search
chromedp.Navigate(searchURL),
chromedp.Sleep(2*time.Second), // Wait for the page to load
// Scroll down to load more images (simulate user interaction)
chromedp.ActionFunc(func(ctx context.Context) error {
for i := 0; i < 10; i++ { // Scroll multiple times to load more images
err := chromedp.Run(ctx, chromedp.Evaluate(`window.scrollBy(0, document.body.scrollHeight);`, nil))
if err != nil {
return err
}
time.Sleep(500 * time.Millisecond) // Wait for images to load after each scroll
}
return nil
}),
// Wait for additional images to load
chromedp.Sleep(2*time.Second),
// Extract full-size image URLs from the page (use 'src' from 'img' elements)
chromedp.Evaluate(`Array.from(document.querySelectorAll('img')).map(img => img.src)`, &imageURLs),
)
if err != nil {
return nil, fmt.Errorf("failed to fetch Google images: %v", err)
}
// Filter out irrelevant images (Google logos, base64 images, favicon images, etc.)
filteredImageURLs := filterGoogleImageURLs(imageURLs)
return filteredImageURLs, nil
}
// filterGoogleImageURLs keeps, in input order, the URLs that start with
// "https" and contain none of the substrings "google", "base64" or "FAVICON".
// All comparisons are case-sensitive.
func filterGoogleImageURLs(imageURLs []string) []string {
	rejected := [...]string{"google", "base64", "FAVICON"}

	var kept []string
candidates:
	for _, src := range imageURLs {
		if !strings.HasPrefix(src, "https") {
			continue
		}
		for _, marker := range rejected {
			if strings.Contains(src, marker) {
				continue candidates
			}
		}
		kept = append(kept, src)
	}
	return kept
}
// SearchBingImages searches for images on Bing using chromedp and returns the image URLs
func searchBingImages(ctx context.Context, query string) ([]string, error) {
var imageURLs []string
searchURL := fmt.Sprintf("https://www.bing.com/images/search?q=%s", strings.Replace(query, " ", "+", -1))
// Run tasks to load the Bing image search page and extract image URLs
err := chromedp.Run(ctx,
// Navigate to Bing image search
chromedp.Navigate(searchURL),
chromedp.Sleep(2*time.Second), // Wait for the page to load
// Scroll down to load more images (simulate user interaction)
chromedp.ActionFunc(func(ctx context.Context) error {
for i := 0; i < 5; i++ { // Scroll multiple times to load more images
err := chromedp.Run(ctx, chromedp.Evaluate(`window.scrollBy(0, document.body.scrollHeight);`, nil))
if err != nil {
return err
}
time.Sleep(500 * time.Millisecond) // Wait for images to load after each scroll
}
return nil
}),
chromedp.Evaluate(`Array.from(document.querySelectorAll('a.iusc')).map(a => a.getAttribute('m')).map(json => JSON.parse(json).murl)`, &imageURLs),
)
if err != nil {
return nil, fmt.Errorf("failed to fetch Bing images: %v", err)
}
return imageURLs, nil
}
// downloadImages saves every URL in imageURLs into folder, creating the folder
// first if necessary. Files are named "<query><n>.jpg", where n is the 1-based
// position of the URL in imageURLs.
//
// Downloads run concurrently with a fixed upper bound on the number in flight.
// A failed download is logged and does not stop the remaining ones. The
// function returns once every download has finished.
func downloadImages(imageURLs []string, folder, query string) {
	if err := os.MkdirAll(folder, os.ModePerm); err != nil {
		fmt.Printf("Failed to create folder: %v\n", err)
		return
	}

	imageProgressBar := progressbar.NewOptions(len(imageURLs), progressbar.OptionSetDescription("Downloading images to "+folder), progressbar.OptionEnableColorCodes(true))

	// Cap the number of simultaneous downloads so that a long result list does
	// not open one connection and one goroutine per image at the same time.
	const maxConcurrentDownloads = 8
	slots := make(chan struct{}, maxConcurrentDownloads)

	var wg sync.WaitGroup
	for i, imageURL := range imageURLs {
		slots <- struct{}{} // blocks while maxConcurrentDownloads are running
		wg.Add(1)
		go func(i int, imageURL string) {
			defer wg.Done()
			defer func() { <-slots }()

			// Every image is stored with a .jpg extension.
			if err := downloadImage(imageURL, folder, query, i+1, ".jpg"); err != nil {
				log.Printf("Failed to download image %d: %v\n", i+1, err)
			}
			// A progress bar rendering error must not affect the downloads.
			_ = imageProgressBar.Add(1)
		}(i, imageURL)
	}

	// Wait for all download tasks to complete.
	wg.Wait()
}
// defineStringFlag registers one string option on the default flag set under
// two names, longName and shortName, with the same default value and usage
// text. Both names write to the single variable whose address is returned.
func defineStringFlag(longName string, shortName string, defaultValue string, usage string) *string {
	target := new(string)
	for _, name := range [...]string{longName, shortName} {
		flag.StringVar(target, name, defaultValue, usage)
	}
	return target
}
// main parses the command-line flags, redirects the standard logger to the log
// file and runs one image search per requested engine concurrently, saving the
// results of each engine into its own sub-directory of the output directory.
func main() {
	// Parse CLI arguments.
	query := defineStringFlag("query", "q", "", "Search query for images (required)")
	targets := defineStringFlag("targets", "t", "all", "Comma-separated search targets: google, bing, yandex, or all (default: all)")
	out := defineStringFlag("out", "o", "images", "Directory to save images (default: images)")
	logFile := defineStringFlag("log", "l", "logs.log", "File to save logs (default: logs.log)")
	flag.Parse()

	// Validate the input before the logger is redirected to a file, so that
	// usage errors are visible on the terminal, and before any defer is
	// registered, because log.Fatal exits without running deferred calls.
	if *query == "" {
		log.Fatal("Please provide a search query using the -query or -q flag.")
	}
	searchTargets := parseSearchTargets(*targets)
	if len(searchTargets) == 0 {
		log.Fatal("Please provide at least one search target using the -targets or -t flag.")
	}

	// Set up logging to a file.
	file, err := os.OpenFile(*logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
	if err != nil {
		log.Fatalf("Failed to open log file: %v\n", err)
	}
	defer file.Close()
	log.SetOutput(file)

	// Run each search engine in its own goroutine and wait for all of them.
	var wg sync.WaitGroup
	for _, target := range searchTargets {
		wg.Add(1)
		go func(target string) {
			defer wg.Done()
			runImageSearch(target, *query, *out)
		}(target)
	}
	wg.Wait()

	fmt.Println()
	fmt.Println("Image search and download completed.")
}

// parseSearchTargets turns the value of the -targets flag into the list of
// engines to query. "all" selects every supported engine; otherwise the value
// is split on commas, and each entry is trimmed and lower-cased, with empty
// and repeated entries dropped.
func parseSearchTargets(spec string) []string {
	if strings.ToLower(strings.TrimSpace(spec)) == "all" {
		return []string{"google", "bing", "yandex"}
	}

	var targets []string
	seen := make(map[string]struct{})
	for _, entry := range strings.Split(spec, ",") {
		name := strings.ToLower(strings.TrimSpace(entry))
		if name == "" {
			continue
		}
		// Two searches for the same engine would write to the same files.
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		targets = append(targets, name)
	}
	return targets
}

// runImageSearch performs the search for query on a single engine in its own
// headless browser instance and downloads the results into the sub-directory
// of out named after the engine. Failures are written to the log.
func runImageSearch(target, query, out string) {
	var (
		search func(context.Context, string) ([]string, error)
		label  string
	)
	switch target {
	case "google":
		search, label = searchGoogleImages, "Google"
	case "bing":
		search, label = searchBingImages, "Bing"
	case "yandex":
		search, label = searchYandexImages, "Yandex"
	default:
		log.Printf("Unknown search target: %s\n", target)
		return
	}

	fmt.Printf("Searching on %s...\n", target)

	// Every search gets its own 60-second budget and its own browser.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	opts := append(chromedp.DefaultExecAllocatorOptions[:], chromedp.Flag("headless", true))
	allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...)
	defer cancelAlloc()

	taskCtx, cancelTask := chromedp.NewContext(allocCtx)
	defer cancelTask()

	images, err := search(taskCtx, query)
	if err != nil {
		log.Printf("Failed to search on %s: %v\n", label, err)
		return
	}
	if len(images) == 0 {
		log.Printf("No images found in %s for query: %v", target, query)
		return
	}
	downloadImages(images, filepath.Join(out, target), query)
}