From 80ace0dd3724a8b9f85f3ca741c987e9063d85b1 Mon Sep 17 00:00:00 2001 From: Dave Molk Date: Thu, 13 Oct 2022 20:50:18 -0600 Subject: [PATCH] hacky but working (basic) prototype --- .gitignore | 2 + fof/helpers.go | 40 ++++++++++++++++++ fof/main.go | 64 ++++++++++++++++++++++++++++ fof/parsers.go | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ fof/queries.go | 50 ++++++++++++++++++++++ fof/requests.go | 80 +++++++++++++++++++++++++++++++++++ go.mod | 7 ++++ go.sum | 11 +++++ 8 files changed, 362 insertions(+) create mode 100644 fof/helpers.go create mode 100644 fof/main.go create mode 100644 fof/parsers.go create mode 100644 fof/queries.go create mode 100644 fof/requests.go create mode 100644 go.sum diff --git a/.gitignore b/.gitignore index 32fca78..fdc76ef 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ results.json scanResults.json errorResults.json notes.md +foo.py +test.txt # Binaries for programs and plugins *.exe *.exe~ diff --git a/fof/helpers.go b/fof/helpers.go new file mode 100644 index 0000000..a52432c --- /dev/null +++ b/fof/helpers.go @@ -0,0 +1,40 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "os" +) + +func (f *fof) readInput(name string) ([]string, error) { + var names []string + n, err := os.Open(name) + if err != nil { + return names, err + } + defer n.Close() + + scanner := bufio.NewScanner(n) + for scanner.Scan() { + names = append(names, scanner.Text()) + } + return names, scanner.Err() +} + +func (f *fof) getTerms() { + var terms []string + switch { + case f.config.file != "": + terms, err := f.readInput(f.config.file) + if err != nil { + log.Fatal(err) + } + f.terms = terms + case f.config.term != "": + terms = append(terms, f.config.term) + f.terms = terms + default: + fmt.Println("no search terms supplied") + } +} diff --git a/fof/main.go b/fof/main.go new file mode 100644 index 0000000..143b8c3 --- /dev/null +++ b/fof/main.go @@ -0,0 +1,64 @@ +package main + +import ( + "flag" + "fmt" +) + +type config struct { + query string + term string + file string + timeout int +} + +type fof struct { + config config + searches *searchesMap + terms []string +} + +func main() { + var config config + flag.StringVar(&config.query, "q", "", "search target (please enclose phrases in quotes)") + flag.StringVar(&config.term, "term", "", "term to search for") + flag.StringVar(&config.file, "file", "", "file name containing a list of terms") + flag.IntVar(&config.timeout, "t", 5000, "timeout (in ms, default 5000)") + + flag.Parse() + + searches := newSearchMap() + + f := &fof{ + config: config, + searches: searches, + } + + f.getTerms() + + qd := f.makeQueryData() + + // conditional to determine term vs terms + + urlB, controlB := f.makeQueryString(qd[1]) + + fmt.Println(urlB, controlB) + + for i, u := range urlB { + s, err := f.makeRequest(u, config.timeout) + if err != nil { + fmt.Println(err) + } + + f.parseYahoo(s, controlB[i]) + } + + // s, err := f.makeRequest(urlB[0], config.timeout) + // if err != nil { + // fmt.Println(err) + // } + + // f.parseYahoo(s, controlB[0]) + + fmt.Println(f.searches.searches) +} diff --git a/fof/parsers.go b/fof/parsers.go new file mode 100644 index 0000000..8aa5761 --- /dev/null +++ b/fof/parsers.go @@ -0,0 +1,108 @@ +package main + +import ( + "fmt" + "strings" + "sync" + + "github.com/PuerkitoBio/goquery" +) + +type search struct { + Blurb string + URL string +} + +type searchesMap struct { + mu sync.Mutex + searches map[string][]search +} + +// some type of selector map thing + +func newSearchMap() *searchesMap { + return &searchesMap{ + searches: make(map[string][]search), + } +} + +func (s *searchesMap) store(term string, search search) { + s.mu.Lock() + s.searches[term] = append(s.searches[term], search) + s.mu.Unlock() +} + +func (f *fof) parseBing(data, control string) { + sr := search{} + doc, err := goquery.NewDocumentFromReader(strings.NewReader(data)) + if err != nil { + fmt.Println(err) + } + + test := doc.Find("head title").Text() + if test == control { + fmt.Println("match") + } else { + fmt.Println("no match") + } + + doc.Find("li.b_algo").Each(func(i int, s *goquery.Selection) { + if link, ok := s.Find("h2 a").Attr("href"); ok { + sr.URL = link + } else { + sr.URL = "" + } + blurb := s.Find("div.b_caption p").Text() + sr.Blurb = blurb + f.searches.store("music", sr) + }) +} + +// eventually just pass in selectors to one function... +func (f *fof) parseGoogle(data, control string) { + sr := search{} + doc, err := goquery.NewDocumentFromReader(strings.NewReader(data)) + if err != nil { + fmt.Println(err) + } + + test := doc.Find("head title").Text() + if test == control { + fmt.Println("match") + } else { + fmt.Println("no match") + } + + doc.Find("div.g").Each(func(i int, s *goquery.Selection) { + if link, ok := s.Find("a").Attr("href"); ok { + sr.URL = link + } + blurb := s.Find("div[style='-webkit-line-clamp:2'] span").Text() + sr.Blurb = blurb + f.searches.store("music", sr) + }) +} + +func (f *fof) parseYahoo(data, control string) { + sr := search{} + doc, err := goquery.NewDocumentFromReader(strings.NewReader(data)) + if err != nil { + fmt.Println(err) + } + + test := doc.Find("head title").Text() + if test == control { + fmt.Println("match") + } else { + fmt.Println("no match") + } + + doc.Find("div.algo").Each(func(i int, s *goquery.Selection) { + if link, ok := s.Find("h3 > a").Attr("href"); ok { + sr.URL = link + } + blurb := s.Find("div.compText").Text() + sr.Blurb = blurb + f.searches.store("music", sr) + }) +} diff --git a/fof/queries.go b/fof/queries.go new file mode 100644 index 0000000..d667666 --- /dev/null +++ b/fof/queries.go @@ -0,0 +1,50 @@ +package main + +type queryData struct { + base string + controlName string + query string + queryString string + spacer string + terms []string +} + +type queryMap struct { + queries map[string]queryData +} + +func (f *fof) makeQueryData() []*queryData { + var qd []*queryData + + bing := &queryData{ + base: "https://bing.com/search?q=", + controlName: "- Search", + query: f.config.query, + spacer: "%20", + terms: f.terms, + } + qd = append(qd, bing) + + // blocks a lot + google := &queryData{ + base: "https://www.google.com/search?q=", + controlName: "- Google Search", + query: f.config.query, + spacer: "+", + terms: f.terms, + } + + _ = google + + yahoo := &queryData{ + base: "https://search.yahoo.com/search?p=", + controlName: "- Yahoo Search Results", + query: f.config.query, + spacer: "+", + terms: f.terms, + } + + qd = append(qd, yahoo) + + return qd +} diff --git a/fof/requests.go b/fof/requests.go new file mode 100644 index 0000000..24f6f54 --- /dev/null +++ b/fof/requests.go @@ -0,0 +1,80 @@ +package main + +import ( + "context" + "fmt" + "io" + "math/rand" + "net/http" + "strings" + "time" +) + +func (f *fof) makeQueryString(data *queryData) ([]string, []string) { + cleanQ := strings.Replace(data.query, " ", data.spacer, -1) + var urls []string + var controls []string + for _, t := range data.terms { + url := fmt.Sprintf("%s%s%s%s", data.base, cleanQ, data.spacer, t) + control := fmt.Sprintf("%s %s %s", data.query, t, data.controlName) + urls = append(urls, url) + controls = append(controls, control) + } + return urls, controls +} + +func (f *fof) makeRequest(url string, timeout int) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Millisecond) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return "", err + } + + uAgent := f.randomUA() + req.Header.Set("User-Agent", uAgent) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", err + } + + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return "", fmt.Errorf("HTTP response: %d", resp.StatusCode) + } + + b, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // fmt.Println(string(b)) + return string(b), nil +} + +// randomUA picks a random user agent from the slice and returns it. +func (f *fof) randomUA() string { + userAgents := f.getUA() + r := rand.New(rand.NewSource(time.Now().UnixNano())) + random := r.Intn(len(userAgents)) + return userAgents[random] +} + +// getUA returns a string slice of ten user agents. +func (f *fof) getUA() []string { + return []string{ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4692.56 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4889.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko)", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0", + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36", + } +} diff --git a/go.mod b/go.mod index 760b983..e078b94 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,10 @@ module github.com/davemolk/scripts go 1.19 + +require github.com/PuerkitoBio/goquery v1.8.0 + +require ( + github.com/andybalholm/cascadia v1.3.1 // indirect + golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..d5e55e1 --- /dev/null +++ b/go.sum @@ -0,0 +1,11 @@ +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=