Skip to content

Commit

Permalink
hacky but working (basic) prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
davemolk committed Oct 14, 2022
1 parent 4afc8c9 commit 80ace0d
Show file tree
Hide file tree
Showing 8 changed files with 362 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ results.json
scanResults.json
errorResults.json
notes.md
foo.py
test.txt
# Binaries for programs and plugins
*.exe
*.exe~
Expand Down
40 changes: 40 additions & 0 deletions fof/helpers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package main

import (
"bufio"
"fmt"
"log"
"os"
)

// readInput opens the file called name and returns its contents split into
// lines, in file order. If the file cannot be opened it returns a nil slice
// and the open error. If scanning fails part-way, the lines read so far are
// returned together with the scanner's error.
func (f *fof) readInput(name string) ([]string, error) {
	file, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var lines []string
	sc := bufio.NewScanner(file)
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	if scanErr := sc.Err(); scanErr != nil {
		return lines, scanErr
	}
	return lines, nil
}

// getTerms populates f.terms from the configuration. A configured file takes
// precedence: its lines become the terms, and the program exits via log.Fatal
// if the file cannot be read. Otherwise a configured single term is used.
// When neither is set, a notice is printed and f.terms is left untouched.
func (f *fof) getTerms() {
	if path := f.config.file; path != "" {
		lines, err := f.readInput(path)
		if err != nil {
			log.Fatal(err)
		}
		f.terms = lines
		return
	}

	if single := f.config.term; single != "" {
		f.terms = []string{single}
		return
	}

	fmt.Println("no search terms supplied")
}
64 changes: 64 additions & 0 deletions fof/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package main

import (
"flag"
"fmt"
)

// config holds the command-line options for a run.
type config struct {
	// query is the search target, term is a single search term, and file
	// names a file containing a list of terms.
	query, term, file string
	// timeout is the per-request timeout in milliseconds.
	timeout int
}

// fof bundles the state for one run: the parsed options, the store that
// collects search results, and the list of terms to search for.
type fof struct {
	config   config       // options parsed from the command line
	searches *searchesMap // results gathered by the parsers
	terms    []string     // search terms, filled in by getTerms
}

// main parses the command-line flags, loads the search terms, builds one
// query URL per term, fetches each URL, parses the response as a Yahoo
// results page, and finally prints everything that was collected.
func main() {
	// Named cfg so the local variable does not shadow the config type.
	var cfg config
	flag.StringVar(&cfg.query, "q", "", "search target (please enclose phrases in quotes)")
	flag.StringVar(&cfg.term, "term", "", "term to search for")
	flag.StringVar(&cfg.file, "file", "", "file name containing a list of terms")
	flag.IntVar(&cfg.timeout, "t", 5000, "timeout (in ms, default 5000)")

	flag.Parse()

	f := &fof{
		config:   cfg,
		searches: newSearchMap(),
	}

	f.getTerms()

	qd := f.makeQueryData()

	// TODO: conditional to determine term vs terms

	// makeQueryData returns Bing first and Yahoo second; index 1 selects Yahoo.
	urlB, controlB := f.makeQueryString(qd[1])

	fmt.Println(urlB, controlB)

	for i, u := range urlB {
		s, err := f.makeRequest(u, cfg.timeout)
		if err != nil {
			// A failed request has no body worth parsing; report it and
			// move on to the next URL.
			fmt.Println(err)
			continue
		}

		f.parseYahoo(s, controlB[i])
	}

	fmt.Println(f.searches.searches)
}
108 changes: 108 additions & 0 deletions fof/parsers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package main

import (
"fmt"
"strings"
"sync"

"github.com/PuerkitoBio/goquery"
)

// search is a single parsed search result: the snippet text shown for the
// result (Blurb) and the link it points to (URL).
type search struct {
	Blurb, URL string
}

// searchesMap collects search results grouped under a string key.
type searchesMap struct {
	mu       sync.Mutex          // held by store while it appends to searches
	searches map[string][]search // results per key
}

// some type of selector map thing

// newSearchMap returns a searchesMap whose underlying map is already
// allocated, so store can write to it immediately.
func newSearchMap() *searchesMap {
	sm := new(searchesMap)
	sm.searches = map[string][]search{}
	return sm
}

// store appends one result to the list kept under term, holding the mutex
// for the duration of the update.
func (s *searchesMap) store(term string, search search) {
	s.mu.Lock()
	defer s.mu.Unlock()

	existing := s.searches[term]
	s.searches[term] = append(existing, search)
}

// parseBing parses data as an HTML page of Bing results and stores one
// search per "li.b_algo" element. control is the page title the caller
// expects; whether the actual title matches is printed as a diagnostic.
// If the HTML cannot be parsed, the error is printed and nothing is stored.
func (f *fof) parseBing(data, control string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
	if err != nil {
		// goquery returns a nil document on a parse error, so stop here
		// rather than dereference it below.
		fmt.Println(err)
		return
	}

	if doc.Find("head title").Text() == control {
		fmt.Println("match")
	} else {
		fmt.Println("no match")
	}

	doc.Find("li.b_algo").Each(func(_ int, s *goquery.Selection) {
		// A fresh value per result; URL stays empty when no link is found.
		var sr search
		if link, ok := s.Find("h2 a").Attr("href"); ok {
			sr.URL = link
		}
		sr.Blurb = s.Find("div.b_caption p").Text()
		// NOTE(review): the key is hard-coded to "music"; presumably a
		// placeholder for the actual search term — confirm.
		f.searches.store("music", sr)
	})
}

// TODO: eventually collapse the per-engine parsers into one function that takes the selectors as arguments.
// parseGoogle parses data as an HTML page of Google results and stores one
// search per "div.g" element. control is the page title the caller expects;
// whether the actual title matches is printed as a diagnostic. If the HTML
// cannot be parsed, the error is printed and nothing is stored.
func (f *fof) parseGoogle(data, control string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
	if err != nil {
		// goquery returns a nil document on a parse error, so stop here
		// rather than dereference it below.
		fmt.Println(err)
		return
	}

	if doc.Find("head title").Text() == control {
		fmt.Println("match")
	} else {
		fmt.Println("no match")
	}

	doc.Find("div.g").Each(func(_ int, s *goquery.Selection) {
		// A fresh value per result, so a result without a link does not
		// inherit the URL of the previous result.
		var sr search
		if link, ok := s.Find("a").Attr("href"); ok {
			sr.URL = link
		}
		sr.Blurb = s.Find("div[style='-webkit-line-clamp:2'] span").Text()
		// NOTE(review): the key is hard-coded to "music"; presumably a
		// placeholder for the actual search term — confirm.
		f.searches.store("music", sr)
	})
}

// parseYahoo parses data as an HTML page of Yahoo results and stores one
// search per "div.algo" element. control is the page title the caller
// expects; whether the actual title matches is printed as a diagnostic.
// If the HTML cannot be parsed, the error is printed and nothing is stored.
func (f *fof) parseYahoo(data, control string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
	if err != nil {
		// goquery returns a nil document on a parse error, so stop here
		// rather than dereference it below.
		fmt.Println(err)
		return
	}

	if doc.Find("head title").Text() == control {
		fmt.Println("match")
	} else {
		fmt.Println("no match")
	}

	doc.Find("div.algo").Each(func(_ int, s *goquery.Selection) {
		// A fresh value per result, so a result without a link does not
		// inherit the URL of the previous result.
		var sr search
		if link, ok := s.Find("h3 > a").Attr("href"); ok {
			sr.URL = link
		}
		sr.Blurb = s.Find("div.compText").Text()
		// NOTE(review): the key is hard-coded to "music"; presumably a
		// placeholder for the actual search term — confirm.
		f.searches.store("music", sr)
	})
}
50 changes: 50 additions & 0 deletions fof/queries.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package main

// queryData describes how to build search URLs for one search engine.
type queryData struct {
	// base is the URL prefix up to and including the query parameter's "=".
	// controlName is the title suffix used when building the control string.
	// query is the user's search target.
	// queryString is not set by the code that builds these values.
	// spacer is the text substituted for spaces in the URL.
	base, controlName, query, queryString, spacer string
	// terms are the search terms; one URL is built per term.
	terms []string
}

// queryMap holds queryData values under a string key.
type queryMap struct {
	queries map[string]queryData // queryData per key
}

// makeQueryData returns the per-engine query settings in a fixed order:
// Bing first, Yahoo second. Every entry carries the configured query and
// the current terms; the entries differ in base URL, control name and spacer.
func (f *fof) makeQueryData() []*queryData {
	// build fills in the fields that are the same for every engine.
	build := func(base, controlName, spacer string) *queryData {
		return &queryData{
			base:        base,
			controlName: controlName,
			query:       f.config.query,
			spacer:      spacer,
			terms:       f.terms,
		}
	}

	bing := build("https://bing.com/search?q=", "- Search", "%20")

	// Google blocks a lot, so its entry is built but left out of the result.
	google := build("https://www.google.com/search?q=", "- Google Search", "+")
	_ = google

	yahoo := build("https://search.yahoo.com/search?p=", "- Yahoo Search Results", "+")

	return []*queryData{bing, yahoo}
}
80 changes: 80 additions & 0 deletions fof/requests.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package main

import (
"context"
"fmt"
"io"
"math/rand"
"net/http"
"strings"
"time"
)

// makeQueryString builds one search URL and one control string per term in
// data.terms and returns them as two parallel slices (same length, same
// order).
//
// Each URL is data.base followed by the query, the spacer and the term, with
// every space in both the query and the term replaced by data.spacer so that
// multi-word terms do not put raw spaces into the URL. Other reserved
// characters are passed through unchanged. Each control string is
// "<query> <term> <controlName>" built from the unmodified query and term.
func (f *fof) makeQueryString(data *queryData) ([]string, []string) {
	cleanQ := strings.ReplaceAll(data.query, " ", data.spacer)
	var urls []string
	var controls []string
	for _, t := range data.terms {
		// Terms get the same space handling as the query.
		cleanT := strings.ReplaceAll(t, " ", data.spacer)
		urls = append(urls, fmt.Sprintf("%s%s%s%s", data.base, cleanQ, data.spacer, cleanT))
		controls = append(controls, fmt.Sprintf("%s %s %s", data.query, t, data.controlName))
	}
	return urls, controls
}

// makeRequest performs a GET on url with a randomly chosen User-Agent and
// returns the response body as a string. timeout is in milliseconds and
// bounds the whole request through the request's context. Any status other
// than 200 is reported as an error.
func (f *fof) makeRequest(url string, timeout int) (string, error) {
	limit := time.Duration(timeout) * time.Millisecond
	ctx, cancel := context.WithTimeout(context.Background(), limit)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", f.randomUA())

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP response: %d", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// randomUA returns one user agent chosen at random from the list supplied
// by getUA, using a generator seeded from the current time on each call.
func (f *fof) randomUA() string {
	agents := f.getUA()
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	return agents[rng.Intn(len(agents))]
}

// getUA returns the ten user agent strings available for requests: five
// macOS entries followed by five Windows entries.
func (f *fof) getUA() []string {
	macAgents := []string{
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4692.56 Safari/537.36",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4889.0 Safari/537.36",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko)",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7",
	}
	windowsAgents := []string{
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0",
		"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
	}
	return append(macAgents, windowsAgents...)
}
7 changes: 7 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
module github.com/davemolk/scripts

go 1.19

require github.com/PuerkitoBio/goquery v1.8.0

require (
github.com/andybalholm/cascadia v1.3.1 // indirect
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
)
11 changes: 11 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

0 comments on commit 80ace0d

Please sign in to comment.