From bdfd0ef10c173da93df1409ad6914ed67ceb3e81 Mon Sep 17 00:00:00 2001 From: Dave Molk Date: Sun, 23 Oct 2022 14:42:44 -0600 Subject: [PATCH] rough wl prototype --- README.md | 15 +++++++++++++-- wl/main.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ wl/process.go | 37 +++++++++++++++++++++++++++++++++++++ wl/requests.go | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 wl/main.go create mode 100644 wl/process.go create mode 100644 wl/requests.go diff --git a/README.md b/README.md index 6a52efd..f7a8fdf 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,20 @@ # scripts a collection of odds, ends, and everything in between (added below once stable) +# go +## fof +(under development) +enter a search term and a file name (list of additional search terms, line-separated) and get back results (blurb plus url) from ask, bing, brave, duckduckgo, google, yahoo, and yandex. ## scanner -two different ways to make a port scanner in Go. +two different ways to make a port scanner ## tas -throw against site (pull down archived links from Wayback Machine, run against the site, see what status codes currently are). written in Go. \ No newline at end of file +throw against site (pull down archived links from Wayback Machine, run against the site, see what status codes currently are) + +# python +## tas +throw against site (pull down archived links from Wayback Machine, run against the site, see what status codes currently are) (not concurrent...yet) + +## dj +dad joke, ftw. 
use -t to enter a search term, otherwise enjoy a randomly selected dad joke
\ No newline at end of file
diff --git a/wl/main.go b/wl/main.go
new file mode 100644
index 0000000..ff65e42
--- /dev/null
+++ b/wl/main.go
@@ -0,0 +1,61 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"regexp"
+	"sync"
+)
+
+// config holds the command-line settings.
+type config struct {
+	timeout int    // request timeout in milliseconds (-t)
+	url     string // page to fetch (-u)
+}
+
+// wl carries the settings and the compiled whitespace pattern shared by the
+// request and processing methods.
+type wl struct {
+	config
+	noBlank *regexp.Regexp
+}
+
+// main fetches the page given with -u, splits its text into words and prints
+// each word lowercased with trailing punctuation removed.
+// Words are printed from separate goroutines, so output order is not fixed.
+func main() {
+	var cfg config
+	flag.IntVar(&cfg.timeout, "t", 5000, "request timeout (in ms)")
+	flag.StringVar(&cfg.url, "u", "", "url to search")
+	flag.Parse()
+
+	if cfg.url == "" {
+		log.Fatal("no url given (use -u)")
+	}
+
+	// noBlank matches every run of whitespace, so single newlines and tabs
+	// separate words too.
+	w := &wl{
+		config:  cfg,
+		noBlank: regexp.MustCompile(`\s+`),
+	}
+
+	doc, err := w.makeRequest(cfg.url, cfg.timeout)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	var wg sync.WaitGroup
+	for _, word := range w.processData(doc) {
+		wg.Add(1)
+		go func(word string) {
+			defer wg.Done()
+			// a token made only of punctuation trims down to nothing
+			if word = w.removePunctuation(word); word != "" {
+				fmt.Println(word)
+			}
+		}(word)
+	}
+	wg.Wait()
+}
\ No newline at end of file
diff --git a/wl/process.go b/wl/process.go
new file mode 100644
index 0000000..df103c0
--- /dev/null
+++ b/wl/process.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// processData returns the words of doc's text content.
+// script and style elements are removed from doc first (doc is modified),
+// so their contents do not show up as words. Returns nil when no text is left.
+func (w *wl) processData(doc *goquery.Document) []string {
+	doc.Find("script").Remove()
+	doc.Find("style").Remove()
+
+	// Turn each whitespace run (w.noBlank) into a single space and trim the
+	// ends, so the split below produces neither empty nor glued-together words.
+	body := strings.TrimSpace(w.noBlank.ReplaceAllString(doc.Text(), " "))
+	if body == "" {
+		return nil
+	}
+	return strings.Split(body, " ")
+}
+
+// removePunctuation lowercases word and strips every trailing character
+// listed by getPunctuation ("wait..." -> "wait", "what!?" -> "what").
+// Punctuation at the start or inside the word is kept.
+func (w *wl) removePunctuation(word string) string {
+	cutset := strings.Join(w.getPunctuation(), "")
+	return strings.ToLower(strings.TrimRight(word, cutset))
+}
+
+// getPunctuation lists the characters removePunctuation strips from the end
+// of a word.
+// prob add more
+func (w *wl) getPunctuation() []string {
+	return []string{
+		",",
+		".",
+		":",
+		";",
+		"!",
+		"?",
+	}
+}
\ No newline at end of file
diff --git a/wl/requests.go b/wl/requests.go
new file mode 100644
index 0000000..a440c24
--- /dev/null
+++ b/wl/requests.go
@@ -0,0 +1,44 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// makeRequest GETs url and parses the response body as an HTML document.
+// timeout is the limit in milliseconds for the whole exchange; a value <= 0
+// gives an already-expired context, so the request fails at once. It returns
+// an error when the request cannot be built or sent, when the status is not
+// 200, or when the body cannot be parsed.
+func (w *wl) makeRequest(url string, timeout int) (*goquery.Document, error) {
+	// Honor the caller-supplied timeout (milliseconds) rather than a fixed
+	// value, so the -t flag actually takes effect.
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Millisecond)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("statusCode: %d", resp.StatusCode)
+	}
+
+	// The body is read here, before the deferred cancel fires, so the
+	// context deadline also bounds the download.
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	return doc, nil
+}
\ No newline at end of file