Skip to content

Commit

Permalink
rough wl prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
davemolk committed Oct 23, 2022
1 parent 8791bfb commit bdfd0ef
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 2 deletions.
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
# scripts
a collection of odds, ends, and everything in between (added below once stable)

# go
## fof
(under development)
enter a search term and the name of a file containing additional search terms (one per line), and get back results (blurb plus URL) from Ask, Bing, Brave, DuckDuckGo, Google, Yahoo, and Yandex.

## scanner
two different ways to make a port scanner in Go.
two different ways to make a port scanner

## tas
throw against site (pull down archived links from Wayback Machine, run against the site, see what status codes currently are). written in Go.
throw against site (pull down archived links from the Wayback Machine, run them against the site, and see what the status codes currently are)

# python
## tas
throw against site (pull down archived links from Wayback Machine, run against the site, see what status codes currently are) (not concurrent...yet)

## dj
dad joke, ftw. use -t to enter a search term, otherwise enjoy a randomly selected dad joke
49 changes: 49 additions & 0 deletions wl/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package main

import (
"flag"
"fmt"
"log"
"regexp"
"sync"
)

// config holds the settings read from the command-line flags.
type config struct {
	// timeout is the request timeout in milliseconds (-t flag).
	timeout int
	// url is the address of the page to fetch (-u flag).
	url string
}

// wl carries the run configuration together with the compiled regular
// expression used while building the word list.
type wl struct {
	// config is embedded, so its fields are reachable directly on wl.
	config

	// noBlank is the compiled whitespace pattern supplied by main.
	noBlank *regexp.Regexp
}

// main parses the command-line flags, fetches the page named by -u, and
// prints every word found in the page text on its own line after passing it
// through removePunctuation.
//
// Words are processed in separate goroutines, so the order of the output
// lines is not deterministic.
func main() {
	// Named cfg rather than config so the variable does not shadow the type.
	var cfg config
	flag.IntVar(&cfg.timeout, "t", 5000, "request timeout (in ms)")
	flag.StringVar(&cfg.url, "u", "", "url to search")
	flag.Parse()

	// Fail early with a usable message instead of letting the HTTP client
	// reject an empty URL with an obscure error.
	if cfg.url == "" {
		flag.Usage()
		log.Fatal("a url is required (use -u)")
	}

	w := &wl{
		config:  cfg,
		noBlank: regexp.MustCompile(`\s{2,}`),
	}

	doc, err := w.makeRequest(cfg.url, cfg.timeout)
	if err != nil {
		log.Fatal(err)
	}

	words := w.processData(doc)
	var wg sync.WaitGroup
	for _, word := range words {
		wg.Add(1)
		// word is passed as an argument so each goroutine works on its own copy.
		go func(word string) {
			defer wg.Done()
			word = w.removePunctuation(word)
			// A token can be empty after cleaning; do not emit blank lines.
			if word == "" {
				return
			}
			fmt.Println(word)
		}(word)
	}
	wg.Wait()
}
37 changes: 37 additions & 0 deletions wl/process.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package main

import (
"strings"

"github.com/PuerkitoBio/goquery"
)

// processData returns the words contained in the text of doc.
//
// The <script> and <style> elements are removed from doc first, so doc itself
// is modified, and the remaining text is split on whitespace. The returned
// slice never contains empty strings; it is empty when the document has no
// text.
func (w *wl) processData(doc *goquery.Document) []string {
	doc.Find("script").Remove()
	doc.Find("style").Remove()

	// strings.Fields splits on any run of whitespace. The previous approach
	// collapsed only runs of two or more whitespace characters and then
	// deleted newlines outright, which glued together words separated by a
	// single newline ("foo\nbar" became "foobar"), never split on a lone tab,
	// and produced empty entries for leading or trailing spaces.
	return strings.Fields(doc.Text())
}

// removePunctuation strips every trailing punctuation mark listed by
// getPunctuation from word and returns the result in lower case.
//
// All trailing marks are removed regardless of their order or number, so
// "Wait..." becomes "wait" and "What?!" becomes "what". Punctuation at the
// start of the word or inside it is left untouched.
func (w *wl) removePunctuation(word string) string {
	// The marks are joined into a cutset for TrimRight, which treats each
	// character of the cutset individually; this relies on every entry of
	// getPunctuation being a single character.
	cutset := strings.Join(w.getPunctuation(), "")
	return strings.ToLower(strings.TrimRight(word, cutset))
}

// getPunctuation returns the punctuation marks that removePunctuation strips
// from the end of a word. A new slice is built on every call.
//
// The list is probably incomplete; add more marks here as needed.
func (w *wl) getPunctuation() []string {
	marks := []string{",", ".", ":", ";", "!", "?"}
	return marks
}
36 changes: 36 additions & 0 deletions wl/requests.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package main

import (
"context"
"fmt"
"net/http"
"time"

"github.com/PuerkitoBio/goquery"
)

// makeRequest performs a GET request for url and parses the response body
// into a goquery document.
//
// timeout is the overall time limit in milliseconds; it covers the request
// and the reading of the body. An error is returned when the request cannot
// be built or sent, when the response status is not 200, or when the body
// cannot be parsed.
func (w *wl) makeRequest(url string, timeout int) (*goquery.Document, error) {
	// Honour the caller's timeout; it used to be ignored in favour of a
	// hard-coded 5000 ms, which made the -t flag a no-op.
	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Millisecond)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("statusCode: %d", resp.StatusCode)
	}

	// The body is parsed before returning, while ctx is still live.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing response from %q: %w", url, err)
	}

	return doc, nil
}

0 comments on commit bdfd0ef

Please sign in to comment.