-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
252 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ test.json | |
foo/ | ||
.DS_Store | ||
filter.txt | ||
|
||
# Binaries for programs and plugins | ||
*.exe | ||
*.exe~ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package main | ||
|
||
import ( | ||
"flag" | ||
) | ||
|
||
// config holds the search options collected from the command-line flags.
type config struct {
	// query is the loose search term; queryExact is the term that gets
	// wrapped in double quotes when the search URL is built.
	query, queryExact string

	// filetype, site and exclude feed the filetype:, site: and -site:
	// operators respectively. Empty means "not requested".
	filetype, site, exclude string

	// inTitle and inURL request that the search term also be passed to the
	// intitle: and inurl: operators.
	inTitle, inURL bool

	// timeout is the request timeout in milliseconds.
	timeout int
}
|
||
type ngd struct { | ||
config config | ||
} | ||
|
||
func main() { | ||
var config config | ||
flag.StringVar(&config.filetype, "ft", "", "file type") | ||
flag.BoolVar(&config.inURL, "url", false, "term to find in URL") | ||
flag.StringVar(&config.query, "q", "", "search query") | ||
flag.StringVar(&config.queryExact, "qe", "", "search query (exact matching)") | ||
flag.StringVar(&config.site, "site", "", "site/domain to search") | ||
flag.StringVar(&config.exclude, "no", "", "site/domain to exclude") | ||
flag.IntVar(&config.timeout, "t", 5000, "timeout for request") | ||
flag.BoolVar(&config.inTitle, "title", false, "term to find in site title") | ||
flag.Parse() | ||
|
||
n := &ngd{ | ||
config: config, | ||
} | ||
|
||
n.validateInput(config) | ||
|
||
qd := n.makeQueryData() | ||
pd := n.makeParseData() | ||
url := n.makeQueryString(qd) | ||
|
||
n.getAndParseData(url, config.timeout, pd) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"log" | ||
"strings" | ||
|
||
"github.com/PuerkitoBio/goquery" | ||
) | ||
|
||
func (n *ngd) getAndParseData(url string, timeout int, pd *parseData) { | ||
log.Println("getting", url) | ||
|
||
s, err := n.makeRequest(url, timeout) | ||
if err != nil { | ||
log.Println(err) | ||
} | ||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(s)) | ||
if err != nil { | ||
log.Fatal("failed to parse html") | ||
} | ||
|
||
doc.Find(pd.itemSelector).Each(func(i int, s *goquery.Selection) { | ||
// no link, no point in getting blurb | ||
if link, ok := s.Find(pd.linkSelector).Attr("href"); !ok { | ||
return | ||
} else { | ||
fmt.Println(link) | ||
blurb := s.Find(pd.blurbSelector).Text() | ||
fmt.Println(blurb) | ||
fmt.Println() | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
) | ||
|
||
// queryData describes how a search URL is assembled: base is the URL prefix
// the query is appended to, colon is the encoded ":" placed between an
// operator and its value, and spacer is the separator that replaces spaces
// inside the search term.
type queryData struct {
	base, colon, spacer string
}
|
||
// parseData holds the CSS selectors used to pull results out of a response
// page, plus a short label for the selector set.
type parseData struct {
	// itemSelector matches one search result; linkSelector and blurbSelector
	// are evaluated inside each matched item.
	itemSelector, linkSelector, blurbSelector string

	// name labels this selector set.
	name string
}
|
||
func (n *ngd) makeQueryData() *queryData { | ||
ddg := &queryData{ | ||
base: "https://html.duckduckgo.com/html?q=", | ||
colon: "%3A", | ||
spacer: "+", // %20 on fof... | ||
} | ||
|
||
return ddg | ||
} | ||
|
||
func (n *ngd) makeQueryString(qd *queryData) string { | ||
var components []string | ||
var cleanedQuery string | ||
switch { | ||
case n.config.query != "": | ||
cleanedQuery = strings.Replace(n.config.query, " ", qd.spacer, -1) | ||
components = append(components, cleanedQuery) | ||
case n.config.queryExact != "": | ||
cleanedQuery = strings.Replace(n.config.queryExact, " ", qd.spacer, -1) | ||
cleanedQuery = fmt.Sprintf("\"%s\"", cleanedQuery) | ||
components = append(components, cleanedQuery) | ||
} | ||
|
||
if n.config.inTitle { | ||
intitle := fmt.Sprintf("intitle%s%s", qd.colon, cleanedQuery) | ||
components = append(components, intitle) | ||
} | ||
|
||
if n.config.inURL { | ||
inurl := fmt.Sprintf("inurl%s%s", qd.colon, cleanedQuery) | ||
components = append(components, inurl) | ||
} | ||
|
||
if n.config.filetype != "" { | ||
filetype := fmt.Sprintf("filetype%s%s", qd.colon, n.config.filetype) | ||
components = append(components, filetype) | ||
} | ||
|
||
if n.config.site != "" { | ||
site := fmt.Sprintf("site%s%s", qd.colon, n.config.site) | ||
components = append(components, site) | ||
} | ||
|
||
if n.config.exclude != "" { | ||
exclude := fmt.Sprintf("-site%s%s", qd.colon, n.config.exclude) | ||
components = append(components, exclude) | ||
} | ||
|
||
params := strings.Join(components, "+") | ||
return fmt.Sprintf("%s%s", qd.base, params) | ||
} | ||
|
||
func (n *ngd) makeParseData() *parseData { | ||
duck := &parseData{ | ||
blurbSelector: "div.links_main > a", | ||
itemSelector: "div.web-result", | ||
linkSelector: "div.links_main > a", | ||
name: "duck", | ||
} | ||
return duck | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"io" | ||
"math/rand" | ||
"net/http" | ||
"time" | ||
) | ||
|
||
func (n *ngd) makeRequest(url string, timeout int) (string, error) { | ||
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Millisecond) | ||
defer cancel() | ||
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
uAgent := n.randomUA() | ||
req.Header.Set("User-Agent", uAgent) | ||
|
||
resp, err := http.DefaultClient.Do(req) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
defer resp.Body.Close() | ||
|
||
if resp.StatusCode != 200 { | ||
return "", fmt.Errorf("HTTP response: %d for %s", resp.StatusCode, url) | ||
} | ||
|
||
b, err := io.ReadAll(resp.Body) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return string(b), nil | ||
} | ||
|
||
func (n *ngd) randomUA() string { | ||
userAgents := n.getUA() | ||
r := rand.New(rand.NewSource(time.Now().UnixNano())) | ||
random := r.Intn(len(userAgents)) | ||
return userAgents[random] | ||
} | ||
|
||
func (n *ngd) getUA() []string { | ||
return []string{ | ||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", | ||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4692.56 Safari/537.36", | ||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4889.0 Safari/537.36", | ||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko)", | ||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7", | ||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", | ||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", | ||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0", | ||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", | ||
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36", | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package main | ||
|
||
import "log" | ||
|
||
func (n *ngd) validateInput(config config) { | ||
switch { | ||
case config.query == "" && config.queryExact == "": | ||
log.Println("no search term supplied...") | ||
case config.filetype != "": | ||
n.checkFileType(config.filetype) | ||
} | ||
} | ||
|
||
func (n *ngd) checkFileType(file string) { | ||
allowed := []string{"pdf", "doc(x)", "xls(x)", "ppt(x)", "html"} | ||
for _, v := range allowed { | ||
if file == v { | ||
return | ||
} | ||
} | ||
log.Fatal("filetype unsupported") | ||
} |