Skip to content

Commit

Permalink
Added function 'search_images_ddg' to search Duck Duck Go with same a…
Browse files Browse the repository at this point in the history
…pi as 'search_images_bing', except for the need for the api-key. Also, this function removes duplicated urls. Accordingly, the parameter is 'max_n' (fastai#250)
  • Loading branch information
prairie-guy authored Nov 29, 2020
1 parent 741295a commit a8701a6
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,33 @@ def search_images_bing(key, term, min_sz=128, max_images=150):

# -

def search_images_ddg(key,max_n=200):
    """Search DuckDuckGo Images for 'key' and return up to 'max_n' unique image urls.

    (Adopted from https://github.com/deepanprabhu/duckduckgo-images-api)

    key:   the search term sent to DuckDuckGo.
    max_n: upper bound on the number of unique urls returned; values below 1
           yield an empty result without touching the network.

    Returns an `L` of unique urls in the order they were first seen, or `None`
    (after printing a message) when the search token cannot be parsed from the
    landing page. If result pages keep failing, whatever was collected so far is
    returned instead of retrying forever.
    """
    if max_n < 1: return L()
    url = 'https://duckduckgo.com/'
    timeout = 10      # seconds; without it a stalled connection would hang forever
    max_failures = 5  # consecutive failed page fetches tolerated before giving up
    params = {'q':key}
    res = requests.post(url,data=params,timeout=timeout)
    # The landing page embeds a 'vqd' token that the image endpoint requires.
    searchObj = re.search(r'vqd=([\d-]+)\&',res.text)
    if not searchObj: print('Token Parsing Failed !'); return
    requestUrl = url + 'i.js'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'}
    params = (('l','us-en'),('o','json'),('q',key),('vqd',searchObj.group(1)),('f',',,,'),('p','1'),('v7exp','a'))
    urls = {}  # dict used as an insertion-ordered set, so dedupe happens while counting
    failures = 0
    while True:
        try:
            res = requests.get(requestUrl,headers=headers,params=params,timeout=timeout)
            data = json.loads(res.text)
            images = [obj['image'] for obj in data['results'] if 'image' in obj]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Transient failure (network error, non-JSON body, unexpected shape):
            # retry the same page, but only a bounded number of times.
            failures += 1
            if failures >= max_failures: return L(list(urls))
            continue
        failures = 0
        for image in images:
            urls[image] = None
            # Count unique urls only, so duplicates do not eat into max_n.
            if len(urls) >= max_n: return L(list(urls))
        if 'next' not in data: return L(list(urls))
        requestUrl = url + data['next']


def plot_function(f, tx=None, ty=None, title=None, min=-2, max=2, figsize=(6,4)):
x = torch.linspace(min,max)
fig,ax = plt.subplots(figsize=figsize)
Expand Down

0 comments on commit a8701a6

Please sign in to comment.