Skip to content

Commit

Permalink
Add tests for custom parsers and naive detection
Browse files Browse the repository at this point in the history
  • Loading branch information
msukmanowsky committed Jul 20, 2013
1 parent c230699 commit 8eedac1
Showing 1 changed file with 37 additions and 10 deletions.
47 changes: 37 additions & 10 deletions tests/test_serps.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from urlparse import urlparse

try:
from serpextract import extract, is_serp, get_all_query_params
from serpextract import SearchEngineParser, extract, is_serp,\
get_all_query_params, add_custom_parser
except ImportError:
import os, sys
basedir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
sys.path.append(basedir)
from serpextract import extract, is_serp, get_all_query_params
from serpextract import SearchEngineParser, extract, is_serp,\
get_all_query_params, add_custom_parser


class TestSERPs(unittest.TestCase):
Expand All @@ -23,21 +25,26 @@ class TestSERPs(unittest.TestCase):
country case and the keywords with crazy characters case.
"""

def assertInvalidSERP(self, url):
self.assertIsNone(extract(url))
self.assertFalse(is_serp(url))
def setUp(self):
    """Create the fixtures used by the custom-parser and naive-detection tests.

    Stores a PiccShare search URL and a ``SearchEngineParser`` instance on
    the test case as ``custom_serp_url`` and ``custom_parser``.
    """
    self.custom_serp_url = 'http://search.piccshare.com/search.php?cat=web&channel=main&hl=en&q=test'
    # NOTE(review): the positional arguments look like engine name, keyword
    # query parameter, link template and charset -- confirm against the
    # SearchEngineParser signature.
    parser_args = (u'PiccShare', u'q', u'/search.php?q={k}', u'utf-8')
    self.custom_parser = SearchEngineParser(*parser_args)

def assertValidSERP(self, url, expected_engine_name, expected_keyword):
def assertInvalidSERP(self, url, **kwargs):
    """Assert that ``url`` is not treated as a search engine results page.

    ``extract`` must return ``None`` and ``is_serp`` must be falsy for the
    URL. Any keyword arguments are forwarded unchanged to both functions.
    """
    extracted = extract(url, **kwargs)
    self.assertIsNone(extracted)
    detected = is_serp(url, **kwargs)
    self.assertFalse(detected)

def assertValidSERP(self, url, expected_engine_name, expected_keyword, **kwargs):
    """Assert that ``url`` is recognised as a SERP with the expected details.

    The check runs twice: once on ``url`` as given and once on
    ``urlparse(url)``. For each form, ``extract`` must return a result whose
    ``keyword`` and ``engine_name`` match the expected values and ``is_serp``
    must be truthy.

    Keyword arguments are forwarded to every ``extract`` and ``is_serp``
    call, so options such as ``parser=...`` or ``use_naive_method=True``
    apply to all checks rather than only some of them.
    """
    # Test both the URL and a parsed URL version
    for candidate in (url, urlparse(url)):
        res = extract(candidate, **kwargs)
        # Fail with a readable assertion instead of an AttributeError when
        # nothing was extracted.
        self.assertIsNotNone(res)
        self.assertEqual(res.keyword, expected_keyword)
        self.assertEqual(res.engine_name, expected_engine_name)
        self.assertTrue(is_serp(candidate, **kwargs))

def assertValidSERPs(self, expected_serps):
def assertValidSERPs(self, expected_serps, **kwargs):
    """Run ``assertValidSERP`` once for every expected SERP.

    ``expected_serps`` is an iterable of ``(url, engine_name, keyword)``
    triples. Keyword arguments are forwarded to each ``assertValidSERP``
    call; each triple is checked exactly once, with those options applied.
    """
    for url, engine_name, keyword in expected_serps:
        self.assertValidSERP(url, engine_name, keyword, **kwargs)

def test_google(self):
serps = (
Expand Down Expand Up @@ -88,6 +95,26 @@ def test_path_engines(self):
('http://www.1.cz/s/ars-technica/', '1.cz', u'ars-technica'), # These guys do not properly URL encode their keywords
)

def test_custom_parser_explicit(self):
self.assertInvalidSERP(self.custom_serp_url)
self.assertValidSERP(self.custom_serp_url,
self.custom_parser.engine_name,
u'test',
parser=self.custom_parser)

def test_custom_parser_implicit(self):
    """A parser registered with ``add_custom_parser`` is used implicitly.

    The fixture URL must be rejected before registration and accepted
    afterwards without passing the parser to the assertion helpers.
    """
    from serpextract.serpextract import _engines

    domain = u'search.piccshare.com'
    self.assertInvalidSERP(self.custom_serp_url)
    add_custom_parser(domain, self.custom_parser)
    try:
        self.assertValidSERP(self.custom_serp_url,
                             self.custom_parser.engine_name,
                             u'test')
    finally:
        # Undo the registration even when the assertion fails, so the
        # custom parser cannot leak into other tests.
        # NOTE(review): assumes add_custom_parser stores the parser in
        # _engines under this key -- confirm against serpextract.
        del _engines[domain]

def test_naive_detection(self):
self.assertInvalidSERP(self.custom_serp_url)
self.assertValidSERP(self.custom_serp_url, u'piccshare', u'test', use_naive_method=True)

def test_get_all_query_params(self):
"""Ensure that get_all_query_params is a non-empty list."""
params = get_all_query_params()
Expand Down

0 comments on commit 8eedac1

Please sign in to comment.