Skip to content

Commit

Permalink
[Change]use python parse php array
Browse files Browse the repository at this point in the history
  • Loading branch information
boneyao committed May 12, 2016
1 parent 292f96b commit c1bd7bb
Showing 1 changed file with 57 additions and 10 deletions.
67 changes: 57 additions & 10 deletions update_list.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Update the search_engines.pickle list contained within the package.
Run this script before deploying an update."""
from __future__ import absolute_import, division, print_function

import argparse
import os
import sys
from collections import OrderedDict
Expand All @@ -20,22 +20,69 @@

def _here(*paths):
    """Join *paths* onto the directory that contains this script."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, *paths)
def array(*args, **kwargs):
    """Python stand-in for PHP's ``array(...)`` constructor.

    It lets PHP array expressions be evaluated as Python code, which is how
    ``parse_php`` turns entry values into Python objects.

    :param args: positional items; returned as a ``list`` in the same order.
    :param kwargs: keyword items; returned as an ``OrderedDict``. They are
        only used when no positional items are given.
    :returns: a list, an OrderedDict, or an empty list for ``array()``.
    """
    if args:
        return list(args)
    if kwargs:
        # NOTE: keyword-argument order is only guaranteed on Python 3.6+.
        return OrderedDict(kwargs)
    # PHP's ``array()`` is an empty array, so never fall through to None.
    return []

# Entries produced by the most recent parse_php() call. Kept at module level
# for backward compatibility with code that reads the list directly.
piwik_search_engines = []
# PHP's ``null`` literal, resolved by name when entry values are eval()'d.
null = None


def _strip_php_comment(line):
    """Return *line* without a trailing ``//`` or ``#`` comment.

    Comment markers inside single- or double-quoted strings are left alone,
    so values such as ``'http://example.com'`` or ``'#q={k}'`` stay intact.
    """
    quote = None
    index = 0
    while index < len(line):
        char = line[index]
        if quote is not None:
            if char == '\\':
                # Skip the escaped character so \' does not end the string.
                index += 2
                continue
            if char == quote:
                quote = None
        elif char in ('"', "'"):
            quote = char
        elif char == '#' or line.startswith('//', index):
            return line[:index].rstrip()
        index += 1
    return line


def parse_php(php_script):
    """Parse the search engine array out of a SearchEngines.php script.

    The body between the first ``array(`` following the first ``if `` and the
    last ``);`` is read line by line. Every non-comment line must look like
    ``'key' => <value>`` (optionally followed by a comma and/or a trailing
    comment); ``<value>`` is evaluated as a Python expression, with names
    such as ``array`` and ``null`` resolved from this module.

    :param php_script: PHP source as text (UTF-8 encoded bytes are accepted
        and decoded first).
    :returns: ``OrderedDict`` mapping each key to its evaluated value, in
        source order.
    :raises ValueError: if no ``array( ... );`` block is found or a line is
        not a ``key => value`` entry.
    """
    # urlopen().read() yields bytes on Python 3; on Python 2 ``bytes is str``
    # and the input is deliberately left untouched.
    if isinstance(php_script, bytes) and not isinstance(php_script, str):
        php_script = php_script.decode('utf-8')

    # Start looking at the first "if " when there is one, else at the top.
    if_index = max(php_script.find('if '), 0)
    array_index = php_script.find('array(', if_index)
    if array_index == -1:
        raise ValueError('no "array(" found in PHP script')
    start_index = array_index + len('array(')
    end_index = php_script.rfind(');')
    if end_index < start_index:
        raise ValueError('no closing ");" found after "array(" in PHP script')

    # Collected locally so that repeated calls never see earlier results.
    entries = []
    for line in php_script[start_index:end_index].split('\n'):
        line = line.strip()
        # Blank lines, line comments and block-comment lines carry no data.
        if not line or line.startswith(('#', '//', '/*', '*')):
            continue
        line = _strip_php_comment(line)

        key, separator, value = line.partition('=>')
        if not separator:
            raise ValueError(
                'expected a "key => value" entry, got: {!r}'.format(line))
        key = key.strip().strip("'")

        # Drop the entry separator so the value evaluates to itself whether
        # or not it is the last entry of the PHP array.
        value = value.strip()
        if value.endswith(','):
            value = value[:-1]

        # NOTE(security): eval() executes the value as Python code. Only feed
        # this function SearchEngines.php content from a trusted source.
        entries.append((key, eval(value)))

    piwik_search_engines[:] = entries
    return OrderedDict(entries)

def main():
    """Rebuild the pickled search engine definitions from SearchEngines.php.

    The PHP source is taken from ``--file``, which may be an HTTP(S) URL (the
    default points at Piwik 2.14.3) or a local path. It is parsed with
    ``parse_php`` and the result is pickled to
    ``serpextract/search_engines.py<N>.pickle`` next to this script, where
    ``<N>`` is the major version of the running interpreter.
    """
    parser = argparse.ArgumentParser(description="SearchEngines.php file path")
    parser.add_argument("--file",
                        dest="file",
                        type=str,
                        default="https://raw.githubusercontent.com/piwik/piwik/2.14.3/core/DataFiles/SearchEngines.php",
                        help="SearchEngines.php")
    args = parser.parse_args()

    py_version = sys.version_info[0]
    filename = _here('serpextract', 'search_engines.py{}.pickle'.format(py_version))
    print('Updating search engine parser definitions.')

    if args.file.startswith(('http://', 'https://')):
        # Remote source: download it and always release the connection.
        response = urlopen(args.file)
        try:
            php_script = response.read()
        finally:
            response.close()
        # The response body is bytes on Python 3, while parsing needs text.
        if not isinstance(php_script, str):
            php_script = php_script.decode('utf-8')
    else:
        # Local source.
        with open(args.file) as f:
            php_script = f.read()

    # Ordering of the PHP array matters; parse_php returns an OrderedDict.
    piwik_engines = parse_php(php_script)
    with open(filename, 'wb') as pickle_file:
        pickle.dump(piwik_engines, pickle_file)

Expand Down

0 comments on commit c1bd7bb

Please sign in to comment.