First working version
marians committed Oct 31, 2018
1 parent f09d4e8 commit e6e1f61
Showing 8 changed files with 227 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
@@ -0,0 +1,3 @@
/__pycache__
/venv
/secrets
3 changes: 3 additions & 0 deletions .gitignore
@@ -102,3 +102,6 @@ venv.bak/

# mypy
.mypy_cache/


/secrets
11 changes: 11 additions & 0 deletions Dockerfile
@@ -0,0 +1,11 @@
FROM python:3.6.7-slim-jessie

ADD requirements.txt /
RUN pip install --no-cache-dir -r requirements.txt

ADD jsonhandler.py /
ADD main.py /

ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:5000", "main:app"]

EXPOSE 5000
9 changes: 9 additions & 0 deletions Makefile
@@ -0,0 +1,9 @@
docker-build:
	docker build -t quay.io/netzbegruenung/green-spider-api .

docker-run:
	docker run --rm \
		-p 5000:5000 \
		-v $(shell pwd)/secrets:/secrets \
		-e GCLOUD_DATASTORE_CREDENTIALS_PATH=/secrets/green-spider-api.json \
		quay.io/netzbegruenung/green-spider-api
41 changes: 41 additions & 0 deletions README.md
@@ -1,2 +1,43 @@
# green-spider-api

Web service API for Green Spider

## API documentation

### `GET /api/v1/spider-results/last-updated/`

Returns the timestamp of the most recent update to the spider results.

```json
{
"last_updated": "2018-10-25T15:23:30.589683"
}
```

### `GET /api/v1/spider-results/compact/`

Returns the compact list of all sites. It contains only the details needed for an overview.

```json
[
{
"input_url": "https://www.gruenekoeln.de/bezirke/bezirk7.html",
"created": "2018-10-31T01:21:03.361931+00:00",
"meta": {
"level": "DE:ORTSVERBAND",
"state": "Nordrhein-Westfalen",
"type": "REGIONAL_CHAPTER",
"city": "Köln-Porz/Poll",
"district": "Köln"
},
"score": 11.5
},
...
]
```
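
A minimal client sketch using `requests` (pinned in `requirements.txt`); this is illustrative only and assumes the service is reachable locally on port 5000, e.g. via `make docker-run`:

```python
import requests

BASE = "http://127.0.0.1:5000/api/v1"

# Timestamp of the most recent spider run
last_updated = requests.get(BASE + "/spider-results/last-updated/").json()
print(last_updated["last_updated"])

# Compact list of all sites, including meta data and score
sites = requests.get(BASE + "/spider-results/compact/").json()
print(len(sites), "sites")
```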

## Configuration

Environment variables:

- `GCLOUD_DATASTORE_CREDENTIALS_PATH`: Path to the JSON file containing Google Cloud service account credentials. Requires read access to `spider-results` Datastore entities.
38 changes: 38 additions & 0 deletions jsonhandler.py
@@ -0,0 +1,38 @@
import six

from datetime import date, datetime

from falcon import errors
from falcon.media import BaseHandler
from falcon.util import json

class ComplexEncoder(json.JSONEncoder):
    """JSONEncoder that handles date and datetime"""

    def default(self, obj):
        if isinstance(obj, date) or isinstance(obj, datetime):
            return obj.isoformat()
        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)

class JSONHandler(BaseHandler):
    """Handler built using Python's :py:mod:`json` module."""

    def deserialize(self, raw):
        try:
            return json.loads(raw.decode('utf-8'))
        except ValueError as err:
            raise errors.HTTPBadRequest(
                'Invalid JSON',
                'Could not parse JSON body - {0}'.format(err)
            )

    def serialize(self, media):
        result = json.dumps(media,
                            ensure_ascii=False,
                            cls=ComplexEncoder)

        if six.PY3 or not isinstance(result, bytes):
            return result.encode('utf-8')

        return result
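
For reference, a short sketch (not part of the commit) of what `ComplexEncoder` does during serialization. It assumes the standard library `json` module backs `falcon.util.json` (no `ujson` in `requirements.txt`) and that the module is importable as `jsonhandler`:

```python
import json
from datetime import datetime

from jsonhandler import ComplexEncoder

# datetime values are rendered as ISO 8601 strings instead of raising TypeError
payload = {"last_updated": datetime(2018, 10, 25, 15, 23, 30, 589683)}
print(json.dumps(payload, cls=ComplexEncoder))
# -> {"last_updated": "2018-10-25T15:23:30.589683"}
```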
94 changes: 94 additions & 0 deletions main.py
@@ -0,0 +1,94 @@
from datetime import datetime
from os import getenv
from wsgiref import simple_server

import falcon
from falcon import media
import jsonhandler

from google.cloud import datastore


credentials_path = getenv('GCLOUD_DATASTORE_CREDENTIALS_PATH')
datastore_client = datastore.Client.from_service_account_json(credentials_path)

entity_kind = 'spider-results'


def get_compact_results(client):
    query = client.query(kind=entity_kind,
                         order=['-created'],
                         #projection=['created', 'meta', 'score'],
                         )

    out = []
    for entity in query.fetch(eventual=True):
        # handle creation date in different ways, depending on whether the lib
        # returns a str, int, or datetime.datetime
        created = entity.get('created')
        dt = ''
        if type(created) == datetime:
            dt = created
        elif type(created) == int:
            dt = datetime.utcfromtimestamp(created / 1000000)
        elif type(created) == str:
            dt = datetime.utcfromtimestamp(int(created) / 1000000)

        out.append({
            'input_url': entity.key.name,
            'created': dt.isoformat(),
            'meta': entity.get('meta'),
            'score': entity.get('score'),
        })

    return out


class LastUpdated(object):

    def on_get(self, req, resp):
        """
        Informs about the most recent update to the spider results data
        """
        query = datastore_client.query(kind=entity_kind,
                                       order=['-created'],
                                       projection=['created'])
        items = list(query.fetch(limit=1, eventual=True))
        ts = int(items[0].get('created')) / 1000000
        dt = datetime.utcfromtimestamp(ts).isoformat()

        maxage = 60 * 60  # one hour in seconds
        # Cache-Control directive is spelled 'max-age'
        resp.cache_control = ["max-age=%d" % maxage]
        resp.media = {
            "last_updated": dt
        }


class CompactResults(object):

    def on_get(self, req, resp):
        """
        Returns compact sites overview and score
        """
        out = get_compact_results(datastore_client)

        maxage = 6 * 60 * 60  # six hours in seconds
        resp.cache_control = ["max-age=%d" % maxage]
        resp.media = out


handlers = media.Handlers({
    'application/json': jsonhandler.JSONHandler(),
})

app = falcon.API()

app.req_options.media_handlers = handlers
app.resp_options.media_handlers = handlers

app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
app.add_route('/api/v1/spider-results/compact/', CompactResults())

if __name__ == '__main__':
    httpd = simple_server.make_server('127.0.0.1', 5000, app)
    httpd.serve_forever()
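
As a side note on the `created` handling in `get_compact_results` above: Datastore may hand back the timestamp as a microsecond epoch value, hence the division by 1,000,000 before `utcfromtimestamp`. A small illustrative sketch (the input value below is made up):

```python
from datetime import datetime

created = 1540999263000000  # hypothetical microsecond epoch value from Datastore
dt = datetime.utcfromtimestamp(created / 1000000)
print(dt.isoformat())  # -> 2018-10-31T15:21:03
```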
28 changes: 28 additions & 0 deletions requirements.txt
@@ -0,0 +1,28 @@
astroid==2.0.4
cachetools==2.1.0
certifi==2018.10.15
chardet==3.0.4
falcon==1.4.1
google-api-core==1.5.1
google-auth==1.5.1
google-cloud-core==0.28.1
google-cloud-datastore==1.7.1
googleapis-common-protos==1.5.3
grpcio==1.16.0
gunicorn==19.9.0
idna==2.7
isort==4.3.4
lazy-object-proxy==1.3.1
mccabe==0.6.1
protobuf==3.6.1
pyasn1==0.4.4
pyasn1-modules==0.2.2
pylint==2.1.1
python-mimeparse==1.6.0
pytz==2018.7
requests==2.20.0
rsa==4.0
six==1.11.0
typed-ast==1.1.0
urllib3==1.24
wrapt==1.10.11
