-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmain.py
131 lines (111 loc) · 3.84 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""Server
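
Attributes:
    app (fastapi.applications.FastAPI): FastAPI instance
    PORT (int): Port number; not a module attribute, it is read from the
        ``PORT`` environment variable when the module is run as a script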
"""
import os
import re
import json
import uvicorn
import botocore
from botocore.exceptions import ClientError
import boto3
import dotenv
from fastapi import FastAPI, Response
from utils import image
dotenv.load_dotenv()
from core.storage import S3Bucket
# AWS credentials are taken from the environment; either may be None when
# the corresponding variable is not set.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# Client settings: long read/connect timeouts and a retry budget of zero.
config = botocore.config.Config(
    read_timeout=400,
    connect_timeout=400,
    retries={"max_attempts": 0},
)

credentials = {
    "aws_access_key_id": AWS_ACCESS_KEY_ID,
    "aws_secret_access_key": AWS_SECRET_ACCESS_KEY,
}

# One shared S3 client, wrapped by the project's bucket helper that every
# route below reads from.
botoclient = boto3.client("s3", config=config, **credentials)
bucket_name = os.getenv("AWS_S3_BUCKET_NAME")
s3_storage = S3Bucket(botoclient, bucket_name)

# The ASGI application the route decorators below register on.
app = FastAPI()
def get_drawing_prefix(doc_id):
    """Map a document id to the storage key prefix of its drawings.

    For US Patent No. 7,654,321 the drawings are stored as follows:
        07654321-1.tif
        07654321-2.tif
        ...

    For US patent applications, say, for US20080156487A1, they're stored as:
        US20080156487A1-1.tif
        US20080156487A1-2.tif
        ...

    This function creates the appropriate key prefix on the basis of whether
    the supplied number is a patent or an application: ids longer than 12
    characters are treated as applications and used verbatim; otherwise the
    first run of digits in the id is zero-padded on the left to 8 characters.

    Args:
        doc_id (str): Document identifier (e.g. patent number)

    Returns:
        str: Drawing prefix of the form ``images/<number>-``. An id of 12 or
            fewer characters that contains no digits is used verbatim, so
            the resulting prefix simply matches nothing instead of the
            lookup raising ``AttributeError``.
    """
    if len(doc_id) > 12:
        return f"images/{doc_id}-"

    digits = re.search(r"\d+", doc_id)
    if digits is None:
        # No number to normalise; fall back to the raw id rather than
        # calling .group() on None.
        return f"images/{doc_id}-"

    return f"images/{digits.group(0).zfill(8)}-"
@app.get("/documents/{doc_id}")
@app.get("/patents/{doc_id}")
async def get_doc(doc_id: str):
    """Serve the stored JSON record of a document.

    The record is fetched from ``patents/<doc_id>.json`` via ``s3_storage``
    and parsed with ``json.loads`` before being returned.

    Args:
        doc_id (str): Document identifier taken from the URL path

    Returns:
        The parsed JSON document on success; an empty ``Response`` with
        status 404 when storage reports ``NoSuchKey``, or status 500 for any
        other ``ClientError``.
    """
    key = f"patents/{doc_id}.json"
    try:
        raw = s3_storage.get(key)
    except ClientError as err:
        # Only a missing key is the caller's problem; everything else is ours.
        is_missing = err.response["Error"]["Code"] == "NoSuchKey"
        return Response(status_code=404 if is_missing else 500)
    return json.loads(raw)
@app.get("/patents/{doc_id}/drawings")
async def list_drawings(doc_id: str):
    """Return the drawing numbers available for a document.

    Keys under the document's drawing prefix are listed through
    ``s3_storage.ls`` and the digits following the first ``-`` in each key
    are taken as the drawing number.

    Args:
        doc_id (str): Document identifier taken from the URL path

    Returns:
        dict: ``{"drawings": [...]}`` where each entry is the drawing number
            as a string (e.g. ``["1", "2", "3"]``), in the order the keys
            were listed. An empty ``Response`` with status 404 is returned
            when no drawing is found.
    """
    prefix = get_drawing_prefix(doc_id)
    keys = s3_storage.ls(prefix)
    if not keys:
        return Response(status_code=404)

    # re.search returns None for a key without a "-<digits>" part; skip such
    # keys instead of failing the whole request on .group().
    matches = (re.search(r"-(\d+)", key) for key in keys)
    drawings = [m.group(1) for m in matches if m is not None]
    if not drawings:
        return Response(status_code=404)
    return {"drawings": drawings}
@app.get("/patents/{doc_id}/drawings/{drawing_num}")
async def get_drawing(doc_id: str, drawing_num: int):
    """Serve one drawing of a document as a TIFF image.

    Args:
        doc_id (str): Document identifier taken from the URL path
        drawing_num (int): Drawing number; values below 1 are rejected

    Returns:
        Response: The stored bytes with media type ``image/tiff``; status
            404 when ``drawing_num`` is below 1 or storage reports
            ``NoSuchKey``; status 500 for any other ``ClientError``.
    """
    if drawing_num < 1:
        return Response(status_code=404)

    key = f"{get_drawing_prefix(doc_id)}{drawing_num}.tif"
    try:
        payload = s3_storage.get(key)
    except ClientError as err:
        error_code = err.response["Error"]["Code"]
        status = 404 if error_code == "NoSuchKey" else 500
        return Response(status_code=status)
    return Response(content=payload, media_type="image/tiff")
@app.get('/patents/{doc_id}/thumbnails/{thumbnail_num}')
def get_patent_thumbnail(doc_id: str, thumbnail_num: int, w: int = 100, h: int = 100):
    """Serve a resized version of one drawing of a document.

    The drawing is fetched from the same key as the full-size drawing and
    passed through ``image.get_resized_image`` with the requested size.

    Args:
        doc_id (str): Document identifier taken from the URL path
        thumbnail_num (int): Drawing number; values below 1 are rejected.
            Declared as ``int`` (as in ``get_drawing``) so the ``< 1`` check
            compares numbers; with a ``str`` annotation that comparison
            raised ``TypeError`` on every request.
        w (int): Requested width, forwarded to the resize helper
        h (int): Requested height, forwarded to the resize helper

    Returns:
        Response: The resized image bytes with media type ``image/tiff``;
            status 404 when ``thumbnail_num`` is below 1 or storage reports
            ``NoSuchKey``; status 500 for any other ``ClientError``.
    """
    if thumbnail_num < 1:
        return Response(status_code=404)

    prefix = get_drawing_prefix(doc_id)
    key = f"{prefix}{thumbnail_num}.tif"
    try:
        tif_data = s3_storage.get(key)
    except ClientError as e:
        if e.response["Error"]["Code"] == "NoSuchKey":
            return Response(status_code=404)
        return Response(status_code=500)

    tif_data_thumbnail = image.get_resized_image(tif_data, w, h)
    return Response(content=tif_data_thumbnail, media_type="image/tiff")
if __name__ == "__main__":
    # Script entry point: the PORT environment variable is required here
    # (a missing value raises KeyError); listen on all interfaces.
    port = int(os.environ["PORT"])
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
    )