-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathutils.py
148 lines (124 loc) · 5.28 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#-*- coding: utf-8 -*-
import logging
import traceback
import datetime
import platform
import os
from bs4 import CData
from bs4 import NavigableString
def make_dir(dir):
    """Create the directory ``dir`` (and any missing parents) if it is absent.

    The attempt is logged through ``log`` first. If something already exists
    at ``dir`` the call is a no-op.

    :param dir: path of the directory to create.
    :raises OSError: if creation fails and nothing exists at ``dir`` afterwards.
    """
    log('make dir:%s' % dir)
    try:
        # Create first and inspect afterwards: checking for existence before
        # creating leaves a window in which another process can create the
        # directory and make this call fail.
        os.makedirs(dir)
    except OSError:
        # An existing path is fine; any other failure is re-raised.
        if not os.path.exists(dir):
            raise
def log(msg, level = logging.DEBUG):
    """Send ``msg`` to the ``logging`` module and echo it on stdout.

    The stdout line has the form ``<YYYY-mm-dd HH:MM:SS> [level:<level>]
    msg:<msg>``. For ``logging.WARNING`` and ``logging.ERROR`` the current call
    stack is also printed and then logged at the same level, one stripped
    entry at a time.

    :param msg: message to record.
    :param level: numeric ``logging`` level; defaults to ``logging.DEBUG``.
    """
    logging.log(level, msg)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('%s [level:%s] msg:%s' % (timestamp, level, msg))

    if level in (logging.WARNING, logging.ERROR):
        # Capture and format the stack once; it is reused for both outputs
        # instead of being walked twice.
        stack_entries = [entry.strip() for entry in traceback.format_stack()]

        # Keep the two passes separate so each stream sees its lines in the
        # same order as before: all prints first, then all log records.
        for entry in stack_entries:
            print(entry)
        for entry in stack_entries:
            logging.log(level, entry)
def get_first_text(soup, strip = False, types = (NavigableString, CData)):
    """Return the first string produced by ``soup._all_strings``.

    :param soup: object exposing ``_all_strings`` (presumably a BeautifulSoup
        tag -- confirm against callers).
    :param strip: forwarded positionally to ``_all_strings``.
    :param types: forwarded to ``_all_strings`` as the ``types`` keyword.
    :return: the first string yielded, or ``None`` when nothing is yielded.
    """
    strings = soup._all_strings(strip, types = types)
    # Pull only the first item; the rest of the iterable is never consumed.
    return next(iter(strings), None)
def get_texts(soup, strip = False, types = (NavigableString, CData)):
    """Collect every string produced by ``soup._all_strings`` into a list.

    :param soup: object exposing ``_all_strings`` (presumably a BeautifulSoup
        tag -- confirm against callers).
    :param strip: forwarded positionally to ``_all_strings``.
    :param types: forwarded to ``_all_strings`` as the ``types`` keyword.
    :return: a new list of the yielded strings, in iteration order.
    """
    return list(soup._all_strings(strip, types = types))
def get_platform():
    """Classify the host as ``'mac'`` or ``'linux'`` from ``platform.platform()``.

    :return: ``'linux'`` when the platform string mentions ``Linux`` and not
        ``Darwin``; ``'mac'`` in every other case.
    """
    description = platform.platform()
    if 'Darwin' in description:
        return 'mac'
    if 'Linux' in description:
        return 'linux'
    # NOTE(review): any other platform (e.g. Windows) is also reported as
    # 'mac' -- confirm that this fallback is intended.
    return 'mac'
def get_date():
    """Return today's local date formatted as ``YYYY-MM-DD``."""
    today = datetime.datetime.today()
    return format(today, '%Y-%m-%d')
def get_create_table_command(table_name):
    """Build the ``CREATE TABLE IF NOT EXISTS`` statement for ``table_name``.

    The table has an auto-increment ``id`` primary key plus ``name``,
    ``price`` and ``url`` text columns, and uses the InnoDB engine.

    :param table_name: inserted into the statement verbatim (it is not quoted
        or escaped here).
    :return: the SQL statement as a single string.
    """
    # Only the columns below are active. The wider schema is currently
    # disabled: metacritic_score, reviews_overall_positive(_percent),
    # reviews_recent_positive(_percent), tags, review_all, review_positive,
    # review_negative, review_purchase_steam, review_purchase_cd_key,
    # review_chinese_language, achievements, curators, category, genre,
    # developer, publisher, release_date, dlc_number, dlc_names, dlc_prices,
    # language_number, languages, description, save_time.
    # A second disabled variant declared these with INT/CHAR/DATE/TIMESTAMP
    # types instead of TEXT.
    column_defs = (
        "`id` INT(8) NOT NULL AUTO_INCREMENT UNIQUE",
        "`name` TEXT NOT NULL",
        "`price` TEXT DEFAULT NULL",
        "`url` TEXT NOT NULL",
        "PRIMARY KEY(id)",
    )
    statement = "CREATE TABLE IF NOT EXISTS {} ({}) ENGINE=InnoDB"
    return statement.format(table_name, ",".join(column_defs))
def get_insert_data_command(table_name):
    """Build the parameterised ``INSERT IGNORE`` statement for ``table_name``.

    The statement targets the ``id``, ``name``, ``price`` and ``url`` columns
    and carries one ``%s`` placeholder per column for the database driver to
    fill in.

    :param table_name: inserted into the statement verbatim (it is not quoted
        or escaped here).
    :return: the SQL statement as a single string.
    """
    # A wider column list covering the full 30-column schema is currently
    # disabled; only these four columns are written.
    fields = ("id", "name", "price", "url")
    placeholders = ", ".join("%s" for _ in fields)
    return "INSERT IGNORE INTO {} ({})VALUES({})".format(
        table_name, ", ".join(fields), placeholders)