Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
RyuSeiri committed Mar 10, 2024
1 parent b0e4f8a commit 1d40a8a
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 843 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: excute python
run: |
git config --local user.email "bot@github.com"
git config --local user.name "bot"
git remote set-url origin https://${{ github.actor }}:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
git pull --rebase
echo $AUTHORIZATION
python get_qiita_article.py
git add .
git commit -m "update by github action"
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__/
412 changes: 0 additions & 412 deletions archive/total_result.csv

This file was deleted.

1 change: 1 addition & 0 deletions archive/total_result.json

Large diffs are not rendered by default.

48 changes: 30 additions & 18 deletions get_qiita_article.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import os
import util
import requests
import pandas as pd
from datetime import datetime as dt

URL = 'https://qiita.com/api/v2'
URL = 'https://qiita.com/'

HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
'Authorization' : os.environ['AUTHORIZATION']
'Authorization': os.environ.get('AUTHORIZATION')
}


def get_qiita_articles_by_page(page):
'''
第何ページ目の文章を取得
Expand All @@ -20,9 +21,10 @@ def get_qiita_articles_by_page(page):
params = {
'page': page,
'per_page': 100,
'query': 'likes_count:>0'
'query': 'stocks:>0'
}
response = requests.get(f'{URL}/items', headers=HEADERS, params=params)
response = requests.get(f'{URL}api/v2/items',
headers=HEADERS, params=params)
if response.status_code == 200:
article = response.json()
return article
Expand All @@ -35,7 +37,7 @@ def get_qiita_articles():
'''
articles = []
for i in range(1, 9999):
for i in range(1, 2):
article = get_qiita_articles_by_page(i)
if article:
articles.extend(article)
Expand All @@ -44,23 +46,33 @@ def get_qiita_articles():
return articles


def out_put_articels(articles, file_name):
def out_put_articels(datas, file_name):
'''
qiitaの文章を出力
'''
if articles:
df = pd.DataFrame(articles)
df.drop(columns=['id', 'rendered_body', 'body', 'coediting', 'group', 'private', 'reactions_count',
'stocks_count', 'tags', 'updated_at', 'team_membership', 'organization_url_name', 'page_views_count'], axis=1, inplace=True)
df.sort_values(by='likes_count', axis=0, ascending=False, inplace=True)
df = df[df['likes_count'] > 0]
df = df.reindex(columns=['title', 'url', 'likes_count', 'comments_count', 'tags' , 'created_at', 'user'])
df.to_csv(file_name, encoding='utf-8', index=False)
if datas:
articles = [{
'title': article.get('title'),
'url': article.get('url'),
'likes_count': article.get('likes_count'),
'stocks_count': article.get('stocks_count'),
'tags': ','.join([tag.get('name') for tag in article.get('tags')]),
'created_at': article.get('created_at'),
'user_id': article.get('user').get('id'),
'user_name': article.get('user').get('name'),
'user_url': f'{URL}{article.get("user").get("id")}'} for article in datas]
util.sort_articles(articles, 'stocks_count')
file_name = util.make_path(file_name)
util.write_json_file(articles, file_name)
else:
print('文章を取得できなかった!')


if __name__ == '__main__':
date = dt.now().strftime('%Y%m%d')
def start():
articles = get_qiita_articles()
out_put_articels(articles, f'./archive/total_result.csv')
out_put_articels(articles, 'archive/total_result.json')
out_put_articels(articles, 'total_result.json')


if __name__ == '__main__':
start()
412 changes: 0 additions & 412 deletions total_result.csv

This file was deleted.

1 change: 1 addition & 0 deletions total_result.json

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os
import json


def merge_data(current_data, before_data, key='href'):
    '''
    Merge the current records with previously stored records.

    When both inputs are non-empty, the records are de-duplicated on the
    value stored under ``key``. For a duplicated key the record from
    ``current_data`` wins, while the position of the first occurrence
    (older records first) is kept. Merged records are shallow copies, so
    mutating the result does not touch the input dicts.

    param:current_data list of dicts fetched now (may be empty or None)
    param:before_data list of dicts saved earlier (may be empty or None)
    param:key dict key that identifies a record; defaults to 'href'
    return: the merged list; if only one input is non-empty that input is
            returned as-is (not copied); if ``current_data`` is empty,
            ``before_data`` is returned unchanged, whatever it is
    '''
    if not current_data:
        return before_data
    if not before_data:
        return current_data

    merged = {}
    # Older records go first so that newer ones overwrite them, while dict
    # insertion order preserves the position of the first occurrence.
    for record in before_data + current_data:
        merged[record.get(key)] = record.copy()
    return list(merged.values())


def sort_articles(articles, sort_name, reverse=False):
    '''
    Return a new list of articles ordered by a numeric field.

    The input list is NOT sorted in place; callers must use the returned
    list. Each value under ``sort_name`` is converted with ``int()``
    before comparison, so numeric strings are accepted.

    param:articles iterable of dicts, each containing ``sort_name``
    param:sort_name key whose value is used as the sort key
    param:reverse False (default) for ascending, True for descending
    return: new sorted list
    raises: KeyError if a record lacks ``sort_name``; TypeError or
            ValueError if the value cannot be converted by ``int()``
    '''
    return sorted(
        articles,
        key=lambda article: int(article[sort_name]),
        reverse=reverse,
    )


def make_path(path):
    '''
    Ensure the parent directory of ``path`` exists.

    param:path file path whose directory part should be created
    return: ``path`` unchanged, so the call can be chained
    '''
    parent = os.path.dirname(path)
    # A bare file name has no directory part; nothing to create then.
    if parent:
        # exist_ok avoids the race between checking and creating, and lets
        # root-level paths such as '/out.json' pass instead of crashing.
        os.makedirs(parent, exist_ok=True)
    return path


def write_json_file(data, path):
    '''
    Serialise ``data`` as JSON and write it to ``path`` as UTF-8.

    Non-ASCII characters are written as-is rather than as \\u escapes.
    An existing file at ``path`` is overwritten.

    param:data JSON-serialisable object
    param:path destination file path
    '''
    # Serialise before opening so a failed dump never truncates the file.
    payload = json.dumps(data, ensure_ascii=False)
    with open(path, 'w', encoding='utf-8') as handle:
        handle.write(payload)

0 comments on commit 1d40a8a

Please sign in to comment.