From 993706298d2676c3e98392f9cee5f573770bc9f2 Mon Sep 17 00:00:00 2001 From: AntiViruS90 <131804906+AntiViruS90@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:09:22 +0300 Subject: [PATCH] Refactoring app files --- build.sh | 2 +- database.sql | 18 ++ page_analyzer.sql | 19 -- page_analyzer/app.py | 125 +++++++------ page_analyzer/db.py | 202 ++++++++++---------- page_analyzer/html_parser.py | 34 +--- page_analyzer/templates/base.html | 48 ++--- page_analyzer/templates/details.html | 48 +++-- page_analyzer/templates/errors/404.html | 7 +- page_analyzer/templates/errors/500.html | 7 +- page_analyzer/templates/index.html | 25 +-- page_analyzer/templates/list.html | 103 +++++----- page_analyzer/url_validator.py | 8 + page_analyzer/utils.py | 16 -- poetry.lock | 237 +++++++++++++++++++++++- pyproject.toml | 1 + 16 files changed, 550 insertions(+), 350 deletions(-) create mode 100644 database.sql delete mode 100644 page_analyzer.sql create mode 100644 page_analyzer/url_validator.py delete mode 100644 page_analyzer/utils.py diff --git a/build.sh b/build.sh index 44fab98..32a66bf 100644 --- a/build.sh +++ b/build.sh @@ -1,3 +1,3 @@ #!/usr/bin/env bash -make install && psql -a -d $DATABASE_URL -f page_analyzer.sql \ No newline at end of file +make install && psql -a -d $DATABASE_URL -f database.sql \ No newline at end of file diff --git a/database.sql b/database.sql new file mode 100644 index 0000000..9f50d5f --- /dev/null +++ b/database.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS url_checks; +DROP TABLE IF EXISTS urls; + +CREATE TABLE urls ( + id SERIAL PRIMARY KEY, + name varchar(255) UNIQUE NOT NULL, + created_at date DEFAULT CURRENT_DATE +); + +CREATE TABLE url_checks ( + id SERIAL PRIMARY KEY, + url_id int, + status_code int, + h1 varchar(255), + title varchar(255), + description text, + created_at date DEFAULT CURRENT_DATE +) \ No newline at end of file diff --git a/page_analyzer.sql b/page_analyzer.sql deleted file mode 100644 index a2b65f8..0000000 --- a/page_analyzer.sql +++ /dev/null @@ -1,19 +0,0 @@ -DROP TABLE IF EXISTS urls; -DROP TABLE IF EXISTS url_checks; - - -CREATE TABLE IF NOT EXISTS urls ( - id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY, - name varchar(255) NOT NULL UNIQUE, - created_at DATE DEFAULT CURRENT_TIMESTAMP -); - -CREATE TABLE IF NOT EXISTS url_checks ( - id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY, - url_id bigint REFERENCES urls(id) ON DELETE CASCADE, - status_code int, - h1 varchar(255), - title varchar(255), - description varchar(255), - created_at DATE DEFAULT CURRENT_TIMESTAMP -); \ No newline at end of file diff --git a/page_analyzer/app.py b/page_analyzer/app.py index 603c45d..202cc64 100644 --- a/page_analyzer/app.py +++ b/page_analyzer/app.py @@ -1,105 +1,114 @@ +import os +import requests +from dotenv import load_dotenv +from urllib.parse import urlparse +from page_analyzer.url_validator import validate +from page_analyzer.html_parser import parse_page from flask import ( Flask, render_template, request, - flash, - redirect, url_for, - abort + redirect, + flash, + get_flashed_messages, +) +from page_analyzer.db import ( + add_url_to_db, + get_url_by_id, + get_url_by_name, + add_check_to_db, + get_checks_desc, + get_urls_with_latest_check ) -from page_analyzer.db import DbManager -from page_analyzer.html_parser import HTMLParser -from page_analyzer.utils import normalize_url, validate_url -import os -import requests -from dotenv import load_dotenv load_dotenv() app = Flask(__name__) - app.config['SECRET_KEY'] = os.getenv('SECRET_KEY') -app.config['DATABASE_URL'] = os.getenv('DATABASE_URL') - -db_manager = DbManager(app) @app.errorhandler(404) -def page_not_found(error): +def page_not_found(e): return render_template('errors/404.html'), 404 @app.errorhandler(500) -def internal_server_error(error): +def internal_server_error(e): return render_template('errors/500.html'), 500 -@app.route('/') -def index(): - return render_template('index.html'), 200 +@app.get('/') +def page_analyzer(): + message = get_flashed_messages(with_categories=True) + return render_template('index.html', message=message) -@app.post('/urls') -def show_url_page(): - url_check = request.form.get('url') - normal_url = normalize_url(url_check) - validation_error = validate_url(normal_url) - if validation_error: - flash(validation_error, 'danger') - return render_template('index.html'), 422 +@app.post('/urls') +def add_url(): + new_url = request.form.get('url') - url_id = db_manager.get_url_by_name(normal_url) + error = validate(new_url) - if url_id: - flash('Страница уже существует', 'warning') - return redirect(url_for('get_url_list', id=url_id)) + if error: + flash(f'{error}', 'danger') + message = get_flashed_messages(with_categories=True) + return render_template('index.html', message=message), 422 - url = db_manager.insert_url_in_db(normal_url) - flash('Страница успешно добавлена', 'success') + parsed_url = urlparse(new_url) + normal_url = f"{parsed_url.scheme}://{parsed_url.netloc}" - return redirect(url_for('get_url_list', id=url.id)) + if get_url_by_name(normal_url): + old_url_data = get_url_by_name(normal_url) + flash('Страница уже существует', 'primary') + return redirect(url_for('show_url', id=old_url_data[0].id)) -@app.get('/urls') -def urls(): - all_urls = db_manager.get_urls_list() + add_url_to_db(normal_url) + new_url_data = get_url_by_name(normal_url) + flash('Страница успешно добавлена', 'success') - return render_template('details.html', urls=all_urls) + return redirect(url_for('show_url', id=new_url_data[0].id)) -@app.get('/urls/') -def get_url_list(id): - url = db_manager.get_url_from_urls_list(id) +@app.get('/urls') +def show_all_urls(): + all_urls = get_urls_with_latest_check() + message = get_flashed_messages(with_categories=True) + return render_template('details.html', all_urls=all_urls, message=message) - if not url: - abort(404) - check_records = db_manager.get_url_from_urls_checks_list(id) - return render_template('list.html', - url=url, checks_list=check_records) +@app.get('/urls/') +def show_url(id): + url_data = get_url_by_id(id) + all_checks = get_checks_desc(id) + message = get_flashed_messages(with_categories=True) + return render_template( + 'list.html', + url_data=url_data, + all_checks=all_checks, + message=message + ) -@app.post('/urls//check') -def check_url(url_id): - url_record = db_manager.get_url_from_urls_list(url_id) - if not url_record: - abort(404) +@app.post('/urls//checks') +def add_check(id): + url = get_url_by_id(id) try: - response = requests.get(url_record.name) + response = requests.get(url[0].name) response.raise_for_status() + except requests.exceptions.RequestException: flash('Произошла ошибка при проверке', 'danger') - return redirect(url_for('get_url_list', id=url_id)) - page_content = response.content - page_parser = HTMLParser(page_content) - page_data = page_parser.get_data_page() - full_check = dict(page_data, url_id=url_id, response=response.status_code) + return redirect(url_for('show_url', id=id)) - db_manager.insert_url_check_in_db(full_check) + status_code = response.status_code + page_data = parse_page(response.text) + add_check_to_db(id, status_code, page_data) flash('Страница успешно проверена', 'success') - return redirect(url_for('get_url_list', id=url_id)) + return redirect(url_for('show_url', id=id)) diff --git a/page_analyzer/db.py b/page_analyzer/db.py index 35f6f55..029a98d 100644 --- a/page_analyzer/db.py +++ b/page_analyzer/db.py @@ -1,103 +1,107 @@ import psycopg2 +import os +from dotenv import load_dotenv from psycopg2.extras import NamedTupleCursor +load_dotenv() -class DbManager: - - def __init__(self, app): - self.app = app - - @staticmethod - def exec_with_in_db(commit): - def flag(func): - def inner(self, *args, **kwargs): - try: - with psycopg2.connect( - self.app.config['DATABASE_URL'] - ) as conn: - with conn.cursor( - cursor_factory=NamedTupleCursor - ) as cursor: - result = func(self, cursor, *args, **kwargs) - if commit: - conn.commit() - return result - else: - conn.commit() - return result - except psycopg2.Error as e: - print(f"Ошибка при выполнении транзакции {e}") - raise e - - return inner - - return flag - - @exec_with_in_db(commit=True) - def insert_url_in_db(self, cursor, url): - cursor.execute( - "INSERT INTO urls (name) VALUES (%s) RETURNING *", - (url,) - ) - url_data = cursor.fetchone() - return url_data - - @exec_with_in_db(commit=True) - def insert_url_check_in_db(self, cursor, check): - cursor.execute( - "INSERT INTO url_checks(" - "url_id, " - "status_code, " - "h1, " - "title, " - "description" - ")" - - "VALUES (%s, %s, %s, %s, %s)", - ( - check['url_id'], - check['response'], - check['h1'], - check['title'], - check.get('description', ' ') - ) - ) - - @exec_with_in_db(commit=False) - def get_url_from_urls_list(self, cursor, url_id): - cursor.execute("SELECT * FROM urls WHERE id=%s", (url_id,)) - decired_url = cursor.fetchone() - return decired_url if decired_url else False - - @exec_with_in_db(commit=False) - def get_url_from_urls_checks_list(self, cursor, url_id): - cursor.execute( - "SELECT * FROM url_checks WHERE url_id=%s " - "ORDER BY id DESC", (url_id, ) - ) - result = cursor.fetchall() - - return result - - @exec_with_in_db(commit=True) - def get_url_by_name(self, cursor, url): - cursor.execute("SELECT * FROM urls WHERE name=%s", (url, )) - url_id = cursor.fetchone() - - return url_id.id if url_id else None - - @exec_with_in_db(commit=False) - def get_urls_list(self, cursor): - query = ( - "SELECT DISTINCT ON (urls.id) urls.id AS id, " - "url_checks.id AS check_id, " - "url_checks.status_code AS status_code, " - "url_checks.created_at AS created_at, " - "urls.name AS name " - "FROM urls " - "LEFT JOIN url_checks ON urls.id = url_checks.url_id " - "ORDER BY urls.id DESC, check_id DESC" - ) - cursor.execute(query) - - return cursor.fetchall() +DATABASE_URL = os.getenv('DATABASE_URL') + + +def get_connection(): + return psycopg2.connect(DATABASE_URL) + + +def fetch_all(connection, query, values=()): + with connection.cursor(cursor_factory=NamedTupleCursor) as cur: + cur.execute(query, values) + + data = cur.fetchall() + connection.commit() + connection.close() + return data + + +def add_url_to_db(url): + conn = get_connection() + with conn.cursor() as cur: + cur.execute("INSERT INTO urls (name) VALUES (%s)", (url,)) + conn.commit() + conn.close() + + +def get_url_by_name(url): + conn = get_connection() + query = "SELECT * FROM urls WHERE name = %s" + value = (url,) + + url_data = fetch_all(conn, query, value) + + return url_data + + +def get_url_by_id(url_id): + conn = get_connection() + query = "SELECT * FROM urls WHERE id = %s" + value = (url_id,) + + url_data = fetch_all(conn, query, value) + + return url_data + + +def add_check_to_db(url_id, status_code, page_data): + conn = get_connection() + + with conn.cursor() as cur: + cur.execute("INSERT INTO url_checks (" + "url_id, " + "status_code," + "h1, " + "title, " + "description " + ") " + "VALUES (%s, %s, %s, %s, %s)", + (url_id, + status_code, + page_data['h1'], + page_data['title'], + page_data['description'] + ) + ) + conn.commit() + conn.close() + + +def get_urls_with_latest_check(): + conn = get_connection() + query = "SELECT urls.id, " \ + "urls.name, " \ + "COALESCE(url_checks.status_code::text, '') as status_code, " \ + "COALESCE(MAX(url_checks.created_at)::text, '') as latest_check " \ + "FROM urls " \ + "LEFT JOIN url_checks ON urls.id = url_checks.url_id " \ + "GROUP BY urls.id, url_checks.status_code " \ + "ORDER BY urls.id DESC" + + all_urls_with_latest_check = fetch_all(conn, query) + + return all_urls_with_latest_check + + +def get_checks_desc(url_id): + conn = psycopg2.connect(DATABASE_URL) + query = "SELECT id, " \ + "status_code, " \ + "COALESCE(h1, '') as h1, " \ + "COALESCE(title, '') as title, " \ + "COALESCE(description, '') as description, " \ + "created_at::text " \ + "FROM url_checks " \ + "WHERE url_id = %s " \ + "ORDER BY id DESC" + value = (url_id,) + + all_checks = fetch_all(conn, query, value) + + return all_checks diff --git a/page_analyzer/html_parser.py b/page_analyzer/html_parser.py index bd2fdf7..bc44dba 100644 --- a/page_analyzer/html_parser.py +++ b/page_analyzer/html_parser.py @@ -1,31 +1,15 @@ from bs4 import BeautifulSoup -class HTMLParser: +def parse_page(response_text): + html_data = BeautifulSoup(response_text, 'html.parser') + page_data = {'title': html_data.title.string if html_data.title else None, + 'h1': html_data.h1.string if html_data.h1 else None} - def __init__(self, html): - self.soup = BeautifulSoup(html, 'html.parser') + description = html_data.find('meta', {'name': 'description'}) + if description: + description = description.get('content') - def get_title(self): - title_tag = self.soup.title - return title_tag.string if title_tag else None + page_data['description'] = description - def get_h1(self): - h1_tag = self.soup.h1 - return h1_tag.string if h1_tag else None - - def get_content(self): - content = [meta.get('content') - for meta in self.soup.find_all('meta') - if meta.get('name') == 'description' - ] - - return content[0][:255] if content else None - - def get_data_page(self): - result = { - 'title': self.get_title(), - 'h1': self.get_h1(), - 'content': self.get_content() - } - return result + return page_data diff --git a/page_analyzer/templates/base.html b/page_analyzer/templates/base.html index b116ae0..53580aa 100644 --- a/page_analyzer/templates/base.html +++ b/page_analyzer/templates/base.html @@ -1,26 +1,26 @@ - + + - - - - Анализатор страниц + + + {% block title %}Анализатор страниц{% endblock %} - +
- - {# Flash messages #} + {# Flash messages #} {% with messages = get_flashed_messages(with_categories=true) %} {% if messages %} {% for category, message in messages %} @@ -29,27 +29,13 @@ {% endif %} {% endwith %} -
+
{% block content %}{% endblock %} -
- -