From 908ff825aef6459a4d8931c9eed493964aa7e9c9 Mon Sep 17 00:00:00 2001
From: holysoles
Date: Wed, 18 Dec 2024 00:03:32 +0000
Subject: [PATCH] feat(seo): sitemap and robots.txt

---
Review notes (free text here is ignored by git am / git apply):
 * image_build.yaml: the date step needs `id: date`; without it
   `steps.date.outputs.date` expands to an empty string and BUILD_DATE is
   never passed to the image build.
 * Dockerfile: ARG is declared after FROM so ENV can read it; CMD is a valid
   JSON exec-form array that runs gunicorn through `sh -c` so ${PORT} is
   expanded at container start.
 * Leading whitespace of context lines was reconstructed and the index
   hashes were not regenerated; check against the target tree before applying.

 .github/workflows/image_build.yaml |  5 ++
 Dockerfile                         | 14 ++++--
 app.py                             | 29 +++++++++++--
 static/robots.txt                  | 67 ++++++++++++++++++++++++++++++
 templates/sitemap.xml.j2           | 25 +++++++++++
 5 files changed, 133 insertions(+), 7 deletions(-)
 create mode 100644 static/robots.txt
 create mode 100644 templates/sitemap.xml.j2

diff --git a/.github/workflows/image_build.yaml b/.github/workflows/image_build.yaml
index e855205..77e7d30 100644
--- a/.github/workflows/image_build.yaml
+++ b/.github/workflows/image_build.yaml
@@ -29,6 +29,9 @@ jobs:
         uses: docker/metadata-action@v5.5.1
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+      - name: get current date
+        id: date
+        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
       - name: image build and push
         uses: docker/build-push-action@v6.2.0
         with:
@@ -36,3 +39,5 @@ jobs:
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            "BUILD_DATE=${{ steps.date.outputs.date }}"
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index f57a490..78797a8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,14 @@
-# Use the official Python 3.8 slim image as the base image
 FROM python:3.10-slim
 
+# Declared after FROM: an ARG placed before FROM is only in scope for the
+# FROM instruction itself, so the ENV below would see an empty value.
+ARG BUILD_DATE
+
 WORKDIR /app
 
+ENV PORT=5000
+ENV BUILD_DATE=$BUILD_DATE
+
 # Copy the necessary files and directories into the container
 COPY static/ ./static/
 COPY templates/ ./templates/
@@ -12,5 +18,7 @@ COPY app.py requirements.txt ./
 # Upgrade pip and install Python dependencies
 RUN pip3 install --upgrade pip && pip install --no-cache-dir -r requirements.txt
 
-EXPOSE 5000
-CMD ["gunicorn", "app:app", "-b", "0.0.0.0:5000", "-w", "4"]
+EXPOSE ${PORT}/tcp
+# Exec form must be a JSON array (double quotes) and does not expand
+# variables, so gunicorn is started through a shell to resolve ${PORT}.
+CMD ["sh", "-c", "exec gunicorn app:app -b 0.0.0.0:${PORT} -w 4"]
diff --git a/app.py b/app.py
index c771bef..fe02c02 100644
--- a/app.py
+++ b/app.py
@@ -1,12 +1,14 
@@
 import re
-from os import listdir
+from os import listdir, getenv
 from os.path import join, splitext
 import yaml
-from flask import Flask, request, render_template
+from flask import Flask, request, Response, render_template, url_for
 from werkzeug.middleware.proxy_fix import ProxyFix
 from flask_minify import minify
 
-app = Flask(__name__)
+DEFAULT_LAST_MOD = getenv('BUILD_DATE')
+
+app = Flask(__name__, static_folder='static', static_url_path='')
 
 minify(app=app, html=True, js=True, cssless=True)
 
@@ -103,4 +105,38 @@ def contact():
 
 @app.route("/projects", methods=["GET"])
 def projects():
-    return render_template('projects.html.j2')
\ No newline at end of file
+    return render_template('projects.html.j2')
+
+def has_no_empty_params(rule):
+    """Return True if ``rule`` can be built from its defaults alone.
+
+    That is the case when the rule declares at least as many defaults as
+    it has URL arguments.
+    """
+    defaults = rule.defaults if rule.defaults is not None else ()
+    arguments = rule.arguments if rule.arguments is not None else ()
+    return len(defaults) >= len(arguments)
+
+@app.route("/sitemap.xml", methods=["GET"])
+def sitemap():
+    """Serve an XML sitemap of the parameterless GET routes and blog posts."""
+    static_pages = [
+        url_for(rule.endpoint, **(rule.defaults or {}))
+        for rule in app.url_map.iter_rules()
+        # The sitemap document should not list itself as a page.
+        if rule.endpoint != "sitemap"
+        and "GET" in rule.methods
+        and has_no_empty_params(rule)
+    ]
+
+    _, timeline = get_posts()
+    xml = render_template(
+        'sitemap.xml.j2',
+        host_url=request.host_url.rstrip('/'),
+        static_pages=static_pages,
+        blog_posts=timeline,
+        default_last_mod=DEFAULT_LAST_MOD,
+    )
+    # The content type is set once here; passing mimetype="application/xml"
+    # and then overwriting the header with text/xml was contradictory.
+    return Response(xml, status=200, content_type="text/xml; charset=utf-8")
diff --git a/static/robots.txt b/static/robots.txt
new file mode 100644
index 0000000..3cbd73a
--- /dev/null
+++ b/static/robots.txt
@@ -0,0 +1,67 @@
+User-agent: Mediapartners-Google
+Disallow: /
+
+User-agent: SemrushBot
+Disallow: /
+
+user-agent: Pinterestbot
+disallow: /
+
+User-agent: AhrefsBot
+Disallow: /
+
+User-agent: dotbot
+Disallow: /
+
+User-agent: Semrush
+Disallow: /
+
+User-agent: GPTBot
+Disallow: /
+
+User-agent: ChatGPT-User
+Disallow: /
+
+User-agent: OAI-SearchBot
+Disallow: /
+
+User-agent: PerplexityBot
+Disallow: /
+
+User-agent: Amazonbot
+Disallow: /
+
+User-agent: ClaudeBot
+Disallow: /
+
+User-agent: anthropic-ai
+Disallow: /
+
+User-agent: Claude-Web
+Disallow: /
+
+User-agent: Omgilibot
+Disallow: /
+
+User-Agent: Applebot
+Disallow: /
+
+User-agent: Bytespider
+Disallow: /
+
+User-agent: Diffbot
+Disallow: /
+
+User-agent: ImagesiftBot
+Disallow: /
+
+User-agent: Omgili
+Disallow: /
+
+User-agent: YouBot
+Disallow: /
+
+User-agent: CCBot
+Disallow: /
+
+Sitemap: https://pve.dev/sitemap.xml
\ No newline at end of file
diff --git a/templates/sitemap.xml.j2 b/templates/sitemap.xml.j2
new file mode 100644
index 0000000..f324248
--- /dev/null
+++ b/templates/sitemap.xml.j2
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+    {# Flask's default autoescaping does not cover the ".j2" suffix, so URLs are escaped explicitly with |e. #}
+    {% for page in static_pages %}
+    <url>
+        <loc>{{ (host_url + page)|e }}</loc>
+        {# lastmod is optional in the sitemap protocol; skip it when no build date was supplied. #}
+        {% if default_last_mod %}
+        <lastmod>{{ default_last_mod }}</lastmod>
+        {% endif %}
+    </url>
+    {% endfor %}
+    {% for year in blog_posts %}
+    {% for month in blog_posts[year]|sort(reverse=True) %}
+    {% for day in blog_posts[year][month]|sort %}
+    <url>
+        <loc>{{ (host_url + blog_posts[year][month][day]['link_to_post'])|e }}</loc>
+        {# NOTE(review): lastmod must be YYYY-MM-DD; assumes month/day keys are already zero-padded - confirm against get_posts(). #}
+        <lastmod>{{year}}-{{month}}-{{day}}</lastmod>
+    </url>
+    {% endfor %}
+    {% endfor %}
+    {% endfor %}
+</urlset>
\ No newline at end of file