add sitemap plugin

remove projects and use categories instead
6 years ago · 812f1da659
13 changed files with 417 additions and 37 deletions
--- a/content/pages/projects/01_3d_printer.md
+++ b/content/pages/projects/01_3d_printer.md
@ -1,10 +1,8 @@
-title: Projects
+title: 3d printer
 date: 2019-05-20
 author: Philipp Schönberger
-template: page
+tags: 3d printer, ender2, cat
 category: projects
-url: projects/01_3d_printer.html
-save_as: projects/01_3d_printer.html

 Here you can view the latest projects I've started.

--- a/content/blog/03_quadrocopter_en.md
+++ b/content/blog/03_quadrocopter_en.md
@ -0,0 +1,9 @@
+title: quadrocopter
+date: 2019-05-20
+author: Philipp Schönberger
+tags: bataflight, quadrocopter, fpv
+category: projects
+
+
+Here you can view the latest projects I've started.
+
--- a/content/extra/robots.txt
+++ b/content/extra/robots.txt
@ -0,0 +1,6 @@
+User-agent: *
+Disallow: /stats/
+Allow: /
+
+Sitemap: http://www.phschoen.de/sitemap.xml
+
--- a/content/pages/index_de.md
+++ b/content/pages/index_de.md
@ -2,17 +2,18 @@ title: home
 author: Philipp Schönberger
 lang: de
 template: page
+save_as: index.html

-Welcome to my webpage.
+Willkommen auf meiner Website.

-It contains things I work or worked on and lot of other things I had fun with.
+Hier kannst du Arbeiten und Projekte finden, an denen ich gearbeitet habe, oder Dinge, an denen ich Freude hatte.

-I’m Philipp Schönberger born in in Zeitz(Germany) within the 1988.
-In Constance I made my bachelor degree within ‘computer engineering‘.
-Afterwards I absolved my master degree within the ‘robotics and intelligent embedded systems‘ in Lübeck.
-Currently I am employed as software engineer/architect for a satellite aided router in Germany near the Lake Constance.
+Ich selbst bin Philipp Schönberger, 1988 in Zeitz geboren.
+In Konstanz habe ich meinen Bachelor-Abschluss in "Technischer Informatik" gemacht.
+Anschließend habe ich noch meinen Master in "Robotik und Intelligente eingebettete Systeme" in Lübeck gemacht.
+
+Im Moment bin ich als Software-Ingenieur und -Architekt für einen satellitengestützten Router am Standort Bodensee angestellt.

-Enjoy your stay.

 ## Contact ##

--- a/content/pages/index_en.md
+++ b/content/pages/index_en.md
@ -2,6 +2,8 @@ title: home
 author: Philipp Schönberger
 lang: en
 template: page
+save_as: index.html
+

 Welcome to my webpage.

--- a/content/pages/projects/projects.md
+++ b/content/pages/projects/projects.md
@ -1,14 +0,0 @@
-title: Projects
-date: 2019-05-20
-author: Philipp Schönberger
-template: projects
-category: projects
-url: projects/index.html
-save_as: projects/index.html
-
-
-Here you can view the latest projects i've started.
-
-This page contains tutorials, rough drafts I work on. The articles in here aren't necessarily finished and some of them aren't open to discussion, because they      are work in progress (or chaos). Some of them <em>might</em> be turned into self-contained blog posts, some of them will never get out of this dungeon.
-
-
--- a/pelicanconf.py
+++ b/pelicanconf.py
@ -10,6 +10,15 @@ EMAIL = 'mail AT phschoen.de'
 SITENAME = 'https://phschoen.de'
 OUTPUT_PATH = 'output/'

+# Content directories that are published as static files.
+STATIC_PATHS = [
+    'images',
+    'extra',  # holds robots.txt / favicon.ico, relocated by EXTRA_PATH_METADATA
+]
+# Per-file metadata overrides: give the files from 'extra/' the output
+# paths listed here instead of keeping the 'extra/' prefix.
+EXTRA_PATH_METADATA = {
+    'extra/robots.txt': {'path': 'robots.txt'},
+    'extra/favicon.ico': {'path': 'favicon.ico'},
+}
+
 # Base URL this page is hosted at:
 SITENAME = 'phschoen.de'
 # SITEURL = 'http://skylx125.ndsatcom.com:8000'
@ -104,7 +113,6 @@ INDEX_SAVE_AS = 'blog/index.html'

 # Navigation menu:
 SECTIONS = [('blog', '/blog'),
-            ('Projects', '/projects'),
            ('Gallery', '/gallery'),
            ('about', '/about'),
            ('Impressum', '/impressum'),
@ -127,13 +135,25 @@ DEFAULT_CATEGORY = 'uncategorized'
 # DEFAULT_PAGINATION = 10

 PLUGIN_PATHS = ['plugins', ]
-PLUGINS = ['lightgallery', 'i18n_subsites']
-
+PLUGINS = ['lightgallery', 'i18n_subsites', 'sitemap']
+
+SITEMAP = {
+    'format': 'xml',
+    'priorities': {
+        'articles': 0.5,
+        'indexes': 0.5,
+        'pages': 0.5
+    },
+    'changefreqs': {
+        'articles': 'weekly',
+        'indexes': 'daily',
+        'pages': 'weekly'
+    }
+}

 GITHUB_SOURCE_PATH = "wooot"

-I18N_SUBSITES = {
-                 'de': {'SITENAME': 'phschoen.de',
+I18N_SUBSITES = {'de': {'SITENAME': 'phschoen.de',
                        'THEME_STATIC_DIR': 'theme',
                        'OUTPUT_PATH': './output/de',
                        },
@ -145,12 +165,14 @@ I18N_SUBSITES = {
                 }

 languages_lookup = {'en': {'name': 'English',
-                           'icon': SITEURL+'/images/en.jpg'
+                           'icon': SITEURL + '/images/en.jpg'
                           },
                    'de': {'name': 'Deutsch',
-                           'icon': SITEURL+'/images/de.jpg'
+                           'icon': SITEURL + '/images/de.jpg'
                           },
                    }
+
+
 def lookup_lang_ico(lang_code):
    return languages_lookup[lang_code]['icon']

@ -165,10 +187,12 @@ def getGitHubPage(source_file):


 def getBasename(path):
+    import ntpath
    return ntpath.basename(path)


 def month_name(month_number):
+    import calendar
    return calendar.month_name[month_number]


--- a/plugins/sitemap/Readme.rst
+++ b/plugins/sitemap/Readme.rst
@ -0,0 +1,78 @@
+Sitemap
+-------
+
+This plugin generates plain-text or XML sitemaps. You can use the ``SITEMAP``
+variable in your settings file to configure the behavior of the plugin.
+
+The ``SITEMAP`` variable must be a Python dictionary and can contain these keys:
+
+- ``format``, which sets the output format of the plugin (``xml`` or ``txt``)
+
+- ``priorities``, which is a dictionary with three keys:
+
+  - ``articles``, the priority for the URLs of the articles and their
+    translations
+
+  - ``pages``, the priority for the URLs of the static pages
+
+  - ``indexes``, the priority for the URLs of the index pages, such as tags,
+    author pages, category indexes, archives, etc.
+
+  All the values of this dictionary must be decimal numbers between ``0`` and ``1``.
+
+- ``changefreqs``, which is a dictionary with three items:
+
+  - ``articles``, the update frequency of the articles
+
+  - ``pages``, the update frequency of the pages
+
+  - ``indexes``, the update frequency of the index pages
+
+  Valid frequency values are ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``,
+  ``yearly`` and ``never``.
+
+You can exclude URLs from the sitemap via regular expressions. Each pattern is
+matched against the beginning of the page URL. For example, to exclude all URLs
+starting with ``tag/`` or ``category/`` you can use the following ``SITEMAP`` setting.
+
+.. code-block:: python
+
+    SITEMAP = {
+        'exclude': ['tag/', 'category/']
+    }
+
+If a key is missing or a value is incorrect, it will be replaced with the
+default value.
+
+You can also exclude an individual page or article by setting ``private`` to
+``True`` in its metadata.
+
+The sitemap is saved in ``<output_path>/sitemap.<format>``.
+
+.. note::
+   ``priorities`` and ``changefreqs`` are information for search engines.
+   They are only used in the XML sitemaps.
+   For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions>
+
+**Example**
+
+Here is an example configuration (these are also the default settings):
+
+.. code-block:: python
+
+    # Where your plug-ins reside
+    PLUGIN_PATHS = ['/where/you/cloned/it/pelican-plugins/', ]
+    PLUGINS=['sitemap',]
+
+    SITEMAP = {
+        'format': 'xml',
+        'priorities': {
+            'articles': 0.5,
+            'indexes': 0.5,
+            'pages': 0.5
+        },
+        'changefreqs': {
+            'articles': 'monthly',
+            'indexes': 'daily',
+            'pages': 'monthly'
+        }
+    }
--- a/plugins/sitemap/init.py
+++ b/plugins/sitemap/init.py
@ -0,0 +1 @@
+from .sitemap import *
--- a/plugins/sitemap/init.pyc
+++ b/plugins/sitemap/init.pyc
--- a/plugins/sitemap/sitemap.py
+++ b/plugins/sitemap/sitemap.py
@ -0,0 +1,273 @@
+# -*- coding: utf-8 -*-
+'''
+Sitemap
+-------
+
+The sitemap plugin generates plain-text or XML sitemaps.
+'''
+
+from __future__ import unicode_literals
+
+import re
+import collections
+import os.path
+
+from datetime import datetime
+from logging import warning, info
+from codecs import open
+from pytz import timezone
+
+from pelican import signals, contents
+from pelican.utils import get_date
+
+# Fixed entries written at the top of a plain-text sitemap.
+# {0} is filled with the site URL.
+TXT_HEADER = """{0}/index.html
+{0}/archives.html
+{0}/tags.html
+{0}/categories.html
+"""
+
+# Opening of the XML sitemap document; the <urlset> element is closed by
+# XML_FOOTER.
+XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+"""
+
+# Template for a single <url> entry. Placeholders, in order:
+# {0} site URL, {1} page URL, {2} last modification date,
+# {3} change frequency, {4} priority.
+XML_URL = """
+<url>
+<loc>{0}/{1}</loc>
+<lastmod>{2}</lastmod>
+<changefreq>{3}</changefreq>
+<priority>{4}</priority>
+</url>
+"""
+
+# Closes the <urlset> element opened in XML_HEADER.
+XML_FOOTER = """
+</urlset>
+"""
+
+
+def format_date(date):
+    """Return *date* formatted as ``YYYY-MM-DDTHH:MM:SS`` plus a UTC offset.
+
+    The offset is rendered as ``+hh:mm`` / ``-hh:mm``. When no offset is
+    known (naive datetime, or a tzinfo whose ``utcoffset()`` is ``None``)
+    the placeholder ``-00:00`` is appended instead.
+    """
+    # '%z' expands to '' whenever no UTC offset is available, even if a
+    # tzinfo object is attached, so test the rendered text, not ``tzinfo``.
+    tz = date.strftime('%z')
+    if tz:
+        # Keep sign + hours + minutes only; '%z' may carry a trailing
+        # seconds part for unusual offsets, which must not end up in the
+        # minutes field.
+        tz = tz[:3] + ':' + tz[3:5]
+    else:
+        tz = "-00:00"
+    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
+
+
+class SitemapGenerator(object):
+    """Pelican generator that writes ``sitemap.xml`` or ``sitemap.txt``.
+
+    The output is configured through the ``SITEMAP`` settings dict (keys
+    ``format``, ``priorities``, ``changefreqs`` and ``exclude``). Missing or
+    invalid values fall back to the defaults assigned in ``__init__``.
+    """
+
+    def __init__(self, context, settings, path, theme, output_path, *null):
+        """Store the generation context and validate the ``SITEMAP`` setting.
+
+        :param context: shared context; read later in ``generate_output``.
+        :param settings: settings mapping; ``SITEURL``, ``TIMEZONE`` and
+            ``SITEMAP`` are read from it.
+        :param path: accepted but not used.
+        :param theme: accepted but not used.
+        :param output_path: directory the sitemap file is written to.
+        :param null: any further positional arguments are ignored.
+        """
+
+        self.output_path = output_path
+        self.context = context
+        self.now = datetime.now()
+        self.siteurl = settings.get('SITEURL')
+
+        self.default_timezone = settings.get('TIMEZONE', 'UTC')
+        self.timezone = getattr(self, 'timezone', self.default_timezone)
+        self.timezone = timezone(self.timezone)
+
+        self.format = 'xml'
+
+        self.changefreqs = {
+            'articles': 'monthly',
+            'indexes': 'daily',
+            'pages': 'monthly'
+        }
+
+        self.priorities = {
+            'articles': 0.5,
+            'indexes': 0.5,
+            'pages': 0.5
+        }
+
+        self.sitemapExclude = []
+
+        config = settings.get('SITEMAP', {})
+
+        if not isinstance(config, dict):
+            warning("sitemap plugin: the SITEMAP setting must be a dict")
+        else:
+            fmt = config.get('format')
+            pris = config.get('priorities')
+            chfreqs = config.get('changefreqs')
+            self.sitemapExclude = config.get('exclude', [])
+
+            if fmt not in ('xml', 'txt'):
+                warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
+                warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")
+            elif fmt == 'txt':
+                # Priorities and change frequencies are only written to XML
+                # sitemaps, so there is nothing left to validate.
+                self.format = fmt
+                return
+
+            valid_keys = ('articles', 'indexes', 'pages')
+            valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never')
+
+            if isinstance(pris, dict):
+                # Validate a copy so the user's settings dict stays untouched.
+                pris = dict(pris)
+                for k, v in pris.items():
+                    in_range = isinstance(v, (int, float)) and 0 <= v <= 1
+                    if k in valid_keys and not in_range:
+                        default = self.priorities[k]
+                        warning("sitemap plugin: priorities must be numbers "
+                                "between 0 and 1")
+                        warning("sitemap plugin: setting SITEMAP['priorities']"
+                                "['{0}'] on {1}".format(k, default))
+                        pris[k] = default
+                self.priorities.update(pris)
+            elif pris is not None:
+                warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
+                warning("sitemap plugin: using the default values")
+
+            if isinstance(chfreqs, dict):
+                # Validate a copy so the user's settings dict stays untouched.
+                chfreqs = dict(chfreqs)
+                for k, v in chfreqs.items():
+                    if k in valid_keys and v not in valid_chfreqs:
+                        default = self.changefreqs[k]
+                        warning("sitemap plugin: invalid changefreq `{0}'".format(v))
+                        warning("sitemap plugin: setting SITEMAP['changefreqs']"
+                                "['{0}'] on '{1}'".format(k, default))
+                        chfreqs[k] = default
+                self.changefreqs.update(chfreqs)
+            elif chfreqs is not None:
+                warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
+                warning("sitemap plugin: using the default values")
+
+    def write_url(self, page, fd):
+        """Write the sitemap entry for *page* to the open file *fd*.
+
+        Nothing is written when the page is not published, is marked
+        ``private``, has no ``save_as`` target, has no generated output
+        file, or when its URL matches one of the ``exclude`` patterns.
+        """
+
+        if getattr(page, 'status', 'published') != 'published':
+            return
+
+        if getattr(page, 'private', 'False') == 'True':
+            return
+
+        # We can disable categories/authors/etc by using False instead of ''
+        if not page.save_as:
+            return
+
+        page_path = os.path.join(self.output_path, page.save_as)
+        if not os.path.exists(page_path):
+            return
+
+        pageurl = '' if page.url == 'index.html' else page.url
+
+        # Exclude URLs from the sitemap. Patterns are anchored at the start
+        # of the URL (re.match) and apply to both output formats.
+        if any(re.match(regstr, pageurl) for regstr in self.sitemapExclude):
+            return
+
+        if self.format != 'xml':
+            fd.write(self.siteurl + '/' + pageurl + '\n')
+            return
+
+        lastdate = getattr(page, 'date', self.now)
+        try:
+            lastdate = self.get_date_modified(page, lastdate)
+        except ValueError:
+            warning("sitemap plugin: " + page.save_as + " has invalid modification date,")
+            warning("sitemap plugin: using date value as lastmod.")
+        lastmod = format_date(lastdate)
+
+        if isinstance(page, contents.Article):
+            pri = self.priorities['articles']
+            chfreq = self.changefreqs['articles']
+        elif isinstance(page, contents.Page):
+            pri = self.priorities['pages']
+            chfreq = self.changefreqs['pages']
+        else:
+            pri = self.priorities['indexes']
+            chfreq = self.changefreqs['indexes']
+
+        fd.write(XML_URL.format(self.siteurl, pageurl, lastmod, chfreq, pri))
+
+    def get_date_modified(self, page, default):
+        """Return the ``modified`` value of *page* as a datetime.
+
+        A ``modified`` attribute that is not already a datetime is parsed
+        with ``get_date``; *default* is returned when the attribute is
+        missing.
+        """
+        if hasattr(page, 'modified'):
+            if isinstance(page.modified, datetime):
+                return page.modified
+            return get_date(page.modified)
+        else:
+            return default
+
+    def set_url_wrappers_modification_date(self, wrappers):
+        """Set ``modified`` on each wrapper to its newest article date.
+
+        *wrappers* is an iterable of ``(wrapper, articles)`` pairs. The
+        newest ``date`` / ``modified`` value among the articles is stored
+        on the wrapper as a string, where ``get_date_modified`` later
+        picks it up.
+        """
+        for (wrapper, articles) in wrappers:
+            lastmod = datetime.min.replace(tzinfo=self.timezone)
+            for article in articles:
+                lastmod = max(lastmod, article.date.replace(tzinfo=self.timezone))
+                try:
+                    modified = self.get_date_modified(article, datetime.min).replace(tzinfo=self.timezone)
+                    lastmod = max(lastmod, modified)
+                except ValueError:
+                    # Suppressed: write_url warns about the invalid date
+                    # when the article itself is written.
+                    pass
+            setattr(wrapper, 'modified', str(lastmod))
+
+    def generate_output(self, writer):
+        """Collect all URLs from the context and write the sitemap file.
+
+        The file is written to ``<output_path>/sitemap.<format>``; the
+        *writer* argument is not used.
+        """
+        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))
+
+        pages = self.context['pages'] \
+                + self.context['articles'] \
+                + [c for (c, a) in self.context['categories']] \
+                + [t for (t, a) in self.context['tags']] \
+                + [a for (a, b) in self.context['authors']]
+
+        self.set_url_wrappers_modification_date(self.context['categories'])
+        self.set_url_wrappers_modification_date(self.context['tags'])
+        self.set_url_wrappers_modification_date(self.context['authors'])
+
+        for article in self.context['articles']:
+            pages += article.translations
+
+        info('writing {0}'.format(path))
+
+        with open(path, 'w', encoding='utf-8') as fd:
+
+            if self.format == 'xml':
+                fd.write(XML_HEADER)
+            else:
+                fd.write(TXT_HEADER.format(self.siteurl))
+
+            # Stand-in for real content objects: carries only the attributes
+            # write_url reads for pages that have no content object.
+            FakePage = collections.namedtuple('FakePage',
+                                              ['status',
+                                               'date',
+                                               'url',
+                                               'save_as'])
+
+            for standard_page_url in ['index.html',
+                                      'archives.html',
+                                      'tags.html',
+                                      'categories.html']:
+                fake = FakePage(status='published',
+                                date=self.now,
+                                url=standard_page_url,
+                                save_as=standard_page_url)
+                self.write_url(fake, fd)
+
+            # add template pages
+            for template_page_url in self.context['TEMPLATE_PAGES'].values():
+
+                # don't add duplicate entry for index page
+                if template_page_url == 'index.html':
+                    continue
+
+                fake = FakePage(status='published',
+                                date=self.now,
+                                url=template_page_url,
+                                save_as=template_page_url)
+                self.write_url(fake, fd)
+
+            for page in pages:
+                self.write_url(page, fd)
+
+            if self.format == 'xml':
+                fd.write(XML_FOOTER)
+
+
+def get_generators(generators):
+    """Signal handler that returns the sitemap generator class.
+
+    The ``generators`` argument passed by the signal is not used.
+    """
+    return SitemapGenerator
+
+
+def register():
+    """Plugin entry point: connect ``get_generators`` to the signal of the same name."""
+    signals.get_generators.connect(get_generators)
--- a/plugins/sitemap/sitemap.pyc
+++ b/plugins/sitemap/sitemap.pyc
--- a/themes/minimal/templates/projects.html
+++ b/themes/minimal/templates/projects.html
@ -5,11 +5,13 @@

 {% block content %}
 <h1>Pages, Drafts, Tutorials, ...</h1>
-<p>This page contains tutorials, rough drafts I work on. The articles in here aren't necessarily finished and some of them aren't open to discussion, because they are work in progress (or chaos). Some of them <em>might</em> be turned into self-contained blog posts, some of them will never get out of this dungeon.</p>
-{% endblock %}
+<p>nothing to see here </p>
 dfsafsdf
 <ul>
-{% for page in pages | sort(attribute='title') %}
-  <li><a href="{{ SITEURL }}/{{ page.url }}">{{ page.title }}</a></li>
-{% endfor %}
+    {# One list item per article; <ul> may only contain <li> children. #}
+    {% for article in (articles_page.object_list if articles_page else articles) %}
+        <li class="blogItem">
+            <h1><a href="{{ SITEURL }}/{{ article.url }}">{{ article.title }}</a></h1>
+        </li>
+    {% endfor %}
 </ul>
+{% endblock %}