13 changed files with 417 additions and 37 deletions
-
6content/blog/02_3d_printer_en.md
-
9content/blog/03_quadrocopter_en.md
-
6content/extra/robots.txt
-
15content/pages/index_de.md
-
2content/pages/index_en.md
-
14content/pages/projects/projects.md
-
38pelicanconf.py
-
78plugins/sitemap/Readme.rst
-
1plugins/sitemap/__init__.py
-
BINplugins/sitemap/__init__.pyc
-
273plugins/sitemap/sitemap.py
-
BINplugins/sitemap/sitemap.pyc
-
12themes/minimal/templates/projects.html
@ -1,10 +1,8 @@ |
|||||
title: Projects |
|
||||
|
title: 3d printer |
||||
date: 2019-05-20 |
date: 2019-05-20 |
||||
author: Philipp Schönberger |
author: Philipp Schönberger |
||||
template: page |
|
||||
|
tags: 3d printer, ender2, cat |
||||
category: projects |
category: projects |
||||
url: projects/01_3d_printer.html |
|
||||
save_as: projects/01_3d_printer.html |
|
||||
|
|
||||
Here you can view the latest projects I've started. |
Here you can view the latest projects I've started. |
||||
|
|
@ -0,0 +1,9 @@ |
|||||
|
title: quadrocopter |
||||
|
date: 2019-05-20 |
||||
|
author: Philipp Schönberger |
||||
|
tags: betaflight, quadrocopter, fpv |
||||
|
category: projects |
||||
|
|
||||
|
|
||||
|
Here you can view the latest projects I've started. |
||||
|
|
@ -0,0 +1,6 @@ |
|||||
|
User-agent: * |
||||
|
Disallow: /stats/ |
||||
|
Allow: / |
||||
|
|
||||
|
Sitemap: http://www.phschoen.de/sitemap.xml |
||||
|
|
@ -1,14 +0,0 @@ |
|||||
title: Projects |
|
||||
date: 2019-05-20 |
|
||||
author: Philipp Schönberger |
|
||||
template: projects |
|
||||
category: projects |
|
||||
url: projects/index.html |
|
||||
save_as: projects/index.html |
|
||||
|
|
||||
|
|
||||
Here you can view the latest projects i've started. |
|
||||
|
|
||||
This page contains tutorials, rough drafts I work on. The articles in here aren't necessarily finished and some of them aren't open to discussion, because they are work in progress (or chaos). Some of them <em>might</em> be turned into self-contained blog posts, some of them will never get out of this dungeon. |
|
||||
|
|
||||
|
|
@ -0,0 +1,78 @@ |
|||||
|
Sitemap |
||||
|
------- |
||||
|
|
||||
|
This plugin generates plain-text or XML sitemaps. You can use the ``SITEMAP`` |
||||
|
variable in your settings file to configure the behavior of the plugin. |
||||
|
|
||||
|
The ``SITEMAP`` variable must be a Python dictionary and can contain these keys: |
||||
|
|
||||
|
- ``format``, which sets the output format of the plugin (``xml`` or ``txt``) |
||||
|
|
||||
|
- ``priorities``, which is a dictionary with three keys: |
||||
|
|
||||
|
- ``articles``, the priority for the URLs of the articles and their |
||||
|
translations |
||||
|
|
||||
|
- ``pages``, the priority for the URLs of the static pages |
||||
|
|
||||
|
- ``indexes``, the priority for the URLs of the index pages, such as tags, |
||||
|
author pages, categories indexes, archives, etc... |
||||
|
|
||||
|
All the values of this dictionary must be decimal numbers between ``0`` and ``1``. |
||||
|
|
||||
|
- ``changefreqs``, which is a dictionary with three items: |
||||
|
|
||||
|
- ``articles``, the update frequency of the articles |
||||
|
|
||||
|
- ``pages``, the update frequency of the pages |
||||
|
|
||||
|
- ``indexes``, the update frequency of the index pages |
||||
|
|
||||
|
Valid frequency values are ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``, |
||||
|
``yearly`` and ``never``. |
||||
|
|
||||
|
You can exclude URLs from being included in the sitemap via regular expressions. |
||||
|
For example, to exclude all URLs containing ``tag/`` or ``category/`` you can |
||||
|
use the following ``SITEMAP`` setting. |
||||
|
|
||||
|
.. code-block:: python |
||||
|
|
||||
|
SITEMAP = { |
||||
|
'exclude': ['tag/', 'category/'] |
||||
|
} |
||||
|
|
||||
|
If a key is missing or a value is incorrect, it will be replaced with the |
||||
|
default value. |
||||
|
|
||||
|
You can also exclude an individual URL by setting ``private`` to ``True`` |
||||
|
in its metadata. |
||||
|
|
||||
|
The sitemap is saved in ``<output_path>/sitemap.<format>``. |
||||
|
|
||||
|
.. note:: |
||||
|
``priorities`` and ``changefreqs`` are information for search engines. |
||||
|
They are only used in the XML sitemaps. |
||||
|
For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions> |
||||
|
|
||||
|
**Example** |
||||
|
|
||||
|
Here is an example configuration (these are also the default settings): |
||||
|
|
||||
|
.. code-block:: python |
||||
|
# Where your plug-ins reside |
||||
|
PLUGIN_PATHS = ['/where/you/cloned/it/pelican-plugins/', ] |
||||
|
PLUGINS=['sitemap',] |
||||
|
|
||||
|
SITEMAP = { |
||||
|
'format': 'xml', |
||||
|
'priorities': { |
||||
|
'articles': 0.5, |
||||
|
'indexes': 0.5, |
||||
|
'pages': 0.5 |
||||
|
}, |
||||
|
'changefreqs': { |
||||
|
'articles': 'monthly', |
||||
|
'indexes': 'daily', |
||||
|
'pages': 'monthly' |
||||
|
} |
||||
|
} |
@ -0,0 +1 @@ |
|||||
|
from .sitemap import * |
@ -0,0 +1,273 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
''' |
||||
|
Sitemap |
||||
|
------- |
||||
|
|
||||
|
The sitemap plugin generates plain-text or XML sitemaps. |
||||
|
''' |
||||
|
|
||||
|
from __future__ import unicode_literals |
||||
|
|
||||
|
import re |
||||
|
import collections |
||||
|
import os.path |
||||
|
|
||||
|
from datetime import datetime |
||||
|
from logging import warning, info |
||||
|
from codecs import open |
||||
|
from pytz import timezone |
||||
|
|
||||
|
from pelican import signals, contents |
||||
|
from pelican.utils import get_date |
||||
|
|
||||
|
# Fixed first lines of the plain-text sitemap; {0} is filled with the site URL.
TXT_HEADER = """{0}/index.html
{0}/archives.html
{0}/tags.html
{0}/categories.html
"""

# Opening of the XML sitemap document (XML declaration plus <urlset> root).
XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""

# One <url> entry. Positional fields: {0} site URL, {1} page URL,
# {2} last-modification timestamp, {3} change frequency, {4} priority.
XML_URL = """
<url>
<loc>{0}/{1}</loc>
<lastmod>{2}</lastmod>
<changefreq>{3}</changefreq>
<priority>{4}</priority>
</url>
"""

# Closes the <urlset> root opened by XML_HEADER.
XML_FOOTER = """
</urlset>
"""
||||
|
|
||||
|
|
||||
|
def format_date(date):
    """Return *date* as ``YYYY-MM-DDTHH:MM:SS`` followed by a UTC offset.

    The offset is rendered as ``+HH:MM`` / ``-HH:MM``.  When *date* carries
    no usable offset (a naive datetime, or a tzinfo whose ``utcoffset()``
    returns ``None``) the suffix ``-00:00`` is used instead.

    :param date: a ``datetime`` instance, naive or timezone-aware.
    :returns: the formatted timestamp string.
    """
    # Derive the offset from utcoffset() rather than slicing strftime('%z'):
    # '%z' is empty when utcoffset() is None (which used to yield a bare ':')
    # and may carry a seconds part that breaks fixed-position slicing.
    offset = date.utcoffset()
    if offset is None:
        tz = "-00:00"
    else:
        seconds = int(offset.total_seconds())
        sign = '-' if seconds < 0 else '+'
        hours, remainder = divmod(abs(seconds), 3600)
        tz = '{0}{1:02d}:{2:02d}'.format(sign, hours, remainder // 60)
    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
||||
|
|
||||
|
|
||||
|
class SitemapGenerator(object):
    """Generator that writes ``sitemap.xml`` or ``sitemap.txt`` to the output.

    Behaviour is configured through the ``SITEMAP`` settings dict, which may
    contain the keys ``format``, ``priorities``, ``changefreqs`` and
    ``exclude``.  Missing or invalid values fall back to the defaults set in
    ``__init__``.
    """

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Read the settings and validate the ``SITEMAP`` configuration.

        :param context: shared context dict; ``generate_output`` reads the
            ``pages``, ``articles``, ``categories``, ``tags``, ``authors``
            and ``TEMPLATE_PAGES`` entries from it.
        :param settings: settings mapping (``SITEURL``, ``TIMEZONE`` and
            ``SITEMAP`` are read).
        :param path: unused here; part of the generator signature.
        :param theme: unused here; part of the generator signature.
        :param output_path: directory the sitemap file is written to.
        :param null: any further positional arguments are ignored.
        """
        self.output_path = output_path
        self.context = context
        self.now = datetime.now()
        self.siteurl = settings.get('SITEURL')

        self.default_timezone = settings.get('TIMEZONE', 'UTC')
        self.timezone = getattr(self, 'timezone', self.default_timezone)
        self.timezone = timezone(self.timezone)

        # Defaults, used whenever the configuration is missing or invalid.
        self.format = 'xml'
        self.changefreqs = {
            'articles': 'monthly',
            'indexes': 'daily',
            'pages': 'monthly',
        }
        self.priorities = {
            'articles': 0.5,
            'indexes': 0.5,
            'pages': 0.5,
        }
        self.sitemapExclude = []

        config = settings.get('SITEMAP', {})
        if not isinstance(config, dict):
            warning("sitemap plugin: the SITEMAP setting must be a dict")
            return

        fmt = config.get('format')
        pris = config.get('priorities')
        chfreqs = config.get('changefreqs')
        self.sitemapExclude = config.get('exclude', [])

        if fmt == 'txt':
            # Priorities and change frequencies only appear in XML sitemaps,
            # so there is nothing left to validate for the text format.
            self.format = fmt
            return
        if fmt is not None and fmt != 'xml':
            # An absent key silently keeps the default, like the other keys.
            warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
            warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")

        valid_keys = ('articles', 'indexes', 'pages')
        valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
                         'yearly', 'never')

        if isinstance(pris, dict):
            # Validate on a copy so the user's settings dict is not modified.
            checked = dict(pris)
            for k, v in pris.items():
                if k in valid_keys and not isinstance(v, (int, float)):
                    default = self.priorities[k]
                    warning("sitemap plugin: priorities must be numbers")
                    warning("sitemap plugin: setting SITEMAP['priorities']"
                            "['{0}'] on {1}".format(k, default))
                    checked[k] = default
            self.priorities.update(checked)
        elif pris is not None:
            warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
            warning("sitemap plugin: using the default values")

        if isinstance(chfreqs, dict):
            # Validate on a copy so the user's settings dict is not modified.
            checked = dict(chfreqs)
            for k, v in chfreqs.items():
                if k in valid_keys and v not in valid_chfreqs:
                    default = self.changefreqs[k]
                    warning("sitemap plugin: invalid changefreq `{0}'".format(v))
                    warning("sitemap plugin: setting SITEMAP['changefreqs']"
                            "['{0}'] on '{1}'".format(k, default))
                    checked[k] = default
            self.changefreqs.update(checked)
        elif chfreqs is not None:
            warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
            warning("sitemap plugin: using the default values")

    def write_url(self, page, fd):
        """Write the sitemap entry for *page* to *fd*, if it qualifies.

        Nothing is written when the page is not published, is marked
        ``private``, has a falsy ``save_as``, or its output file does not
        exist under ``output_path``.  In XML mode, pages whose URL matches
        one of the ``exclude`` patterns are skipped as well.

        :param page: object exposing ``save_as`` and ``url``; ``status``,
            ``private``, ``date`` and ``modified`` are read when present.
        :param fd: writable text file object.
        """
        if getattr(page, 'status', 'published') != 'published':
            return

        # 'private' is compared against the string 'True'.
        if getattr(page, 'private', 'False') == 'True':
            return

        # We can disable categories/authors/etc by using False instead of ''
        if not page.save_as:
            return

        page_path = os.path.join(self.output_path, page.save_as)
        if not os.path.exists(page_path):
            return

        lastdate = getattr(page, 'date', self.now)
        try:
            lastdate = self.get_date_modified(page, lastdate)
        except ValueError:
            # Keep the publication date when 'modified' cannot be parsed.
            warning("sitemap plugin: " + page.save_as + " has invalid modification date,")
            warning("sitemap plugin: using date value as lastmod.")
        lastmod = format_date(lastdate)

        if isinstance(page, contents.Article):
            kind = 'articles'
        elif isinstance(page, contents.Page):
            kind = 'pages'
        else:
            kind = 'indexes'
        pri = self.priorities[kind]
        chfreq = self.changefreqs[kind]

        # The site root is emitted as '<siteurl>/' rather than '.../index.html'.
        pageurl = '' if page.url == 'index.html' else page.url

        if self.format == 'xml':
            # Exclude URLs from the sitemap (patterns are matched with
            # re.match, i.e. anchored at the start of the URL).
            excluded = any(re.match(regstr, pageurl)
                           for regstr in self.sitemapExclude)
            if not excluded:
                fd.write(XML_URL.format(self.siteurl, pageurl, lastmod, chfreq, pri))
        else:
            fd.write(self.siteurl + '/' + pageurl + '\n')

    def get_date_modified(self, page, default):
        """Return the modification date of *page*, or *default* if it has none.

        A ``modified`` attribute that is already a ``datetime`` is returned
        unchanged; any other value is passed through ``get_date``.
        """
        if not hasattr(page, 'modified'):
            return default
        if isinstance(page.modified, datetime):
            return page.modified
        return get_date(page.modified)

    def set_url_wrappers_modification_date(self, wrappers):
        """Set ``modified`` on each wrapper to the newest date of its articles.

        :param wrappers: iterable of ``(wrapper, articles)`` pairs.
        """
        for (wrapper, articles) in wrappers:
            lastmod = datetime.min.replace(tzinfo=self.timezone)
            for article in articles:
                lastmod = max(lastmod, article.date.replace(tzinfo=self.timezone))
                try:
                    modified = self.get_date_modified(
                        article, datetime.min).replace(tzinfo=self.timezone)
                    lastmod = max(lastmod, modified)
                except ValueError:
                    # Suppressed: write_url warns about the same invalid
                    # date when the article itself is written.
                    pass
            setattr(wrapper, 'modified', str(lastmod))

    def generate_output(self, writer):
        """Write ``sitemap.<format>`` into ``output_path``.

        Entries are written in this order: the standard index pages, the
        ``TEMPLATE_PAGES`` URLs, then pages, articles, categories, tags,
        authors and article translations.

        :param writer: unused; part of the generator interface.
        """
        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))

        pages = self.context['pages'] \
            + self.context['articles'] \
            + [c for (c, a) in self.context['categories']] \
            + [t for (t, a) in self.context['tags']] \
            + [a for (a, b) in self.context['authors']]

        self.set_url_wrappers_modification_date(self.context['categories'])
        self.set_url_wrappers_modification_date(self.context['tags'])
        self.set_url_wrappers_modification_date(self.context['authors'])

        for article in self.context['articles']:
            pages += article.translations

        info('writing {0}'.format(path))

        # Minimal stand-in carrying just the attributes write_url needs.
        FakePage = collections.namedtuple('FakePage',
                                          ['status',
                                           'date',
                                           'url',
                                           'save_as'])

        standard_page_urls = ['index.html',
                              'archives.html',
                              'tags.html',
                              'categories.html']
        # don't add duplicate entry for index page
        template_page_urls = [
            url for url in self.context['TEMPLATE_PAGES'].values()
            if url != 'index.html'
        ]

        with open(path, 'w', encoding='utf-8') as fd:
            if self.format == 'xml':
                fd.write(XML_HEADER)
            else:
                fd.write(TXT_HEADER.format(self.siteurl))

            for fake_url in standard_page_urls + template_page_urls:
                fake = FakePage(status='published',
                                date=self.now,
                                url=fake_url,
                                save_as=fake_url)
                self.write_url(fake, fd)

            for page in pages:
                self.write_url(page, fd)

            if self.format == 'xml':
                fd.write(XML_FOOTER)
||||
|
|
||||
|
|
||||
|
def get_generators(generators):
    """Signal handler: return the generator class this plugin contributes."""
    generator_class = SitemapGenerator
    return generator_class
||||
|
|
||||
|
|
||||
|
def register():
    """Plugin entry point: connect ``get_generators`` to its signal."""
    get_generators_signal = signals.get_generators
    get_generators_signal.connect(get_generators)
Write
Preview
Loading…
Cancel
Save
Reference in new issue