Skip to content

An extension for in-page OCX metadata

The aim of this extension is to take metadata embedded as YAML in a page of markdown and render it as JSON-LD in the HTML created by mkdocs.

I was helped in writing this by reference to Nikita Sivakov's full-yaml-metadata extension (but sadly I couldn't get that fully working, nor does it do as much as I want, hence the fresh start).

The YAML must be separated from the rest of the text by a line containing only --- before and after.

Test metadata

"@id": "#Lesson1"
"@type":
    - oer:Lesson
    - CreativeWork
learningResourceType: LessonPlan
hasPart:
  "@id": "#activity1-1"
author:
    "@type": Person
    name: Phil Barker
<script type="application/ld+json">
{ "@context": [ "http://schema.org",
    { "oer": "http://oerschema.org/",
      "ocx": "https://github.com/K12OCX/k12ocx-specs/"
    }
  ],
  "@id": "#Lesson1",
  "@type":["CreativeWork", "oer:Lesson"],
  "learningResourceType": "LessonPlan",
  "name": "Practice Counting Strategies",
  "hasPart": {
    "@id": "#activity1-1"
  },
  "author": {
    "@type": "Person",
    "name": "Phil Barker"
  }
}
</script>

Note

The context is taken as a constant for OCX so not in YAML, but could easily be set in YAML or possibly provided as an option.

from markdown.extensions import Extension
from markdown.preprocessors import Preprocessor
from markdown.inlinepatterns import SimpleTagInlineProcessor
import yaml, json

# Delimiter line that opens and closes an embedded YAML metadata block.
# NOTE(review): this constant is not referenced by the preprocessor in this
# module, which compares lines against the literal '---' directly.
OCX_YAML_STARTER = '---'
# YAML fragment placed ahead of every metadata block before it is parsed, so
# each parsed block carries the same fixed "@context" entry.
YAML_CONTEXT = '''
"@context":
    - "http://schema.org"
    - "oer": "http://oerschema.org/"
    - "ocx": "https://github.com/K12OCX/k12ocx-specs/"
'''
# Opening tag of the JSON-LD <script> element emitted in place of each block.
SCRIPT_STARTER = '<script type="application/ld+json">'

class OCXMetadata(Extension):
    """Markdown extension that converts in-page OCX YAML metadata.

    Its only job is to install ``OCXMetadataPreprocessor`` on a Markdown
    instance.
    """

    def extendMarkdown(self, md):
        """Register this extension and its preprocessor on *md*.

        The preprocessor is registered under the name ``'ocxmetadata'`` with
        priority 28.
        """
        md.registerExtension(self)
        preprocessor = OCXMetadataPreprocessor(md)
        registry = md.preprocessors
        registry.register(preprocessor, 'ocxmetadata', 28)


class OCXMetadataPreprocessor(Preprocessor):
    """Replace ``---``-delimited YAML blocks with JSON-LD ``<script>`` lines.

    Each block is parsed together with ``YAML_CONTEXT`` and the parsed
    result is also stored on the Markdown instance as ``md.meta``.
    """

    def run(self, lines):
        """Convert every YAML block found in *lines* to a JSON-LD script line.

        A block starts on a line that is exactly ``---`` and ends on the next
        such line. The whole block (delimiters included) is replaced by one
        line holding ``SCRIPT_STARTER``, the JSON dump of the parsed YAML,
        and ``</script>``. An opening ``---`` that is never closed is not a
        block: it and the lines after it are passed through unchanged rather
        than discarded. The input list is read but not consumed.

        Args:
            lines: the document as a list of lines.

        Returns:
            The new list of lines with YAML blocks replaced.

        Side effects:
            Sets ``self.md.meta`` to a dict mapping the 1-based block number
            to its parsed content, or to ``None`` when no block was found.

        Raises:
            Whatever ``yaml.safe_load`` raises for a block that is not valid
            YAML; the error is not caught here.
        """
        delimiter = '---'
        new_lines = []
        yaml_store = {}
        # One iterator shared by both loops: the inner loop consumes the
        # block's lines so the outer loop resumes right after the block.
        remaining = iter(lines)
        for line in remaining:
            if line != delimiter:
                new_lines.append(line)
                continue
            yaml_block = []
            closed = False
            for block_line in remaining:
                if block_line == delimiter:
                    closed = True
                    break
                yaml_block.append(block_line)
            if not closed:
                # No closing delimiter (e.g. a lone horizontal rule): keep
                # the text instead of silently dropping the rest of the page.
                new_lines.append(line)
                new_lines.extend(yaml_block)
                break
            yaml_count = len(yaml_store) + 1
            yaml_store[yaml_count] = yaml.safe_load(
                "\n".join([YAML_CONTEXT] + yaml_block)
            )
            # "<\/" is a valid JSON escape for "</"; it stops a value such as
            # "</script>" from terminating the script element early.
            json_ld = json.dumps(yaml_store[yaml_count]).replace('</', '<\\/')
            new_lines.append(SCRIPT_STARTER + json_ld + '</script>')
        self.md.meta = yaml_store if yaml_store else None
        return new_lines

def makeExtension(*args, **kwargs):
    """Build an ``OCXMetadata`` instance, forwarding every argument to it.

    Exists so the extension can be requested by a plain string name that is
    not dot-noted.
    """
    extension = OCXMetadata(*args, **kwargs)
    return extension
import markdown, unittest
from ocxmd import OCXMetadata
# Markdown fixture containing two YAML metadata blocks, each fenced by lines
# that are exactly '---'. The input must be raw YAML (not the rendered
# <script> output) so that the preprocessor actually has blocks to convert
# and md.meta is populated. Key order within each block matters because it
# determines the key order of the JSON the extension emits.
TESTINPUT = '''
#YAML to JSON-LD test
---
"@id": "#Lesson1"
name: "Test Lesson 1"
"@type":
    - oer:Lesson
    - CreativeWork
learningResourceType: LessonPlan
hasPart:
  "@id": "#activity1"
author:
    "@type": Person
    name: Fred Blogs
---

I started with some YAML and turned it into JSON-LD

Here is some more YAML
---
"@id": "#activity1"
"@type":
    - oer:Activity
    - CreativeWork
name: "Test Activity 1.1"
learningResourceType: Activity
---
'''
# HTML that converting TESTINPUT is expected to produce; compared for exact
# equality in TestOCXMD.test_html.
HTMLEXPECTED = '''<h1>YAML to JSON-LD test</h1>
<p><script type="application/ld+json">{"@context": ["http://schema.org", {"oer": "http://oerschema.org/"}, {"ocx": "https://github.com/K12OCX/k12ocx-specs/"}], "@id": "#Lesson1", "name": "Test Lesson 1", "@type": ["oer:Lesson", "CreativeWork"], "learningResourceType": "LessonPlan", "hasPart": {"@id": "#activity1"}, "author": {"@type": "Person", "name": "Fred Blogs"}}</script></p>
<p>I started with some YAML and turned it into JSON-LD</p>
<p>Here is some more YAML
<script type="application/ld+json">{"@context": ["http://schema.org", {"oer": "http://oerschema.org/"}, {"ocx": "https://github.com/K12OCX/k12ocx-specs/"}], "@id": "#activity1", "@type": ["oer:Activity", "CreativeWork"], "name": "Test Activity 1.1", "learningResourceType": "Activity"}</script></p>'''
# Value md.meta is expected to hold after the conversion: one entry per
# metadata block, keyed by integer block number starting at 1; compared in
# TestOCXMD.test_md.
METADATAEXPECTED = {1: {'@context': ['http://schema.org', {'oer': 'http://oerschema.org/'}, {'ocx': 'https://github.com/K12OCX/k12ocx-specs/'}], '@id': '#Lesson1', 'name': 'Test Lesson 1', '@type': ['oer:Lesson', 'CreativeWork'], 'learningResourceType': 'LessonPlan', 'hasPart': {'@id': '#activity1'}, 'author': {'@type': 'Person', 'name': 'Fred Blogs'}}, 2: {'@context': ['http://schema.org', {'oer': 'http://oerschema.org/'}, {'ocx': 'https://github.com/K12OCX/k12ocx-specs/'}], '@id': '#activity1', '@type': ['oer:Activity', 'CreativeWork'], 'name': 'Test Activity 1.1', 'learningResourceType': 'Activity'}}

class TestOCXMD(unittest.TestCase):
    """Check the HTML and the metadata produced by the ``ocxmd`` extension."""

    @classmethod
    def setUpClass(cls):
        """Convert the fixture once and share the result between tests.

        Doing this here rather than in the class body means a failure in the
        extension is reported as a test error instead of breaking import of
        the test module.
        """
        cls.md = markdown.Markdown(extensions=['ocxmd'])
        cls.html = cls.md.convert(TESTINPUT)

    def test_html(self):
        """The rendered HTML matches the expected markup exactly."""
        self.assertEqual(self.html, HTMLEXPECTED)

    def test_md(self):
        """The parsed metadata is exposed on ``md.meta``."""
        self.assertEqual(self.md.meta, METADATAEXPECTED)

# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
from setuptools import setup
# Packaging metadata for the single-module ``ocxmd`` extension.
setup(
    name='ocxmd',
    version='0.1',
    py_modules=['ocxmd'],
    install_requires=[
        # ocxmd calls md.preprocessors.register() and defines
        # extendMarkdown(self, md), both part of the Markdown 3.0 API.
        'markdown>=3.0',
        # ocxmd imports yaml to parse the metadata blocks.
        'PyYAML',
    ],
)

Test in MkDocs

The test YAML metadata is here... View source or inspect to see it