/cloudcms/rstutils.py - snf-cloudcms - Greek Research and Technology Network's projects

root / cloudcms / rstutils.py @ d240ebcb

History | View | Annotate | Download (5.8 kB)

       # -*- coding: utf-8 -*-
       """
       Helper methods to parse rst documents and extract data appropriate for faq/guide
       entries creation.
       """
       import os
       import sys
       import glob
       import StringIO
       from os.path import join
       from collections import defaultdict
       from docutils.core import publish_parts
       from lxml import html
       class SphinxImportException(Exception):
           """
           Error raised when a sphinx export cannot be imported, e.g. when one of the
           expected directories is missing.
           """
       class SphinxImportValidationError(SphinxImportException):
           """
           Specialised ``SphinxImportException``.

           NOTE(review): judging by the name this is meant for validation failures of
           the imported content; confirm against the code that raises it.
           """
       def rst2html(data):
           """
           Use docutils ``publish_parts`` to convert rst to html.

           While the conversion runs, ``sys.stderr`` is temporarily replaced by an
           in-memory buffer so that anything written to it during the conversion is
           collected instead of reaching the real stream. The original ``sys.stderr``
           is always restored, even when the conversion raises.

           :param data: the rst source text.
           :returns: ``(body, output)`` tuple, where ``body`` is the ``'body'`` part
               returned by ``publish_parts`` and ``output`` is the text that was
               written to stderr during the conversion.
           """
           origstderr = sys.stderr
           captured = StringIO.StringIO()
           sys.stderr = captured
           try:
               body = publish_parts(data, writer_name='html')['body']
           finally:
               # never leave the process with a hijacked stderr
               sys.stderr = origstderr
           return body, captured.getvalue()
       def parse_rst_data(data, data_type='faq'):
           """
           Parse given data from rst to html. Filter html and generate appropriate
           entries based on data_type provided.

           :param data: the rst source text.
           :param data_type: ``'faq'`` or ``'userguide'``; selects which sections of
               the generated html become entries (``h2`` sections for faq, ``h1``
               sections for userguide). Any other value raises ``KeyError``.

           Yields one tuple per section that has an ``id`` attribute::

               (entry_category, slug, title, html_data, images, output)

           - **entry_category** ``(id, title)`` of the enclosing ``h1`` section (used
             for `faq` data type since each question belongs to a specific
             category); ``(None, None)`` for the other data types
           - **slug** the slug of the entry (the ``id`` of the section)
           - **title** the title of the entry (text of the section heading)
           - **html_data** the html content of the entry, heading excluded
           - **images** (img-alt, img-path) tuples list; img-alt is the ``alt``
             attribute as found in the generated html, img-path is the ``src`` with
             the leading slash of ``/images...`` paths stripped
           - **output** what docutils wrote to stderr while converting ``data``
           """
           html_data, output = rst2html(data)
           doc = html.document_fromstring("<html><body>" + html_data + "</body></html>")

           category_selectors = {
               'faq': ".//div[h2][@class='section']",
               'userguide': ".//div[h1][@class='section']",
           }
           # tag that holds the title of each entry
           heading_tag = 'h1' if data_type == 'userguide' else 'h2'

           def get_img_el_data(img):
               # Normalize the ``alt`` and ``src`` attributes of the img element in
               # place and return the (original alt, normalized src) tuple.
               attrs = dict(img.items())
               original_alt = attrs.get('alt', None)
               if not original_alt:
                   alt = "okeanos iaas " + data_type + " image"
               else:
                   # "<data_type> <last path component>", cut at the first dot
                   alt = data_type + " " + original_alt.split("/")[-1]
                   alt = alt.split(".")[0]
               img.set('alt', alt)

               src = attrs.get('src')
               if src.startswith("/images"):
                   # make the image path relative
                   src = src[1:]
                   img.set('src', src)
               return original_alt, src

           # find first level sections
           for section in doc.findall(category_selectors[data_type]):
               slug = dict(section.items()).get('id', None)
               if not slug:
                   continue

               # the heading becomes the title and is excluded from the entry body
               heading = section.find(heading_tag)
               title = heading.text_content()
               section.remove(heading)

               entry_category = (None, None)
               if data_type == 'faq':
                   # the category is the h1 section the question lives in
                   h1 = section.getparent().find(".//h1")
                   el_with_id = dict(h1.getparent().items())
                   entry_category = (el_with_id.get('id', None), h1.text_content())

               # must be evaluated eagerly: the img elements are modified in place
               # and the changes have to end up in the serialized html below
               images = [get_img_el_data(img) for img in section.findall('.//img')]

               entry_html = "".join(
                   html.tostring(child, pretty_print=True)
                   for child in section.getchildren())
               yield entry_category, slug, title, entry_html, images, output
       def get_dir_rst_files(dirname):
           """
           Given a dir yield the paths of its ``*.rst`` files, excluding the files
           whose name starts with ``index``.

           :param dirname: the directory to look into (not searched recursively).
           :returns: generator of paths, each one prefixed with ``dirname``.
           """
           for f in glob.glob(join(dirname, '*.rst')):
               # glob returns paths prefixed with dirname, so the check has to be
               # done on the file name and not on the whole path
               if os.path.basename(f).startswith('index'):
                   continue
               yield f
       def generate_rst_contents_from_dir(rstdir):
           """
           Handle directory contents and run ``parse_rst_data`` for each file we want
           to parse.

           Valid structure of the dir contents so that appropriate files can be parsed::

               ├── README.rst
               └── source
                   ├── conf.py
                   ├── faq
                   │   ├── cyclades.rst
                   │   ├── index.rst
                   │   ├── okeanos.rst
                   │   └── pithos.rst
                   ├── images
                   │   ├── cyclades
                   │   │   ├── image10.png
                   │   │   └── image9.png
                   │   ├── faq
                   │   │   └── faq_image1.png
                   │   ├── intro_img_cyclades.png
                   │   └── pithos_guide
                   │       └── image2.png
                   ├── index.rst
                   └── userguide
                       ├── cyclades.rst
                       ├── index.rst
                       ├── pithos.rst
                       └── quick-intro.rst

           :param rstdir: the directory that contains the ``source`` directory.
           :raises SphinxImportException: when ``source/images``,
               ``source/userguide`` or ``source/faq`` is not an existing directory.
               Since this is a generator, the check runs on the first iteration.

           Yields tuples of::

               (doc_type, rst_path, category, slug, title, html_data, images, stderr)

           where ``doc_type`` is ``'userguide'`` or ``'faq'``, ``rst_path`` is the
           path of the parsed file, ``images`` is a list of
           ``[img-alt, img-path, img-path joined under <rstdir>/source]`` lists and
           the rest are the tuple members generated by ``parse_rst_data``.
           """
           def fpath(x):
               return join(rstdir, 'source', x)

           # validation; isdir is also False for paths that do not exist
           required_dirs = (
               ('images', 'Cannot find images dir'),
               ('userguide', 'Cannot find guide dir'),
               ('faq', 'Cannot find FAQs dir'),
           )
           for name, error in required_dirs:
               if not os.path.isdir(fpath(name)):
                   raise SphinxImportException(error)

           def fix_image_path(img):
               # append the image path resolved under the source dir
               img = list(img)
               if img[1].startswith("/"):
                   img.append(fpath(img[1][1:]))
               else:
                   img.append(fpath(img[1]))
               return img

           for d in ['userguide', 'faq']:
               for f in get_dir_rst_files(fpath(d)):
                   # read the whole file up front so that the handle is closed
                   # before the generator gets suspended
                   with open(f) as rst_file:
                       data = rst_file.read()
                   for category, slug, title, html_data, \
                           images, stderr in parse_rst_data(data, d):
                       # resolved image paths
                       images = [fix_image_path(img) for img in images]
                       yield d, f, category, slug, title, html_data, images, stderr

Synnefo » snf-cloudcms

root / cloudcms / rstutils.py @ d240ebcb