Statistics
| Branch: | Tag: | Revision:

root / cloudcms / rstutils.py @ 5e213527

History | View | Annotate | Download (5.8 kB)

1 deb708bf Kostas Papadimitriou
# -*- coding: utf-8 -*-
2 deb708bf Kostas Papadimitriou
3 deb708bf Kostas Papadimitriou
"""
4 deb708bf Kostas Papadimitriou
Helper methods to parse rst documents and extract data appropriate for faq/guide
5 deb708bf Kostas Papadimitriou
entries creation.
6 deb708bf Kostas Papadimitriou
"""
7 deb708bf Kostas Papadimitriou
8 deb708bf Kostas Papadimitriou
import os
9 deb708bf Kostas Papadimitriou
import sys
10 deb708bf Kostas Papadimitriou
import glob
11 deb708bf Kostas Papadimitriou
import StringIO
12 deb708bf Kostas Papadimitriou
13 deb708bf Kostas Papadimitriou
from os.path import join
14 deb708bf Kostas Papadimitriou
from collections import defaultdict
15 deb708bf Kostas Papadimitriou
from docutils.core import publish_parts
16 deb708bf Kostas Papadimitriou
from lxml import html
17 deb708bf Kostas Papadimitriou
18 deb708bf Kostas Papadimitriou
class SphinxImportException(Exception):
    """
    Base class for the errors this module raises while importing sphinx
    exported rst content (e.g. when an expected directory is missing).
    """
20 deb708bf Kostas Papadimitriou
21 deb708bf Kostas Papadimitriou
22 deb708bf Kostas Papadimitriou
class SphinxImportValidationError(SphinxImportException):
    """
    ``SphinxImportException`` subclass for validation failures.

    NOTE(review): no raise site is visible in this module; presumably used by
    the callers that consume the parsed entries -- confirm.
    """
24 deb708bf Kostas Papadimitriou
25 deb708bf Kostas Papadimitriou
26 deb708bf Kostas Papadimitriou
def rst2html(data):
    """
    Use docutils ``publish_parts`` to convert rst to html.

    While docutils runs, ``sys.stderr`` is replaced by an in-memory buffer so
    that anything written to it during the conversion is collected instead of
    being printed. The original stream is always put back, even when the
    conversion raises.

    Return a ``(body, output)`` tuple: the ``'body'`` part produced by the
    html writer and the text captured from stderr.
    """
    origstderr = sys.stderr
    captured = StringIO.StringIO()
    sys.stderr = captured
    try:
        body = publish_parts(data, writer_name='html')['body']
    finally:
        # never leave the process with a swapped stderr if docutils fails
        sys.stderr = origstderr

    return body, captured.getvalue()
40 deb708bf Kostas Papadimitriou
41 deb708bf Kostas Papadimitriou
42 deb708bf Kostas Papadimitriou
def _get_img_el_data(img, data_type):
    """
    Normalize the ``alt`` and ``src`` attributes of an ``img`` element in place
    and return an ``(alt, src)`` tuple for it.

    - a missing/empty ``alt`` becomes ``"okeanos iaas <data_type> image"``
    - any other ``alt`` is reduced to its last ``/`` separated component,
      prefixed with ``data_type`` and cut at the first dot
    - a ``src`` starting with ``/images`` loses its leading slash
    """
    attrs = dict(img.items())

    alt = attrs.get('alt', None)
    if not alt:
        alt = "okeanos iaas " + data_type + " image"
    else:
        alt = (data_type + " " + alt.split("/")[-1]).split(".")[0]
    img.set('alt', alt)

    src = attrs.get('src')
    if src.startswith("/images"):
        src = src[1:]
        img.set('src', src)

    # NOTE(review): the alt returned here is the one read *before* the
    # normalization above (possibly None), while the element itself carries
    # the normalized value. Kept as is because callers may rely on it.
    return attrs.get('alt', None), src


def parse_rst_data(data, data_type='faq'):
    """
    Parse given data from rst to html. Filter html and generate appropriate
    entries based on data_type provided (``'faq'`` or ``'userguide'``).

    For each matching section that has an ``id`` a tuple is yielded with:

        - **category** ``(id, title)`` tuple taken from the first ``h1`` found
          under the section parent (used for `faq` data type since each
          question belongs to a specific category). ``(None, None)`` for
          `userguide` entries and for faq sections with no such ``h1``.
        - **slug** the slug of the entry (the section ``id``)
        - **title** the title of the entry (text of the section heading,
          which is removed from the entry html)
        - **html_data** the html content of the entry
        - **images** (img-alt, img-path) tuples list
        - **output** what docutils wrote to stderr while converting ``data``
    """
    html_data, output = rst2html(data)
    doc = html.document_fromstring(
        "<html><body>" + html_data + "</body></html>")

    category_selectors = {
        'faq': ".//div[h2][@class='section']",
        'userguide': ".//div[h1][@class='section']",
    }
    # userguide entries are first level sections, faq entries second level
    heading_tag = 'h1' if data_type == 'userguide' else 'h2'

    for section in doc.findall(category_selectors[data_type]):
        slug = section.get('id')
        if not slug:
            continue

        # the selector guarantees the heading exists as a direct child
        heading = section.find(heading_tag)
        title = heading.text_content()
        section.remove(heading)

        entry_category = (None, None)
        if data_type == 'faq':
            h1 = section.getparent().find(".//h1")
            # a question that is not nested under a titled section simply
            # has no category instead of failing on a missing element
            if h1 is not None:
                entry_category = (h1.getparent().get('id', None),
                                  h1.text_content())

        # must run before serialization: it rewrites alt/src on the elements
        images = [_get_img_el_data(img, data_type)
                  for img in section.findall('.//img')]

        entry_html = "".join(html.tostring(child, pretty_print=True)
                             for child in section)

        yield entry_category, slug, title, entry_html, images, output
116 deb708bf Kostas Papadimitriou
117 deb708bf Kostas Papadimitriou
118 deb708bf Kostas Papadimitriou
def get_dir_rst_files(dirname):
    """
    Given a dir yield the paths of the ``*.rst`` files it contains, skipping
    the ones whose file name starts with ``index``.
    """
    for f in glob.glob(join(dirname, '*.rst')):
        # glob results include ``dirname``, so the check has to be made
        # against the file name alone for index files to be skipped
        if os.path.basename(f).startswith('index'):
            continue
        yield f
126 deb708bf Kostas Papadimitriou
127 deb708bf Kostas Papadimitriou
128 deb708bf Kostas Papadimitriou
def generate_rst_contents_from_dir(rstdir):
    """
    Handle directory contents and run ``parse_rst_data`` for each file we want
    to parse.

    Valid structure of the dir contents so that appropriate files can be parsed::

        ├── README.rst
        └── source
            ├── conf.py
            ├── faq
            │   ├── cyclades.rst
            │   ├── index.rst
            │   ├── okeanos.rst
            │   └── pithos.rst
            ├── images
            │   ├── cyclades
            │   │   ├── image10.png
            │   │   └── image9.png
            │   ├── faq
            │   │   └── faq_image1.png
            │   ├── intro_img_cyclades.png
            │   └── pithos_guide
            │       └── image2.png
            ├── index.rst
            └── userguide
                ├── cyclades.rst
                ├── index.rst
                ├── pithos.rst
                └── quick-intro.rst

    Raises ``SphinxImportException`` if the ``images``, ``userguide`` or
    ``faq`` directory is missing under ``<rstdir>/source``. Since this is a
    generator, the check runs when iteration starts.

    Yields, for ``userguide`` files first and ``faq`` files after, tuples of

        <'userguide' or 'faq'>, <path of the rst file> + *<generated tuple members of ``parse_rst_data``>

    where each image is a ``[alt, src, path]`` list, ``path`` being ``src``
    joined under ``<rstdir>/source``.
    """

    def source_path(name):
        # everything we read lives under the ``source`` dir of the export
        return join(rstdir, 'source', name)

    # validation
    required_dirs = (
        ('images', 'Cannot find images dir'),
        ('userguide', 'Cannot find guide dir'),
        ('faq', 'Cannot find FAQs dir'),
    )
    for name, message in required_dirs:
        if not os.path.isdir(source_path(name)):
            raise SphinxImportException(message)

    def fix_image_path(img):
        # append the image path resolved against the source dir
        img = list(img)
        src = img[1]
        if src.startswith("/"):
            src = src[1:]
        img.append(source_path(src))
        return img

    for d in ['userguide', 'faq']:
        for f in get_dir_rst_files(source_path(d)):
            # close the file as soon as it is read instead of leaking it
            with open(f) as rst_file:
                data = rst_file.read()

            for category, slug, title, html_data, \
                    images, stderr in parse_rst_data(data, d):
                images = [fix_image_path(img) for img in images]
                yield d, f, category, slug, title, html_data, images, stderr
200 deb708bf Kostas Papadimitriou
201 deb708bf Kostas Papadimitriou