Statistics
| Branch: | Tag: | Revision:

root / cloudcms / rstutils.py @ d240ebcb

History | View | Annotate | Download (5.8 kB)

1
# -*- coding: utf-8 -*-
2

    
3
"""
4
Helper methods to parse rst documents and extract data appropriate for faq/guide
5
entries creation.
6
"""
7

    
8
import os
9
import sys
10
import glob
11
import StringIO
12

    
13
from os.path import join
14
from collections import defaultdict
15
from docutils.core import publish_parts
16
from lxml import html
17

    
18
class SphinxImportException(Exception):
    """
    Base error of the sphinx/rst import helpers.

    Raised by ``generate_rst_contents_from_dir`` when one of the expected
    directories of the export is missing.
    """
20

    
21

    
22
class SphinxImportValidationError(SphinxImportException):
    """
    Specialised ``SphinxImportException``.

    NOTE(review): nothing in this module raises it; presumably used by callers
    to report invalid imported content -- confirm against the importing code.
    """
24

    
25

    
26
def rst2html(data):
    """
    Use docutils ``publish_parts`` to convert rst to html.

    While the conversion runs ``sys.stderr`` is replaced by an in-memory
    buffer, so that whatever gets written to it can be handed back to the
    caller instead of being printed.

    Return a ``(body, output)`` tuple: the ``body`` part generated by the html
    writer and the text that was written to ``sys.stderr`` in the meantime.
    """
    origstderr = sys.stderr
    captured = StringIO.StringIO()
    sys.stderr = captured
    try:
        body = publish_parts(data, writer_name='html')['body']
    finally:
        # restore the real stderr even if docutils raises, otherwise the
        # whole process would keep writing to our temporary buffer
        sys.stderr = origstderr

    return body, captured.getvalue()
40

    
41

    
42
def parse_rst_data(data, data_type='faq'):
    """
    Parse given data from rst to html. Filter html and generate appropriate
    entries based on data_type provided.

    An entry is a ``div`` with class ``section`` that has an ``id`` and a
    direct ``h2`` child (``faq``) or a direct ``h1`` child (``userguide``).
    Sections with no ``id`` are skipped. A ``data_type`` other than ``faq`` or
    ``userguide`` results in a ``KeyError`` once the generator is iterated.

    Each generated item is a tuple of:

        - **category** ``(id, title)`` tuple. For `faq` data it is taken from
          the first ``h1`` found below the parent element of the section (the
          id comes from the element containing that ``h1``); for `userguide`
          data it is ``(None, None)``
        - **slug** the slug of the entry (the ``id`` of the section)
        - **title** the title of the entry (text of the section heading)
        - **html_data** the html content of the entry, heading excluded
        - **images** (img-alt, img-path) tuples list
        - **output** whatever docutils wrote to stderr during the conversion
    """
    body_html, output = rst2html(data)
    doc = html.document_fromstring("<html><body>" + body_html + "</body></html>")

    category_selectors = {
        'faq': ".//div[h2][@class='section']",
        'userguide': ".//div[h1][@class='section']",
    }
    heading_tag = 'h1' if data_type == 'userguide' else 'h2'

    def get_img_el_data(img):
        # Normalize the alt/src attributes of the element in place and return
        # the (alt, src) pair describing the image.
        original_alt = img.get('alt')
        if original_alt:
            # keep only the last path component, prefixed with the data type,
            # and drop everything after the first dot (file extension)
            alt = data_type + " " + original_alt.split("/")[-1]
            alt = alt.split(".")[0]
        else:
            alt = "okeanos iaas " + data_type + " image"
        img.set('alt', alt)

        src = img.get('src')
        if src.startswith("/images"):
            # make the images path relative
            src = src[1:]
            img.set('src', src)

        # NOTE(review): the alt returned here is the value the element had
        # before normalization (possibly None), not the one set above --
        # confirm this is what callers expect.
        return original_alt, src

    # find first level sections
    for section in doc.findall(category_selectors[data_type]):
        slug = section.get('id')
        if not slug:
            continue

        # the heading becomes the title and is removed from the entry content
        heading = section.find(heading_tag)
        title = heading.text_content()
        section.remove(heading)

        entry_category = (None, None)
        if data_type == 'faq':
            h1 = section.getparent().find(".//h1")
            entry_category = (h1.getparent().get('id'), h1.text_content())

        # images must be processed before serializing, since the helper
        # rewrites the alt/src attributes of the elements
        images = [get_img_el_data(img) for img in section.findall('.//img')]

        entry_html = "".join(html.tostring(child, pretty_print=True)
                             for child in section.getchildren())

        yield entry_category, slug, title, entry_html, images, output
116

    
117

    
118
def get_dir_rst_files(dirname):
    """
    Given a dir generate the paths of the ``*.rst`` files it directly
    contains, skipping the files whose name starts with ``index``.

    The generated paths are the ones returned by ``glob`` (``dirname`` joined
    with the file name), in the order ``glob`` returns them.
    """
    for path in glob.glob(join(dirname, '*.rst')):
        # glob results are prefixed with dirname, so the index check has to
        # be done against the file name and not against the whole path
        if os.path.basename(path).startswith('index'):
            continue
        yield path
126

    
127

    
128
def generate_rst_contents_from_dir(rstdir):
    """
    Handle directory contents and run ``parse_rst_data`` for each file we want
    to parse.

    Valid structure of the dir contents so that appropriate files can be parsed::

        ├── README.rst
        └── source
            ├── conf.py
            ├── faq
            │   ├── cyclades.rst
            │   ├── index.rst
            │   ├── okeanos.rst
            │   └── pithos.rst
            ├── images
            │   ├── cyclades
            │   │   ├── image10.png
            │   │   └── image9.png
            │   ├── faq
            │   │   └── faq_image1.png
            │   ├── intro_img_cyclades.png
            │   └── pithos_guide
            │       └── image2.png
            ├── index.rst
            └── userguide
                ├── cyclades.rst
                ├── index.rst
                ├── pithos.rst
                └── quick-intro.rst

    ``userguide`` files are processed first, then ``faq`` files. For each
    entry generated by ``parse_rst_data`` a tuple of the following is
    generated::

        (<'userguide' or 'faq'>, <path of the rst file>, category, slug,
         title, html_data, images, stderr)

    where each item of ``images`` is a ``[alt, src, path]`` list, ``path``
    being ``src`` resolved against the ``source`` directory of ``rstdir``.

    Raises ``SphinxImportException`` if the ``images``, ``userguide`` or
    ``faq`` directory is missing from ``<rstdir>/source``. Since this is a
    generator, the check takes place when iteration starts.
    """

    def fpath(name):
        # path of an item located inside the ``source`` dir of the export
        return join(rstdir, 'source', name)

    # validation
    required_dirs = (
        (fpath('images'), 'Cannot find images dir'),
        (fpath('userguide'), 'Cannot find guide dir'),
        (fpath('faq'), 'Cannot find FAQs dir'),
    )
    for required_dir, message in required_dirs:
        if not os.path.isdir(required_dir):
            raise SphinxImportException(message)

    def fix_image_path(img):
        # extend the (alt, src) pair with the image path under ``source``
        img = list(img)
        src = img[1]
        img.append(fpath(src[1:] if src.startswith("/") else src))
        return img

    for d in ['userguide', 'faq']:
        for f in get_dir_rst_files(fpath(d)):
            # read the whole file up front so the handle gets closed before
            # we start generating entries
            with open(f) as rst_file:
                rst_data = rst_file.read()

            for (category, slug, title, html_data,
                 images, stderr) in parse_rst_data(rst_data, d):
                images = [fix_image_path(img) for img in images]
                yield d, f, category, slug, title, html_data, images, stderr
200

    
201

    
202