root / cloudcms / rstutils.py @ a13220f8
History | View | Annotate | Download (5.8 kB)
1 |
# -*- coding: utf-8 -*-
|
---|---|
2 |
|
3 |
"""
|
4 |
Helper methods to parse rst documents and extract data appropriate for faq/guide
|
5 |
entries creation.
|
6 |
"""
|
7 |
|
8 |
import os |
9 |
import sys |
10 |
import glob |
11 |
import StringIO |
12 |
|
13 |
from os.path import join |
14 |
from collections import defaultdict |
15 |
from docutils.core import publish_parts |
16 |
from lxml import html |
17 |
|
18 |
class SphinxImportException(Exception):
    """
    Base error for failures while importing sphinx/rst content.

    Raised by ``generate_rst_contents_from_dir`` when one of the expected
    source directories cannot be found.
    """
|
20 |
|
21 |
|
22 |
class SphinxImportValidationError(SphinxImportException):
    """
    Specialised ``SphinxImportException``.

    NOTE(review): presumably meant for content that fails validation during
    import; nothing in the visible part of this module raises it - confirm
    against callers.
    """
|
24 |
|
25 |
|
26 |
def rst2html(data):
    """
    Use docutils ``publish_parts`` to convert rst ``data`` to html.

    While the conversion runs ``sys.stderr`` is replaced by an in-memory
    buffer, so whatever gets written to stderr in the meantime is captured
    instead of being printed.

    Returns a ``(body, output)`` tuple: the ``body`` part of the published
    document and the text captured from stderr.
    """
    origstderr = sys.stderr
    captured = StringIO.StringIO()
    sys.stderr = captured
    try:
        parts = publish_parts(data, writer_name='html')['body']
    finally:
        # always put the real stderr back, even when docutils raises,
        # otherwise a failed conversion would leave stderr swallowed for the
        # rest of the process
        sys.stderr = origstderr

    return parts, captured.getvalue()
|
40 |
|
41 |
|
42 |
def parse_rst_data(data, data_type='faq'):
    """
    Parse given data from rst to html. Filter html and generate appropriate
    entries based on data_type provided.

    ``data_type`` must be ``'faq'`` or ``'userguide'``; any other value raises
    ``KeyError`` when the generator is first advanced.

    This is a generator. For every matching section that has a non-empty
    ``id`` attribute it yields a
    ``(category, slug, title, html_data, images, output)`` tuple:

    - **category** ``(id, title)`` tuple. For the ``faq`` data type (each
      question belongs to a specific category) it is taken from the first
      ``h1`` found under the section's parent: the ``id`` of that heading's
      parent element and the heading text. ``(None, None)`` for ``userguide``.
    - **slug** the slug of the entry (the ``id`` attribute of the section)
    - **title** the title of the entry (text of the section heading; the
      heading element itself is removed from the generated html)
    - **html_data** the html content of the entry
    - **images** (img-alt, img-path) tuples list
    - **output** the stderr output captured by ``rst2html``; the same value
      is repeated for every entry of the document
    """
    html_data, output = rst2html(data)
    doc = html.document_fromstring("<html><body>" + html_data + "</body></html>")

    category_selectors = {
        'faq': ".//div[h2][@class='section']",
        'userguide': ".//div[h1][@class='section']",
    }
    # tag of the heading that carries the entry title
    heading_tag = 'h1' if data_type == 'userguide' else 'h2'

    def get_img_el_data(img):
        # Normalise the ``alt`` and ``src`` attributes of ``img`` in place and
        # return an ``(alt, src)`` tuple for it.
        original_alt = img.get('alt')
        if original_alt:
            # keep only the last path segment, prefixed with the data type,
            # and drop everything after the first dot (file extension)
            alt = data_type + " " + original_alt.split("/")[-1]
            alt = alt.split(".")[0]
        else:
            alt = "okeanos iaas " + data_type + " image"
        img.set('alt', alt)

        src = img.get('src')
        if src.startswith("/images"):
            # turn "/images/..." into the relative "images/..."
            src = src[1:]
        img.set('src', src)

        # NOTE(review): the alt returned here is the ORIGINAL attribute value
        # (possibly None), not the normalised one set on the element above.
        # Kept as is because callers may depend on it - confirm the intent.
        return original_alt, src

    # find first level sections
    for section in doc.findall(category_selectors[data_type]):
        slug = section.get('id')
        if not slug:
            continue

        # the selector guarantees the section has a direct heading child
        heading = section.find(heading_tag)
        title = heading.text_content()
        section.remove(heading)

        entry_category = (None, None)
        if data_type == 'faq':
            h1 = section.getparent().find(".//h1")
            entry_category = (h1.getparent().get('id'), h1.text_content())

        # Built eagerly on purpose: get_img_el_data rewrites the img elements
        # and that has to happen before the section is serialized below.
        images = [get_img_el_data(img) for img in section.findall('.//img')]

        html_data = "".join(
            [html.tostring(child, pretty_print=True) for child in section])

        yield entry_category, slug, title, html_data, images, output
|
116 |
|
117 |
|
118 |
def get_dir_rst_files(dirname):
    """
    Given a dir yield the paths of its ``*.rst`` files, skipping the ones
    whose file name starts with ``index``.

    Paths are yielded as returned by ``glob`` (i.e. prefixed with
    ``dirname``), in sorted order so that results are deterministic.
    """
    for path in sorted(glob.glob(join(dirname, '*.rst'))):
        # glob returns paths prefixed with ``dirname``, so the index check has
        # to look at the file name only and not at the whole path
        if os.path.basename(path).startswith('index'):
            continue
        yield path
|
126 |
|
127 |
|
128 |
def generate_rst_contents_from_dir(rstdir):
    """
    Handle directory contents and run ``parse_rst_data`` for each file we want
    to parse.

    Valid structure of the dir contents so that appropriate files can be parsed::

        ├── README.rst
        └── source
            ├── conf.py
            ├── faq
            │   ├── cyclades.rst
            │   ├── index.rst
            │   ├── okeanos.rst
            │   └── pithos.rst
            ├── images
            │   ├── cyclades
            │   │   ├── image10.png
            │   │   └── image9.png
            │   ├── faq
            │   │   └── faq_image1.png
            │   ├── intro_img_cyclades.png
            │   └── pithos_guide
            │       └── image2.png
            ├── index.rst
            └── userguide
                ├── cyclades.rst
                ├── index.rst
                ├── pithos.rst
                └── quick-intro.rst

    This is a generator. ``userguide`` files are processed first, then
    ``faq`` files. For every entry produced by ``parse_rst_data`` it yields::

        (<'userguide' or 'faq'>, <path of the rst file>, category, slug,
         title, html_data, images, stderr)

    where each item of ``images`` is a ``[alt, src, path]`` list, ``path``
    being ``src`` joined under ``<rstdir>/source``.

    Raises ``SphinxImportException`` if the ``images``, ``userguide`` or
    ``faq`` directory is missing under ``<rstdir>/source``. Since this is a
    generator the check runs when iteration starts, not at call time.
    """

    def fpath(name):
        # path of ``name`` inside the sphinx ``source`` directory
        return join(rstdir, 'source', name)

    # validation
    required_dirs = (
        ('images', 'Cannot find images dir'),
        ('userguide', 'Cannot find guide dir'),
        ('faq', 'Cannot find FAQs dir'),
    )
    for dir_name, message in required_dirs:
        # isdir() is already False for paths that do not exist
        if not os.path.isdir(fpath(dir_name)):
            raise SphinxImportException(message)

    def fix_image_path(img):
        # append the image path resolved under the source dir; a single
        # leading slash is dropped so that join() does not discard the prefix
        img = list(img)
        src = img[1]
        if src.startswith("/"):
            src = src[1:]
        img.append(fpath(src))
        return img

    for d in ['userguide', 'faq']:
        for f in get_dir_rst_files(fpath(d)):
            # read through a context manager so the handle is closed promptly
            with open(f) as rst_file:
                rst_data = rst_file.read()

            for category, slug, title, html_data, \
                    images, stderr in parse_rst_data(rst_data, d):
                # absolute image paths
                images = [fix_image_path(img) for img in images]
                yield d, f, category, slug, title, html_data, images, stderr
|
200 |
|
201 |
|
202 |
|