1 # -*- coding: utf-8 -*-
4 Helper methods to parse rst documents and extract data appropriate for faq/guide
13 from os.path import join
14 from collections import defaultdict
15 from docutils.core import publish_parts
# Base error for the rst import helpers in this module; raised by
# generate_rst_contents_from_dir when an expected directory is missing.
18 class SphinxImportException(Exception):
# Specialisation of SphinxImportException.
# NOTE(review): nothing in the visible code raises it -- presumably it signals
# invalid imported content; confirm against callers.
22 class SphinxImportValidationError(SphinxImportException):
def rst2html(data):
    """
    Use docutils publish_parts to convert rst to html.

    ``sys.stderr`` is swapped for an in-memory buffer while docutils runs so
    that anything it reports there can be handed back to the caller instead
    of being printed.

    Returns a ``(body, output)`` tuple: the rendered html body and the text
    that was written to stderr during the conversion (empty string if none).
    """
    origstderr = sys.stderr
    captured = StringIO.StringIO()
    sys.stderr = captured
    try:
        parts = publish_parts(data, writer_name='html')['body']
    finally:
        # always put the real stderr back, even when docutils raises
        sys.stderr = origstderr

    # getvalue() returns the whole buffer; read() would start at the current
    # position, which sits at the end of the buffer right after the writes
    output = captured.getvalue()
    return parts, output
# Generator: renders `data` through rst2html, selects the sections matching
# `data_type` and yields one 6-tuple per section (see the final yield).
42 def parse_rst_data(data, data_type='faq'):
44 Parse given data from rst to html. Filter html and generate approriate
45 entries based on data_type provided.
49 - **category** (used for `faq` data type since each question belongs to a
51 - **slug** the slug of the entry
52 - **title** the title of the entry
53 - **html_data** the html content of the entry
54 - **images** (img-alt, img-path) tuples list
56 html_data, output = rst2html(data)
# the rendered body is a fragment; wrap it so it parses as a full document
# (`html` is presumably lxml.html -- its import is not visible here)
57 doc = html.document_fromstring("<html><body>" + html_data + "</body></html>")
# an entry is a div.section that directly contains an h2 (faq) or h1 (userguide)
59 category_selectors = {
60 'faq': ".//div[h2][@class='section']",
61 'userguide': ".//div[h1][@class='section']",
64 # find first level sections
65 sections = doc.findall(category_selectors[data_type])
66 for section in sections:
# category stays (None, None) unless data_type is 'faq' (set further down)
67 entry_category = (None, None)
69 attrs = dict(section.items())
70 if not attrs.get('id', None):
# the section's id attribute doubles as the entry slug
73 slug = attrs.get('id')
# the heading text becomes the title; the heading element is then removed so
# it is not serialized again with the section's children below
74 if data_type == 'userguide':
75 title = section.find('h1').text_content()
76 section.remove(section.find('h1'))
78 title = section.find('h2').text_content()
79 section.remove(section.find('h2'))
81 image_els = section.findall('.//img')
# faq category = (id of the element holding the first h1 found under the
# first ancestor returned by iterancestors(), that h1's text)
# NOTE(review): find() returns None when no h1 exists, which would make the
# getparent() call raise AttributeError -- confirm input always has an h1
83 if data_type == 'faq':
84 h1 = list(section.iterancestors())[0].find(".//h1")
85 el_with_id = dict(h1.getparent().items())
86 entry_category = (el_with_id.get('id', None), h1.text_content())
# helper mapped over every <img> of the section; returns an (alt, src) pair
89 def get_img_el_data(img):
90 attrs = dict(img.items())
91 alt = attrs.get('alt', None)
93 alt = "okeanos iaas " + data_type + " image"
# NOTE(review): str.split() with a separator always returns at least one
# element, so both len(...) > 0 tests below are always true
95 if len(alt.split("/")) > 0:
96 alt = data_type + " " + alt.split("/")[-1]
97 if len(alt.split(".")) > 0:
98 alt = alt.split(".")[0]
102 src = attrs.get('src')
103 if src.startswith("/images"):
# NOTE(review): the first item returned is the element's original alt
# attribute, not the `alt` value derived above
107 return attrs.get('alt', None), src
# NOTE(review): a list only under Python 2; Python 3 map() is a lazy iterator
109 images = map(get_img_el_data, image_els)
# append each remaining child of the section, serialized, to html_data
112 for child in section.getchildren():
113 html_data += html.tostring(child, pretty_print=True)
# six values are yielded: the five listed in the docstring plus `output`,
# the second value returned by rst2html
115 yield entry_category, slug, title, html_data, images, output
def get_dir_rst_files(dirname):
    """
    Given a dir yield the paths of its *.rst files, skipping index files.

    The yielded paths are the ones produced by ``glob`` and therefore still
    include ``dirname``. Files whose name starts with ``index`` are left out.
    """
    for f in glob.glob(join(dirname, '*.rst')):
        # glob returns dirname-prefixed paths, so the ``index`` test has to
        # look at the bare file name rather than the whole path
        if os.path.basename(f).startswith('index'):
            continue
        yield f
def generate_rst_contents_from_dir(rstdir):
    """
    Handle directory contents and run ``parse_rst_data`` for each file we want
    to import.

    The directories this function looks for under ``rstdir``::

        rstdir/
            source/
                faq/          (*.rst files)
                images/
                userguide/    (*.rst files)

    Generates one tuple per parsed entry::

        ('userguide' or 'faq', <path of the rst file>, category, slug,
         title, html_data, images, stderr)

    where every item of ``images`` is the image record produced by
    ``parse_rst_data`` converted to a list, with the image path resolved
    under ``rstdir/source`` appended as its last item.

    Raises ``SphinxImportException`` when the images, userguide or faq
    directory is missing. Because this is a generator the check runs when
    the first item is requested.
    """
    def fpath(x):
        return join(rstdir, 'source', x)

    required_dirs = (
        ('images', 'Cannot find images dir'),
        ('userguide', 'Cannot find guide dir'),
        ('faq', 'Cannot find FAQs dir'),
    )
    for name, error in required_dirs:
        # isdir() is already False for a path that does not exist
        if not os.path.isdir(fpath(name)):
            raise SphinxImportException(error)

    def fix_image_path(img):
        # make image path absolute; work on a list copy so that append is
        # available even when the record is a tuple
        img = list(img)
        src = img[1]
        if src.startswith("/"):
            # drop the leading slash, otherwise join() would discard rstdir
            src = src[1:]
        img.append(fpath(src))
        return img

    for d in ['userguide', 'faq']:
        for f in get_dir_rst_files(fpath(d)):
            # close the file as soon as its content has been read
            with open(f) as rst_file:
                data = rst_file.read()
            for entry in parse_rst_data(data, d):
                category, slug, title, html_data, images, stderr = entry
                # absolute image paths
                images = [fix_image_path(img) for img in images]
                yield d, f, category, slug, title, html_data, images, stderr