# -*- coding: utf-8 -*-

"""
Helper methods to parse rst documents and extract data appropriate for faq/guide
entries creation.
"""

import os
import sys
import glob
import StringIO

from os.path import join
from collections import defaultdict
from docutils.core import publish_parts
from lxml import html

class SphinxImportException(Exception):
    """
    Base class for the errors raised by the sphinx import helpers of this
    module (e.g. when an expected directory of the export is missing).
    """


class SphinxImportValidationError(SphinxImportException):
    """
    Specialised ``SphinxImportException``.

    NOTE(review): judging by the name this is meant for content that fails
    validation; confirm against the code that raises it.
    """


def rst2html(data):
    """
    Use docutils ``publish_parts`` to convert rst ``data`` to html.

    While the conversion runs ``sys.stderr`` is swapped with an in-memory
    buffer, so that anything written to it can be handed back to the caller
    instead of reaching the real stderr.

    Return a ``(body, output)`` tuple: the ``body`` part of the generated
    html and the text captured from stderr.

    Any exception raised by ``publish_parts`` is propagated; the original
    ``sys.stderr`` is restored in that case too.
    """
    origstderr = sys.stderr
    captured = StringIO.StringIO()
    sys.stderr = captured
    try:
        body = publish_parts(data, writer_name='html')['body']
    finally:
        # never leave the process with a swapped stderr, even on failure
        sys.stderr = origstderr

    return body, captured.getvalue()


def _extract_image_data(img, data_type):
    """
    Normalize the ``alt`` and ``src`` attributes of the ``img`` element in
    place and return an ``(alt, src)`` tuple describing it.

    - a missing/empty ``alt`` is replaced by a generic per data type text,
      otherwise it is reduced to ``"<data_type> <last path component>"``
      with everything after the first dot dropped
    - a ``src`` starting with ``/images`` loses its leading slash

    Raise ``SphinxImportValidationError`` if the element has no ``src``.
    """
    original_alt = img.get('alt')
    if original_alt:
        new_alt = data_type + " " + original_alt.split("/")[-1]
        new_alt = new_alt.split(".")[0]
    else:
        new_alt = "okeanos iaas " + data_type + " image"
    img.set('alt', new_alt)

    src = img.get('src')
    if src is None:
        raise SphinxImportValidationError(
            "Image without src attribute found in %s content" % data_type)

    if src.startswith("/images"):
        src = src[1:]
        img.set('src', src)

    # NOTE(review): the returned alt is the one found in the document, not
    # the normalized value stored on the element above. Kept as is since
    # callers may rely on it -- confirm which one is actually wanted.
    return original_alt, src


def parse_rst_data(data, data_type='faq'):
    """
    Parse given data from rst to html. Filter html and generate appropriate
    entries based on data_type provided (``faq`` or ``userguide``).

    An entry is generated for each section that has an ``id`` and whose
    heading is an ``h2`` (``faq``) or an ``h1`` (``userguide``).

    Generated content (one tuple per entry):

        - **category** ``(id, title)`` tuple (used for `faq` data type since
          each question belongs to a specific category), ``(None, None)``
          for `userguide`
        - **slug** the slug of the entry (the ``id`` of the section)
        - **title** the title of the entry
        - **html_data** the html content of the entry, heading excluded
        - **images** (img-alt, img-path) tuples list
        - **output** the stderr output captured during the rst conversion

    Raise ``KeyError`` for an unknown data_type and
    ``SphinxImportValidationError`` if a `faq` entry has no category heading
    or an image has no ``src``.
    """
    html_data, output = rst2html(data)
    doc = html.document_fromstring("<html><body>" + html_data + "</body></html>")

    category_selectors = {
        'faq': ".//div[h2][@class='section']",
        'userguide': ".//div[h1][@class='section']",
    }
    heading_tag = 'h1' if data_type == 'userguide' else 'h2'

    # find first level sections
    for section in doc.findall(category_selectors[data_type]):
        slug = section.get('id')
        if not slug:
            continue

        # the heading becomes the entry title and is left out of its content
        heading = section.find(heading_tag)
        title = heading.text_content()
        section.remove(heading)

        entry_category = (None, None)
        if data_type == 'faq':
            # the category is described by the first h1 found under the
            # parent of the question section
            category_heading = section.getparent().find(".//h1")
            if category_heading is None:
                raise SphinxImportValidationError(
                    "Cannot find category heading for faq entry '%s'" % slug)
            entry_category = (category_heading.getparent().get('id'),
                              category_heading.text_content())

        # must run before serializing since it modifies the img elements
        images = [_extract_image_data(img, data_type)
                  for img in section.findall('.//img')]

        section_html = "".join(html.tostring(child, pretty_print=True)
                               for child in section)

        yield entry_category, slug, title, section_html, images, output


def get_dir_rst_files(dirname):
    """
    Given a dir generate the paths of its ``*.rst`` files, leaving out the
    ones whose file name starts with ``index``.

    The generated paths are the ones returned by ``glob`` and thus include
    ``dirname``.
    """
    for f in glob.glob(join(dirname, '*.rst')):
        # glob results are prefixed with dirname, so the check has to be
        # done on the file name and not on the whole path
        if os.path.basename(f).startswith('index'):
            continue
        yield f


def generate_rst_contents_from_dir(rstdir):
    """
    Handle directory contents and run ``parse_rst_data`` for each file we want
    to parse.

    Valid structure of the dir contents so that appropriate files can be parsed::

        ├── README.rst
        └── source
            ├── conf.py
            ├── faq
            │   ├── cyclades.rst
            │   ├── index.rst
            │   ├── okeanos.rst
            │   └── pithos.rst
            ├── images
            │   ├── cyclades
            │   │   ├── image10.png
            │   │   └── image9.png
            │   ├── faq
            │   │   └── faq_image1.png
            │   ├── intro_img_cyclades.png
            │   └── pithos_guide
            │       └── image2.png
            ├── index.rst
            └── userguide
                ├── cyclades.rst
                ├── index.rst
                ├── pithos.rst
                └── quick-intro.rst

    Will generate, for each parsed entry, a tuple of,

        <'userguide' or 'faq'>, <path of the rst file>, category, slug,
        title, html_data, images, stderr

    where the last six members come from ``parse_rst_data`` and each item of
    ``images`` is an ``[alt, path, absolute path]`` list. ``userguide`` files
    are handled before ``faq`` ones.

    Raise ``SphinxImportException`` if the ``images``, ``userguide`` or
    ``faq`` dir is missing from ``<rstdir>/source``.
    """

    def fpath(name):
        return join(rstdir, 'source', name)

    # validation (isdir is also False for paths that do not exist)
    required_dirs = (
        (fpath('images'), 'Cannot find images dir'),
        (fpath('userguide'), 'Cannot find guide dir'),
        (fpath('faq'), 'Cannot find FAQs dir'),
    )
    for path, message in required_dirs:
        if not os.path.isdir(path):
            raise SphinxImportException(message)

    def fix_image_path(img):
        # append the image path resolved under the source dir; one leading
        # slash is dropped since join would otherwise discard the source dir
        img = list(img)
        relative = img[1][1:] if img[1].startswith("/") else img[1]
        img.append(fpath(relative))
        return img

    for d in ['userguide', 'faq']:
        for f in get_dir_rst_files(fpath(d)):
            # read the file up front so that the handle gets closed before
            # the entries are handed to the consumer
            with open(f) as rst_file:
                data = rst_file.read()

            for category, slug, title, html_data, \
                    images, stderr in parse_rst_data(data, d):
                # absolute image paths
                images = [fix_image_path(img) for img in images]
                yield d, f, category, slug, title, html_data, images, stderr



