root / cloudcms / clients.py @ 67114d6e


"""
CMS dynamic application clients module

Helper module to automatically retrieve client download links from different
sources (e.g. redmine files page).
"""

import urllib, urllib2, cookielib, urlparse, re

from datetime import datetime
from lxml import html

from django.conf import settings

CLIENTS_CACHE_TIMEOUT = getattr(settings, 'CLOUDCMS_CLIENTS_CACHE_TIMEOUT', 120)
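
# Illustrative only: the timeout can be overridden from the project's
# settings.py. The setting name comes from the getattr() call above; the
# value below is a hypothetical example.
#
#   CLOUDCMS_CLIENTS_CACHE_TIMEOUT = 60 * 15   # cache versions for 15 minutes
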
class VersionSource(object):
    """
    Base class for the different version source handlers.
    """
    def __init__(self, link=None, os="linux", arch="all", regex=".", name=None,
            cache_backend=None, extra_options={}, source_params={}):
        self.os = os
        self.arch = arch
        self.link = link
        self.versions = []
        self.source_params = source_params
        self.extra_version_options = {}
        self.extra_version_options.update(extra_options)
        self.extra_version_options.update({'source_type': self.source_type,
                                           'os': self.os})

        self.cache_backend = cache_backend
        self.cache_key = self.os + self.arch + self.link

        # fall back to the os identifier when no explicit name is provided
        self.name = name or os

        # generic urllib2 opener
        self.opener = urllib2.build_opener(
                    urllib2.HTTPRedirectHandler(),
                    urllib2.HTTPHandler(debuglevel=0),
                    urllib2.HTTPSHandler(debuglevel=0),
                    urllib2.HTTPCookieProcessor(cookielib.CookieJar()))

    def get_url(self, url):
        """
        Load url content and return the html etree object.
        """
        return html.document_fromstring(self.opener.open(url).read())

    def load(self):
        """
        Fill the self.versions attribute with dict objects of the following
        format::

            {'date': datetime.datetime(2012, 3, 16, 14, 29),
             'link': 'http://www.domain.com/clientdownload.exe',
             'name': 'Client download',
             'os': 'windows',
             'version': None}
        """
        raise NotImplementedError

    def update(self):
        """
        Load wrapper which handles versions caching if cache_backend is set.
        """
        if self.cache_backend:
            self.versions = self.cache_backend.get(self.cache_key)

        if not self.versions:
            self.load()

        if self.cache_backend:
            self.cache_backend.set(self.cache_key, self.versions,
                                   CLIENTS_CACHE_TIMEOUT)

    def get_latest(self):
        """
        Return the latest available version.
        """
        # update versions
        self.update()

        # check that at least one version is available
        if len(self.versions):
            version = self.versions[0]
            version.update(self.extra_version_options)
            return version

        return None
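
# Illustrative sketch of how a concrete source behaves: a subclass sets a
# source_type, fills self.versions in load(), and get_latest() returns the
# first entry merged with the extra version options. All values below are
# hypothetical.
#
#   class DummySource(VersionSource):
#       source_type = 'dummy'
#
#       def load(self):
#           self.versions = [{'name': 'Client download',
#                             'link': 'http://www.domain.com/clientdownload.exe',
#                             'date': datetime(2012, 3, 16, 14, 29),
#                             'version': None}]
#
#   DummySource(link='http://www.domain.com/').get_latest()
#   # -> the version dict above, plus 'source_type': 'dummy' and 'os': 'linux'
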
class ExtractSource(VersionSource):
    """
    Parse an html page and return the list of existing urls that match the
    provided regex.
    """
    source_type = 'url_extract'

    def load(self):
        """
        Extract the anchor links whose file name matches the configured
        'file_regex' source parameter.
        """
        spliturl = urlparse.urlsplit(self.link)
        baseurl = spliturl.geturl().replace(spliturl.path, '')
        doc = self.get_url(self.link)
        anchors = doc.xpath("//a")
        regex = re.compile(self.source_params.get('file_regex'))

        # helper: turn an anchor element into a version entry
        def _parse_file(anchor):
            link = anchor.attrib.get("href")
            return {'name': self.name, 'link': link, 'version': None}

        files = []
        for anchor in anchors:
            href = anchor.attrib.get("href")
            if not href or "/" not in href:
                continue

            fname = href.split("/")[-1]
            if not regex.match(fname):
                continue

            data = {
                'name': anchor.attrib.get("data-name") or self.name,
                'link': href,
                'version': anchor.attrib.get("data-version") or None
            }
            files.append(data)

        self.versions = files
        return self
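
# Illustrative usage sketch (hypothetical url and regex): given a plain
# downloads page full of <a href="..."> links, only anchors whose file name
# matches 'file_regex' are kept.
#
#   source = ExtractSource(link='http://downloads.example.com/clients/',
#                          os='windows',
#                          source_params={'file_regex': r'.*\.exe$'})
#   latest = source.get_latest()     # scrapes the page and picks the first hit
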
class RedmineSource(VersionSource):
    """
    Parse a redmine project files page and return the list of existing files.
    """
    source_type = 'redmine_files'

    def load(self):
        """
        Load the redmine files url and extract the downloads. Also parse the
        date of each file to be able to identify the latest download.
        """
        spliturl = urlparse.urlsplit(self.link)
        baseurl = spliturl.geturl().replace(spliturl.path, '')
        doc = self.get_url(self.link)
        files = doc.xpath("//tr[contains(@class, 'file')]")

        # helper: turn a table row into a version entry
        def _parse_row(row):
            name = row.xpath("td[@class='filename']/a")[0].text
            link = baseurl + \
                row.xpath("td[@class='filename']/a")[0].attrib.get('href')
            strdate = row.xpath("td[@class='created_on']")[0].text
            date = datetime.strptime(strdate, '%m/%d/%Y %I:%M %p')
            return {'name': name, 'link': link, 'date': date, 'version': None}

        versions = map(_parse_row, files)
        versions.sort(reverse=True, key=lambda r: r['date'])
        self.versions = versions
        return self
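
# Illustrative usage sketch (hypothetical project url): the redmine files page
# is scraped and, after sorting on the 'created_on' column, the newest file is
# returned first.
#
#   source = RedmineSource(
#       link='http://redmine.example.com/projects/client/files', os='linux')
#   latest = source.get_latest()
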
class DirectSource(VersionSource):
    """
    Direct link to a version. Dummy VersionSource which always returns a
    single entry for the provided link.
    """
    source_type = 'direct'

    def load(self):
        self.versions = [{'name': self.name, 'link': self.link,
                          'date': None, 'version': None}]
        return self.versions


class LinkSource(DirectSource):
    """
    Used when the version exists at some other url (e.g. apple store client).
    """
    source_type = 'link'
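
# Illustrative sketch (hypothetical urls): DirectSource points straight at a
# downloadable file, while LinkSource points at an external page such as an
# app store entry.
#
#   DirectSource(link='http://clients.example.com/win.exe', os='windows')
#   LinkSource(link='http://store.example.com/client-app', os='ios')
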
class ClientVersions(object):
    """
    Client versions manager. Given a sources dict like::

        {'windows': {'type': 'direct',
                     'args': ['http://clients.com/win.exe'], 'kwargs': {}},
         'linux': {'type': 'redmine_files',
                   'args': ['http://redmine.com/projects/client/files'],
                   'kwargs': {}}}

    it initializes a dict of the proper VersionSource objects.
    """

    def __init__(self, sources, cache_backend=None):
        self._sources = sources
        self.sources = {}

        for s in self._sources:
            source_params = self._sources.get(s)
            if source_params['type'] in SOURCE_TYPES:
                kwargs = {'os': s, 'cache_backend': cache_backend}
                # 'source' may be an object (e.g. a settings entry); fall back
                # to an empty dict when it is missing
                kwargs['source_params'] = \
                        getattr(source_params.get('source'), '__dict__', {})
                args = source_params.get('args', [])
                self.sources[s] = SOURCE_TYPES[source_params['type']](*args,
                                                                      **kwargs)

    def get_latest_versions(self):
        """
        Return the latest version of each version source.
        """
        for os, source in self.sources.iteritems():
            yield source.get_latest()
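
# Illustrative usage sketch, mirroring the sources dict documented in the
# class docstring (urls are hypothetical; the Django cache is optional):
#
#   from django.core.cache import cache
#
#   sources = {
#       'windows': {'type': 'direct',
#                   'args': ['http://clients.example.com/win.exe']},
#       'linux': {'type': 'redmine_files',
#                 'args': ['http://redmine.example.com/projects/client/files']},
#   }
#   clients = ClientVersions(sources, cache_backend=cache)
#   for version in clients.get_latest_versions():
#       print version
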
class PithosXMLSource(VersionSource):
    """
    Extract versions from versioninfo.xml
    """
    source_type = 'pithos_xml'

    def load(self):
        """
        Extract the items contained in versioninfo.xml.
        """
        spliturl = urlparse.urlsplit(self.link)
        baseurl = spliturl.geturl().replace(spliturl.path, '')
        doc = self.get_url(self.link)
        items = doc.xpath("//item")

        # helper: turn an <item> element into a version entry
        def _parse_row(row):
            try:
                name = row.find("title").text
                link = row.find("enclosure").attrib["url"]
                strdate = row.find("pubdate").text
                date = datetime.strptime(strdate.split(" +")[0],
                                         "%a, %d %B %Y %H:%M:%S")
                version = row.find("title").text
                return {
                    'name': name,
                    'link': link,
                    'date': date,
                    'version': version
                }
            except Exception:
                return None

        versions = filter(bool, map(_parse_row, items))
        self.versions = versions
        return self
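
# Illustrative sketch of the <item> structure the parser above expects in
# versioninfo.xml (values are hypothetical; lxml's html parser lowercases tag
# names, hence row.find("pubdate")):
#
#   <item>
#     <title>1.0.2</title>
#     <pubDate>Fri, 16 March 2012 14:29:00 +0200</pubDate>
#     <enclosure url="http://downloads.example.com/client-1.0.2.dmg" />
#   </item>
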
# SOURCE TYPES CLASS MAP
SOURCE_TYPES = {
    'redmine_files': RedmineSource,
    'direct': DirectSource,
    'pithos_xml': PithosXMLSource,
    'link': LinkSource,
    'url_extract': ExtractSource
}
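
# Minimal, illustrative smoke test (hypothetical url; running it requires a
# configured Django settings module because of the settings import above):
if __name__ == '__main__':
    source = DirectSource(link='http://clients.example.com/win.exe',
                          os='windows', name='Windows client')
    print source.get_latest()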