root / cloudcms / clients.py @ 67114d6e


"""
CMS dynamic application clients module

Helper module to automatically retrieve client download links from different
sources (e.g. redmine files page).
"""

import urllib, urllib2, cookielib, urlparse, re

from datetime import datetime
from lxml import html

from django.conf import settings

CLIENTS_CACHE_TIMEOUT = getattr(settings, 'CLOUDCMS_CLIENTS_CACHE_TIMEOUT', 120)
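
# Illustrative only: the timeout can be overridden from the project's
# settings.py. The setting name comes from the getattr() call above; the
# value below is a hypothetical example.
#
#   CLOUDCMS_CLIENTS_CACHE_TIMEOUT = 60 * 15   # cache versions for 15 minutes
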
class VersionSource(object):
    """
    Base class for the different version source handlers.
    """
    def __init__(self, link=None, os="linux", arch="all", regex=".", name=None,
            cache_backend=None, extra_options={}, source_params={}):
        self.os = os
        self.arch = arch
        self.link = link
        self.versions = []
        self.source_params = source_params
        self.extra_version_options = {}
        self.extra_version_options.update(extra_options)
        self.extra_version_options.update({'source_type': self.source_type,
                                           'os': self.os})

        self.cache_backend = cache_backend
        self.cache_key = self.os + self.arch + self.link

        # fall back to the os identifier when no explicit name is provided
        self.name = name or os

        # generic urllib2 opener
        self.opener = urllib2.build_opener(
                    urllib2.HTTPRedirectHandler(),
                    urllib2.HTTPHandler(debuglevel=0),
                    urllib2.HTTPSHandler(debuglevel=0),
                    urllib2.HTTPCookieProcessor(cookielib.CookieJar()))

    def get_url(self, url):
        """
        Load url content and return the html etree object.
        """
        return html.document_fromstring(self.opener.open(url).read())

    def load(self):
        """
        Fill the self.versions attribute with dict objects of the following
        format::

            {'date': datetime.datetime(2012, 3, 16, 14, 29),
             'link': 'http://www.domain.com/clientdownload.exe',
             'name': 'Client download',
             'os': 'windows',
             'version': None}
        """
        raise NotImplementedError

    def update(self):
        """
        Load wrapper which handles versions caching if cache_backend is set.
        """
        if self.cache_backend:
            self.versions = self.cache_backend.get(self.cache_key)

        if not self.versions:
            self.load()

        if self.cache_backend:
            self.cache_backend.set(self.cache_key, self.versions,
                                   CLIENTS_CACHE_TIMEOUT)

    def get_latest(self):
        """
        Return the latest available version.
        """
        # update versions
        self.update()

        # check that at least one version is available
        if len(self.versions):
            version = self.versions[0]
            version.update(self.extra_version_options)
            return version

        return None
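
# Illustrative sketch of how a concrete source behaves: a subclass sets a
# source_type, fills self.versions in load(), and get_latest() returns the
# first entry merged with the extra version options. All values below are
# hypothetical.
#
#   class DummySource(VersionSource):
#       source_type = 'dummy'
#
#       def load(self):
#           self.versions = [{'name': 'Client download',
#                             'link': 'http://www.domain.com/clientdownload.exe',
#                             'date': datetime(2012, 3, 16, 14, 29),
#                             'version': None}]
#
#   DummySource(link='http://www.domain.com/').get_latest()
#   # -> the version dict above, plus 'source_type': 'dummy' and 'os': 'linux'
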
class ExtractSource(VersionSource):
    """
    Parse an html page and return the list of existing urls that match the
    provided regex.
    """
    source_type = 'url_extract'

    def load(self):
        """
        Extract the anchor links whose file name matches the configured
        'file_regex' source parameter.
        """
        spliturl = urlparse.urlsplit(self.link)
        baseurl = spliturl.geturl().replace(spliturl.path, '')
        doc = self.get_url(self.link)
        anchors = doc.xpath("//a")
        regex = re.compile(self.source_params.get('file_regex'))

        # helper: turn an anchor element into a version entry
        def _parse_file(anchor):
            link = anchor.attrib.get("href")
            return {'name': self.name, 'link': link, 'version': None}

        files = []
        for anchor in anchors:
            href = anchor.attrib.get("href")
            if not href or "/" not in href:
                continue

            fname = href.split("/")[-1]
            if not regex.match(fname):
                continue

            data = {
                'name': anchor.attrib.get("data-name") or self.name,
                'link': href,
                'version': anchor.attrib.get("data-version") or None
            }
            files.append(data)

        self.versions = files
        return self
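
# Illustrative usage sketch (hypothetical url and regex): given a plain
# downloads page full of <a href="..."> links, only anchors whose file name
# matches 'file_regex' are kept.
#
#   source = ExtractSource(link='http://downloads.example.com/clients/',
#                          os='windows',
#                          source_params={'file_regex': r'.*\.exe$'})
#   latest = source.get_latest()     # scrapes the page and picks the first hit
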
class RedmineSource(VersionSource):
    """
    Parse a redmine project files page and return the list of existing files.
    """
    source_type = 'redmine_files'

    def load(self):
        """
        Load the redmine files url and extract the downloads. Also parse the
        date of each file to be able to identify the latest download.
        """
        spliturl = urlparse.urlsplit(self.link)
        baseurl = spliturl.geturl().replace(spliturl.path, '')
        doc = self.get_url(self.link)
        files = doc.xpath("//tr[contains(@class, 'file')]")

        # helper: turn a table row into a version entry
        def _parse_row(row):
            name = row.xpath("td[@class='filename']/a")[0].text
            link = baseurl + \
                row.xpath("td[@class='filename']/a")[0].attrib.get('href')
            strdate = row.xpath("td[@class='created_on']")[0].text
            date = datetime.strptime(strdate, '%m/%d/%Y %I:%M %p')
            return {'name': name, 'link': link, 'date': date, 'version': None}

        versions = map(_parse_row, files)
        versions.sort(reverse=True, key=lambda r: r['date'])
        self.versions = versions
        return self
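
# Illustrative usage sketch (hypothetical project url): the redmine files page
# is scraped and, after sorting on the 'created_on' column, the newest file is
# returned first.
#
#   source = RedmineSource(
#       link='http://redmine.example.com/projects/client/files', os='linux')
#   latest = source.get_latest()
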
class DirectSource(VersionSource):
    """
    Direct link to a version. Dummy VersionSource which always returns a
    single entry for the provided link.
    """
    source_type = 'direct'

    def load(self):
        self.versions = [{'name': self.name, 'link': self.link,
                          'date': None, 'version': None}]
        return self.versions


class LinkSource(DirectSource):
    """
    Used when the version exists at some other url (e.g. apple store client).
    """
    source_type = 'link'
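
# Illustrative sketch (hypothetical urls): DirectSource points straight at a
# downloadable file, while LinkSource points at an external page such as an
# app store entry.
#
#   DirectSource(link='http://clients.example.com/win.exe', os='windows')
#   LinkSource(link='http://store.example.com/client-app', os='ios')
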
class ClientVersions(object):
    """
    Client versions manager. Given a sources dict like::

        {'windows': {'type': 'direct',
                     'args': ['http://clients.com/win.exe'], 'kwargs': {}},
         'linux': {'type': 'redmine_files',
                   'args': ['http://redmine.com/projects/client/files'],
                   'kwargs': {}}}

    it initializes a dict of the proper VersionSource objects.
    """

    def __init__(self, sources, cache_backend=None):
        self._sources = sources
        self.sources = {}

        for s in self._sources:
            source_params = self._sources.get(s)
            if source_params['type'] in SOURCE_TYPES:
                kwargs = {'os': s, 'cache_backend': cache_backend}
                # 'source' may be an object (e.g. a settings entry); fall back
                # to an empty dict when it is missing
                kwargs['source_params'] = \
                        getattr(source_params.get('source'), '__dict__', {})
                args = source_params.get('args', [])
                self.sources[s] = SOURCE_TYPES[source_params['type']](*args,
                                                                      **kwargs)

    def get_latest_versions(self):
        """
        Return the latest version of each version source.
        """
        for os, source in self.sources.iteritems():
            yield source.get_latest()
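
# Illustrative usage sketch, mirroring the sources dict documented in the
# class docstring (urls are hypothetical; the Django cache is optional):
#
#   from django.core.cache import cache
#
#   sources = {
#       'windows': {'type': 'direct',
#                   'args': ['http://clients.example.com/win.exe']},
#       'linux': {'type': 'redmine_files',
#                 'args': ['http://redmine.example.com/projects/client/files']},
#   }
#   clients = ClientVersions(sources, cache_backend=cache)
#   for version in clients.get_latest_versions():
#       print version
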
class PithosXMLSource(VersionSource):
    """
    Extract versions from versioninfo.xml
    """
    source_type = 'pithos_xml'

    def load(self):
        """
        Extract the items contained in versioninfo.xml.
        """
        spliturl = urlparse.urlsplit(self.link)
        baseurl = spliturl.geturl().replace(spliturl.path, '')
        doc = self.get_url(self.link)
        items = doc.xpath("//item")

        # helper: turn an <item> element into a version entry
        def _parse_row(row):
            try:
                name = row.find("title").text
                link = row.find("enclosure").attrib["url"]
                strdate = row.find("pubdate").text
                date = datetime.strptime(strdate.split(" +")[0],
                                         "%a, %d %B %Y %H:%M:%S")
                version = row.find("title").text
                return {
                    'name': name,
                    'link': link,
                    'date': date,
                    'version': version
                }
            except Exception:
                return None

        versions = filter(bool, map(_parse_row, items))
        self.versions = versions
        return self
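
# Illustrative sketch of the <item> structure the parser above expects in
# versioninfo.xml (values are hypothetical; lxml's html parser lowercases tag
# names, hence row.find("pubdate")):
#
#   <item>
#     <title>1.0.2</title>
#     <pubDate>Fri, 16 March 2012 14:29:00 +0200</pubDate>
#     <enclosure url="http://downloads.example.com/client-1.0.2.dmg" />
#   </item>
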
# SOURCE TYPES CLASS MAP
SOURCE_TYPES = {
    'redmine_files': RedmineSource,
    'direct': DirectSource,
    'pithos_xml': PithosXMLSource,
    'link': LinkSource,
    'url_extract': ExtractSource
}
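
# Minimal, illustrative smoke test (hypothetical url; running it requires a
# configured Django settings module because of the settings import above):
if __name__ == '__main__':
    source = DirectSource(link='http://clients.example.com/win.exe',
                          os='windows', name='Windows client')
    print source.get_latest()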