Revision 67114d6e

b/cloudcms/clients.py
5 5
sources (e.g. redmine files page).
6 6
"""
7 7

  
8
import urllib, urllib2, cookielib, urlparse
8
import urllib, urllib2, cookielib, urlparse, re
9 9

  
10 10
from datetime import datetime
11 11
from lxml import html
......
19 19
    Base class for the different version source handlers.
20 20
    """
21 21
    def __init__(self, link=None, os="linux", arch="all", regex=".", name=None,
22
            cache_backend=None, extra_options={}):
22
            cache_backend=None, extra_options={}, source_params={}):
23 23
        self.os = os
24 24
        self.arch = arch
25 25
        self.link = link
26 26
        self.versions = []
27
        self.source_params = source_params
27 28
        self.extra_version_options = {}
28 29
        self.extra_version_options.update(extra_options)
29
        self.extra_version_options.update({'source_type': self.source_type, 'os': self.os})
30
        self.extra_version_options.update({'source_type': self.source_type,
31
                                           'os': self.os})
30 32

  
31 33
        self.cache_backend = cache_backend
32 34
        self.cache_key = self.os + self.arch + self.link
......
89 91
        return None
90 92

  
91 93

  
94
class ExtractSource(VersionSource):
    """
    Parse an html page and return the list of existing urls that match the
    provided regex.

    The regex is read from the ``file_regex`` entry of ``source_params`` and
    is matched, anchored at the start, against the last ``/``-separated
    segment of each anchor's ``href``.
    """
    source_type = 'url_extract'

    def load(self):
        """
        Fetch ``self.link``, collect the matching anchors into
        ``self.versions`` and return ``self``.

        Each collected entry is a dict with the keys ``name`` (the anchor's
        ``data-name`` attribute, else ``self.name``), ``link`` (the anchor's
        ``href`` exactly as written in the page) and ``version`` (the
        anchor's ``data-version`` attribute, else ``None``).
        """
        # Named ``page`` rather than ``html`` so the module-level
        # ``from lxml import html`` import is not shadowed inside this method.
        page = self.get_url(self.link)

        # ``re.compile(None)`` raises TypeError; when no ``file_regex`` is
        # configured fall back to the permissive "." that
        # ``VersionSource.__init__`` uses as the default for ``regex``.
        pattern = re.compile(self.source_params.get('file_regex') or ".")

        files = []
        for anchor in page.xpath("//a"):
            href = anchor.attrib.get("href")
            # Anchors without an href yield None here, which would make the
            # ``in`` test below raise TypeError, so skip them explicitly.
            if not href or "/" not in href:
                continue

            fname = href.split("/")[-1]
            if not pattern.match(fname):
                continue

            # NOTE(review): the href is stored verbatim; relative hrefs are
            # not resolved against ``self.link`` -- confirm that consumers
            # of ``versions`` expect that.
            files.append({
                'name': anchor.attrib.get("data-name") or self.name,
                'link': href,
                'version': anchor.attrib.get("data-version") or None,
            })

        self.versions = files
        return self
134

  
135

  
92 136
class RedmineSource(VersionSource):
93 137
    """
94 138
    Parse a redmine project files page and return the list of existing files.
......
159 203
            source_params = self._sources.get(s)
160 204
            if source_params['type'] in SOURCE_TYPES:
161 205
                kwargs = {'os': s, 'cache_backend': cache_backend}
206
                kwargs['source_params'] = \
207
                        source_params.get('source', {}).__dict__
162 208
                args = source_params.get('args', [])
163
                self.sources[s] = SOURCE_TYPES[source_params['type']](*args, **kwargs)
209
                self.sources[s] = SOURCE_TYPES[source_params['type']](*args,
210
                                                                      **kwargs)
164 211

  
165 212
    def get_latest_versions(self):
166 213
        """
......
194 241
                strdate = row.find("pubdate").text
195 242
                date = datetime.strptime(strdate.split(" +")[0],
196 243
                                         "%a, %d %B %Y %H:%M:%S")
197
                print "DATE", date
198 244
                version = row.find("title").text
199 245
                return {
200 246
                    'name': name,
......
215 261
    'redmine_files': RedmineSource,
216 262
    'direct': DirectSource,
217 263
    'pithos_xml': PithosXMLSource,
218
    'link': LinkSource
264
    'link': LinkSource,
265
    'url_extract': ExtractSource
219 266
}
b/cloudcms/models.py
126 126
        from cloudcms.clients import ClientVersions
127 127
        for s in self.clientversionsource_set.all():
128 128
            sources[s.os] = {'type': s.source_type,
129
                             'args': [s.link]}
129
                             'args': [s.link], 'source': s}
130 130

  
131 131
        return ClientVersions(sources, cache_backend=cache)
132 132

  
......
155 155
                                   choices=(('link', 'Link'),
156 156
                                            ('direct', 'Direct'),
157 157
                                            ('pithos_xml', 'Pithos XML'),
158
                                            ('url_extract',
159
                                             'Extract url using regex'),
158 160
                                            ('redmine_files','Redmine files')))
159 161
    os = models.CharField(max_length=255)
160 162
    link = models.CharField(max_length=255)

Also available in: Unified diff