5 |
5 |
sources (e.g. redmine files page).
|
6 |
6 |
"""
|
7 |
7 |
|
8 |
|
import urllib, urllib2, cookielib, urlparse
|
|
8 |
import urllib, urllib2, cookielib, urlparse, re
|
9 |
9 |
|
10 |
10 |
from datetime import datetime
|
11 |
11 |
from lxml import html
|
... | ... | |
19 |
19 |
Base class for the different version source handlers.
|
20 |
20 |
"""
|
21 |
21 |
def __init__(self, link=None, os="linux", arch="all", regex=".", name=None,
|
22 |
|
cache_backend=None, extra_options={}):
|
|
22 |
cache_backend=None, extra_options={}, source_params={}):
|
23 |
23 |
self.os = os
|
24 |
24 |
self.arch = arch
|
25 |
25 |
self.link = link
|
26 |
26 |
self.versions = []
|
|
27 |
self.source_params = source_params
|
27 |
28 |
self.extra_version_options = {}
|
28 |
29 |
self.extra_version_options.update(extra_options)
|
29 |
|
self.extra_version_options.update({'source_type': self.source_type, 'os': self.os})
|
|
30 |
self.extra_version_options.update({'source_type': self.source_type,
|
|
31 |
'os': self.os})
|
30 |
32 |
|
31 |
33 |
self.cache_backend = cache_backend
|
32 |
34 |
self.cache_key = self.os + self.arch + self.link
|
... | ... | |
89 |
91 |
return None
|
90 |
92 |
|
91 |
93 |
|
|
94 |
class ExtractSource(VersionSource):
    """
    Parse an html page and return the list of existing urls that match the
    provided regex.

    The regex is read from the ``file_regex`` key of ``source_params`` and
    is matched against the last path component of each anchor's ``href``.
    """
    source_type = 'url_extract'

    def load(self):
        """
        Fetch ``self.link`` and collect the anchors whose file name matches
        ``source_params['file_regex']``.

        Each match is stored in ``self.versions`` as a dict with keys:

        - ``name``: the anchor's ``data-name`` attribute, else ``self.name``
        - ``link``: the raw ``href`` value of the anchor
        - ``version``: the anchor's ``data-version`` attribute, else None

        Anchors without an ``href`` attribute, or whose ``href`` contains no
        "/", are ignored.

        Returns ``self``.
        """
        # NOTE: re.compile() raises TypeError when 'file_regex' is missing
        # from source_params.
        regex = re.compile(self.source_params.get('file_regex'))

        # Named `page` so the module-level lxml `html` import is not
        # shadowed inside this method.
        page = self.get_url(self.link)

        files = []
        for anchor in page.xpath("//a"):
            href = anchor.attrib.get("href")
            # <a> elements used as named targets carry no href at all;
            # skip them instead of failing on the membership test below.
            if not href or "/" not in href:
                continue

            # Only the file name (last path component) is tested, and
            # match() anchors the pattern at its start.
            fname = href.rsplit("/", 1)[-1]
            if not regex.match(fname):
                continue

            files.append({
                'name': anchor.attrib.get("data-name") or self.name,
                'link': href,
                'version': anchor.attrib.get("data-version") or None,
            })

        self.versions = files
        return self
|
|
134 |
|
|
135 |
|
92 |
136 |
class RedmineSource(VersionSource):
|
93 |
137 |
"""
|
94 |
138 |
Parse a redmine project files page and return the list of existing files.
|
... | ... | |
159 |
203 |
source_params = self._sources.get(s)
|
160 |
204 |
if source_params['type'] in SOURCE_TYPES:
|
161 |
205 |
kwargs = {'os': s, 'cache_backend': cache_backend}
|
|
206 |
kwargs['source_params'] = \
|
|
207 |
source_params.get('source', {}).__dict__
|
162 |
208 |
args = source_params.get('args', [])
|
163 |
|
self.sources[s] = SOURCE_TYPES[source_params['type']](*args, **kwargs)
|
|
209 |
self.sources[s] = SOURCE_TYPES[source_params['type']](*args,
|
|
210 |
**kwargs)
|
164 |
211 |
|
165 |
212 |
def get_latest_versions(self):
|
166 |
213 |
"""
|
... | ... | |
194 |
241 |
strdate = row.find("pubdate").text
|
195 |
242 |
date = datetime.strptime(strdate.split(" +")[0],
|
196 |
243 |
"%a, %d %B %Y %H:%M:%S")
|
197 |
|
print "DATE", date
|
198 |
244 |
version = row.find("title").text
|
199 |
245 |
return {
|
200 |
246 |
'name': name,
|
... | ... | |
215 |
261 |
'redmine_files': RedmineSource,
|
216 |
262 |
'direct': DirectSource,
|
217 |
263 |
'pithos_xml': PithosXMLSource,
|
218 |
|
'link': LinkSource
|
|
264 |
'link': LinkSource,
|
|
265 |
'url_extract': ExtractSource
|
219 |
266 |
}
|