Statistics
| Branch: | Revision:

root / drivers / tapdisk-syslog.c @ abdb293f

History | View | Annotate | Download (11.2 kB)

1
/*
2
 * Copyright (c) 2009, XenSource Inc.
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 *     * Redistributions of source code must retain the above copyright
8
 *       notice, this list of conditions and the following disclaimer.
9
 *     * Redistributions in binary form must reproduce the above copyright
10
 *       notice, this list of conditions and the following disclaimer in the
11
 *       documentation and/or other materials provided with the distribution.
12
 *     * Neither the name of XenSource Inc. nor the names of its contributors
13
 *       may be used to endorse or promote products derived from this software
14
 *       without specific prior written permission.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 */
28

    
29
/*
30
 * A non-blocking, buffered BSD syslog client.
31
 *
32
 * http://www.ietf.org/rfc/rfc3164.txt (FIXME: Read this.)
33
 */
34

    
35
#ifdef HAVE_CONFIG_H
36
#include "config.h"
37
#endif
38

    
39
#define _ISOC99_SOURCE
40
#include <stdlib.h>
41
#include <stdio.h>
42
#include <errno.h>
43
#include <unistd.h>
44
#include <time.h>
45
#include <fcntl.h>
46
#include <stdarg.h>
47
#include <sys/mman.h>
48
#include <sys/socket.h>
49
#include <sys/un.h>
50

    
51
#include "tapdisk-server.h"
52
#include "tapdisk-syslog.h"
53
#include "tapdisk-utils.h"
54

    
55
/* Smaller of two values. NB. evaluates each argument more than once. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
56

    
57
/*
 * Forward declarations: the socket helpers are defined further down
 * but are already needed by the ring and message code.
 */
static int  tapdisk_syslog_sock_send(td_syslog_t *log,
                                     const void *msg, size_t size);
static int  tapdisk_syslog_sock_connect(td_syslog_t *log);
static void tapdisk_syslog_sock_mask(td_syslog_t *log);
static void tapdisk_syslog_sock_unmask(td_syslog_t *log);
63

    
64
/* Address of the local syslog daemon's unix domain socket. */
static const struct sockaddr_un syslog_addr = {
        .sun_family = AF_UNIX,
        .sun_path   = "/dev/log",
};
68

    
69
/*
 * RING_PTR:  address of the ring byte a free-running offset maps to
 *            (the offset is reduced modulo the ring size).
 * RING_FREE: number of ring bytes not currently occupied, i.e. the
 *            ring size minus the producer/consumer distance.
 */
#define RING_PTR(_l, _off)                                              \
        (&(_l)->ring[(_off) % (_l)->ringsz])

#define RING_FREE(_l)                                                   \
        ((_l)->ringsz - ((_l)->prod - (_l)->cons))
74

    
75
/*
76
 * NB. Ring buffer.
77
 *
78
 * We allocate a number of pages as indicated by @bufsz during
79
 * initialization. From that, 1K is reserved for message staging, the
80
 * rest is cyclic ring space.
81
 *
82
 * All producer/consumer offsets wrap on size_t range, not buffer
83
 * size. Hence the RING() macros.
84
 */
85

    
86
static void
87
__tapdisk_syslog_ring_init(td_syslog_t *log)
88
{
89
        log->buf     = NULL;
90
        log->bufsz   = 0;
91
        log->msg     = NULL;
92
        log->ring    = NULL;
93
        log->ringsz  = 0;
94
}
95

    
96
/*
 * Round @size up to the next multiple of the system page size, as
 * reported by sysconf(_SC_PAGE_SIZE).
 */
static inline size_t
page_align(size_t size)
{
        const size_t mask = (size_t)sysconf(_SC_PAGE_SIZE) - 1;

        return (size + mask) & ~mask;
}
102

    
103
static void
104
tapdisk_syslog_ring_uninit(td_syslog_t *log)
105
{
106
        if (log->buf)
107
                munmap(log->buf, log->bufsz);
108

    
109
        __tapdisk_syslog_ring_init(log);
110
}
111

    
112
static int
113
tapdisk_syslog_ring_init(td_syslog_t *log, size_t size)
114
{
115
        int prot, flags, err;
116

    
117
        __tapdisk_syslog_ring_init(log);
118

    
119
        log->bufsz = page_align(size);
120

    
121
        prot  = PROT_READ|PROT_WRITE;
122
        flags = MAP_ANONYMOUS|MAP_PRIVATE;
123

    
124
        log->buf = mmap(NULL, log->bufsz, prot, flags, -1, 0);
125
        if (log->buf == MAP_FAILED) {
126
                log->buf = NULL;
127
                err = -ENOMEM;
128
                goto fail;
129
        }
130

    
131
        err = mlock(log->buf, size);
132
        if (err) {
133
                err = -errno;
134
                goto fail;
135
        }
136

    
137
        log->msg    = log->buf;
138
        log->ring   = log->buf + TD_SYSLOG_PACKET_MAX;
139
        log->ringsz = size     - TD_SYSLOG_PACKET_MAX;
140

    
141
        return 0;
142

    
143
fail:
144
        tapdisk_syslog_ring_uninit(log);
145

    
146
        return err;
147
}
148

    
149
static int
150
tapdisk_syslog_ring_write_str(td_syslog_t *log, const char *msg, size_t len)
151
{
152
        size_t size, prod, i;
153

    
154
        len  = MIN(len, TD_SYSLOG_PACKET_MAX);
155
        size = len + 1;
156

    
157
        if (size > RING_FREE(log))
158
                return -ENOBUFS;
159

    
160
        prod = log->prod;
161

    
162
        for (i = 0; i < len; ++i) {
163
                char c;
164

    
165
                c = msg[i];
166
                if (c == 0)
167
                        break;
168

    
169
                *RING_PTR(log, prod) = c;
170
                prod++;
171
        }
172

    
173
        *RING_PTR(log, prod) = 0;
174

    
175
        log->prod = prod + 1;
176

    
177
        return 0;
178
}
179

    
180
static ssize_t
181
tapdisk_syslog_ring_read_pkt(td_syslog_t *log, char *msg, size_t size)
182
{
183
        size_t cons;
184
        ssize_t sz;
185

    
186
        size = MIN(size, TD_SYSLOG_PACKET_MAX);
187

    
188
        sz   = 0;
189
        cons = log->cons;
190

    
191
        while (sz < size) {
192
                char c;
193

    
194
                if (cons == log->prod)
195
                        break;
196

    
197
                c = *RING_PTR(log, cons);
198
                msg[sz++] = c;
199
                cons++;
200

    
201
                if (c == 0)
202
                        break;
203
        }
204

    
205
        return sz - 1;
206
}
207

    
208
static int
209
tapdisk_syslog_ring_dispatch_one(td_syslog_t *log)
210
{
211
        size_t len;
212
        int err;
213

    
214
        len = tapdisk_syslog_ring_read_pkt(log, log->msg,
215
                                           TD_SYSLOG_PACKET_MAX);
216
        if (len == -1)
217
                return -ENOMSG;
218

    
219
        err = tapdisk_syslog_sock_send(log, log->msg, len);
220

    
221
        if (err == -EAGAIN)
222
                return err;
223

    
224
        if (err)
225
                goto fail;
226

    
227
done:
228
        log->cons += len + 1;
229
        return 0;
230

    
231
fail:
232
        log->stats.fails++;
233
        goto done;
234
}
235

    
236
static void
237
tapdisk_syslog_ring_warning(td_syslog_t *log)
238
{
239
        int n, err;
240

    
241
        n        = log->oom;
242
        log->oom = 0;
243

    
244
        err = tapdisk_syslog(log, LOG_WARNING,
245
                             "tapdisk-syslog: %d messages dropped", n);
246
        if (err)
247
                log->oom = n;
248
}
249

    
250
static void
251
tapdisk_syslog_ring_dispatch(td_syslog_t *log)
252
{
253
        int err;
254

    
255
        do {
256
                err = tapdisk_syslog_ring_dispatch_one(log);
257
        } while (!err);
258

    
259
        if (log->oom)
260
                tapdisk_syslog_ring_warning(log);
261
}
262

    
263
static int
264
tapdisk_syslog_vsprintf(char *buf, size_t size,
265
                        int prio, const struct timeval *tv, const char *ident,
266
                        const char *fmt, va_list ap)
267
{
268
        char tsbuf[TD_SYSLOG_STRTIME_LEN+1];
269
        size_t len;
270

    
271
        /*
272
         * PKT       := PRI HEADER MSG
273
         * PRI       := "<" {"0" .. "9"} ">"
274
         * HEADER    := TIMESTAMP HOSTNAME
275
         * MSG       := <TAG> <SEP> <CONTENT>
276
         * SEP       := ":" | " " | "["
277
         */
278

    
279
        tapdisk_syslog_strftime(tsbuf, sizeof(tsbuf), tv);
280

    
281
        len = 0;
282

    
283
        /* NB. meant to work with c99 null buffers */
284

    
285
        len += snprintf(buf ? buf + len : NULL, buf ? size - len : 0,
286
                        "<%d>%s %s: ", prio, tsbuf, ident);
287

    
288
        len += vsnprintf(buf ? buf + len : NULL, buf ? size - len : 0,
289
                         fmt, ap);
290

    
291
        return MIN(len, size);
292
}
293

    
294
/*
295
 * NB. Sockets.
296
 *
297
 * Syslog is based on a connectionless (DGRAM) unix transport.
298
 *
299
 * While it is reliable, we cannot block on syslogd because -- as with
300
 * any IPC in tapdisk -- we could deadlock in page I/O writeback.
301
 * Hence the syslog(3) avoidance on the datapath, which this code
302
 * facilitates.
303
 *
304
 * This type of socket has a single (global) receive buffer on
305
 * syslogd's end, but no send buffer at all. The ring provides just that:
306
 * headroom on the sender side.
307
 *
308
 * The transport is rather stateless, but we still need to connect()
309
 * the socket, or select() will find no receive buffer to block
310
 * on. While we never disconnect, connections are unreliable because
311
 * syslog may shut down.
312
 *
313
 * Reconnection will be attempted with every user message submitted.
314
 * Any send() or connect() failure other than EAGAIN discards the
315
 * message. Also, the write event handler will go on to discard any
316
 * remaining ring contents as well, once the socket is disconnected.
317
 *
318
 * In summary, no attempts to mask service blackouts in here.
319
 */
320

    
321
int
322
tapdisk_vsyslog(td_syslog_t *log, int prio, const char *fmt, va_list ap)
323
{
324
        struct timeval now;
325
        size_t len;
326
        int err;
327

    
328
        gettimeofday(&now, NULL);
329

    
330
        len = tapdisk_syslog_vsprintf(log->msg, TD_SYSLOG_PACKET_MAX,
331
                                      prio | log->facility,
332
                                      &now, log->ident, fmt, ap);
333

    
334
        log->stats.count += 1;
335
        log->stats.bytes += len;
336

    
337
        if (log->cons != log->prod)
338
                goto busy;
339

    
340
send:
341
        err = tapdisk_syslog_sock_send(log, log->msg, len);
342
        if (!err)
343
                return 0;
344

    
345
        if (err == -ENOTCONN) {
346
                err = tapdisk_syslog_sock_connect(log);
347
                if (!err)
348
                        goto send;
349
        }
350

    
351
        if (err != -EAGAIN)
352
                goto fail;
353

    
354
        tapdisk_syslog_sock_unmask(log);
355

    
356
busy:
357
        if (log->oom) {
358
                err = -ENOBUFS;
359
                goto oom;
360
        }
361

    
362
        err = tapdisk_syslog_ring_write_str(log, log->msg, len);
363
        if (!err)
364
                return 0;
365

    
366
        log->oom_tv = now;
367

    
368
oom:
369
        log->oom++;
370
        log->stats.drops++;
371
        return err;
372

    
373
fail:
374
        log->stats.fails++;
375
        return err;
376
}
377

    
378
int
379
tapdisk_syslog(td_syslog_t *log, int prio, const char *fmt, ...)
380
{
381
        va_list ap;
382
        int err;
383

    
384
        va_start(ap, fmt);
385
        err = tapdisk_vsyslog(log, prio, fmt, ap);
386
        va_end(ap);
387

    
388
        return err;
389
}
390

    
391
static int
392
tapdisk_syslog_sock_send(td_syslog_t *log, const void *msg, size_t size)
393
{
394
        ssize_t n;
395

    
396
        log->stats.xmits++;
397

    
398
        n = send(log->sock, msg, size, MSG_DONTWAIT);
399
        if (n < 0)
400
                return -errno;
401

    
402
        return 0;
403
}
404

    
405
static void
406
tapdisk_syslog_sock_event(event_id_t id, char mode, void *private)
407
{
408
        td_syslog_t *log = private;
409

    
410
        tapdisk_syslog_ring_dispatch(log);
411

    
412
        if (log->cons == log->prod)
413
                tapdisk_syslog_sock_mask(log);
414
}
415

    
416
static void
417
__tapdisk_syslog_sock_init(td_syslog_t *log)
418
{
419
        log->sock     = -1;
420
        log->event_id = -1;
421
}
422

    
423
static void
424
tapdisk_syslog_sock_close(td_syslog_t *log)
425
{
426
        if (log->sock >= 0)
427
                close(log->sock);
428

    
429
        if (log->event_id >= 0)
430
                tapdisk_server_unregister_event(log->event_id);
431

    
432
        __tapdisk_syslog_sock_init(log);
433
}
434

    
435
static int
436
tapdisk_syslog_sock_open(td_syslog_t *log)
437
{
438
        event_id_t id;
439
        int s, err;
440

    
441
        __tapdisk_syslog_sock_init(log);
442

    
443
        s = socket(PF_UNIX, SOCK_DGRAM, 0);
444
        if (s < 0) {
445
                err = -errno;
446
                goto fail;
447
        }
448

    
449
        log->sock = s;
450

    
451
#if 0
452
        err = fcntl(s, F_SETFL, O_NONBLOCK);
453
        if (err < 0) {
454
                err = -errno;
455
                goto fail;
456
        }
457
#endif
458

    
459
        id = tapdisk_server_register_event(SCHEDULER_POLL_WRITE_FD,
460
                                           s, 0,
461
                                           tapdisk_syslog_sock_event,
462
                                           log);
463
        if (id < 0) {
464
                err = id;
465
                goto fail;
466
        }
467

    
468
        log->event_id = id;
469

    
470
        tapdisk_syslog_sock_mask(log);
471

    
472
        return 0;
473

    
474
fail:
475
        tapdisk_syslog_sock_close(log);
476
        return err;
477
}
478

    
479
static int
480
tapdisk_syslog_sock_connect(td_syslog_t *log)
481
{
482
        int err;
483

    
484
        err = connect(log->sock, &syslog_addr, sizeof(syslog_addr));
485
        if (err < 0)
486
                err = -errno;
487

    
488
        return err;
489
}
490

    
491
static void
492
tapdisk_syslog_sock_mask(td_syslog_t *log)
493
{
494
        tapdisk_server_mask_event(log->event_id, 1);
495
}
496

    
497
static void
498
tapdisk_syslog_sock_unmask(td_syslog_t *log)
499
{
500
        tapdisk_server_mask_event(log->event_id, 0);
501
}
502

    
503
void
504
__tapdisk_syslog_init(td_syslog_t *log)
505
{
506
        memset(log, 0, sizeof(td_syslog_t));
507
        __tapdisk_syslog_sock_init(log);
508
        __tapdisk_syslog_ring_init(log);
509
}
510

    
511
void
512
tapdisk_syslog_close(td_syslog_t *log)
513
{
514
        tapdisk_syslog_ring_uninit(log);
515
        tapdisk_syslog_sock_close(log);
516

    
517
        if (log->ident)
518
                free(log->ident);
519

    
520
        __tapdisk_syslog_init(log);
521
}
522

    
523
int
524
tapdisk_syslog_open(td_syslog_t *log, const char *ident, int facility, size_t bufsz)
525
{
526
        int err;
527

    
528
        __tapdisk_syslog_init(log);
529

    
530
        log->facility = facility;
531
        log->ident = ident ? strndup(ident, TD_SYSLOG_IDENT_MAX) : NULL;
532

    
533
        err = tapdisk_syslog_sock_open(log);
534
        if (err)
535
                goto fail;
536

    
537
        err = tapdisk_syslog_ring_init(log, bufsz);
538
        if (err)
539
                goto fail;
540

    
541
        return 0;
542

    
543
fail:
544
        tapdisk_syslog_close(log);
545

    
546
        return err;
547
}
548

    
549
void
550
tapdisk_syslog_stats(td_syslog_t *log, int prio)
551
{
552
        struct _td_syslog_stats *s = &log->stats;
553

    
554
        tapdisk_syslog(log, prio,
555
                       "tapdisk-syslog: %llu messages, %llu bytes, "
556
                       "xmits: %llu, failed: %llu, dropped: %llu",
557
                       s->count, s->bytes,
558
                       s->xmits, s->fails, s->drops);
559
}
560

    
561
void
562
tapdisk_syslog_flush(td_syslog_t *log)
563
{
564
        while (log->cons != log->prod)
565
                tapdisk_server_iterate();
566
}