categraf/inputs/mtail/internal/tailer/logstream/filestream.go

282 lines
9.1 KiB
Go

// Copyright 2020 Google Inc. All Rights Reserved.
// This file is available under the Apache license.
package logstream
import (
"bytes"
"context"
"errors"
"expvar"
"io"
"log"
"os"
"sync"
"syscall"
"time"
"flashcat.cloud/categraf/inputs/mtail/internal/logline"
"flashcat.cloud/categraf/inputs/mtail/internal/waker"
)
// fileTruncates counts the truncations of a file stream.
var fileTruncates = expvar.NewMap("file_truncates_total")
// fileStream streams log lines from a regular file on the file system. These
// log files are appended to by another process, and are either rotated or
// truncated by that (or yet another) process. Rotation implies that a new
// inode with the same name has been created, the old file descriptor will be
// valid until EOF at which point it's considered completed. A truncation means
// the same file descriptor is used but the file offset will be reset to 0.
// The latter is potentially lossy as far as mtail is concerned, if the last
// logs are not read before truncation occurs. When an EOF is read, the
// goroutine tests for both truncation and inode change and resets or spins off
// a new goroutine and closes itself down. The shared context is used for
// cancellation.
type fileStream struct {
ctx context.Context
lines chan<- *logline.LogLine
pathname string // Given name for the underlying file on the filesystem
mu sync.RWMutex // protects following fields.
lastReadTime time.Time // Last time a log line was read from this file
completed bool // The filestream is completed and can no longer be used.
stopOnce sync.Once // Ensure stopChan only closed once.
stopChan chan struct{} // Close to start graceful shutdown.
}
// newFileStream creates a new log stream from a regular file.
func newFileStream(ctx context.Context, wg *sync.WaitGroup, waker waker.Waker, pathname string, fi os.FileInfo, lines chan<- *logline.LogLine, streamFromStart bool) (LogStream, error) {
fs := &fileStream{ctx: ctx, pathname: pathname, lastReadTime: time.Now(), lines: lines, stopChan: make(chan struct{})}
if err := fs.stream(ctx, wg, waker, fi, streamFromStart); err != nil {
return nil, err
}
return fs, nil
}
func (fs *fileStream) LastReadTime() time.Time {
fs.mu.RLock()
defer fs.mu.RUnlock()
return fs.lastReadTime
}
func (fs *fileStream) stream(ctx context.Context, wg *sync.WaitGroup, waker waker.Waker, fi os.FileInfo, streamFromStart bool) error {
fd, err := os.OpenFile(fs.pathname, os.O_RDONLY, 0o600)
if err != nil {
logErrors.Add(fs.pathname, 1)
return err
}
logOpens.Add(fs.pathname, 1)
// glog.V(2).Infof("%v: opened new file", fd)
if !streamFromStart {
if _, err := fd.Seek(0, io.SeekEnd); err != nil {
logErrors.Add(fs.pathname, 1)
if err := fd.Close(); err != nil {
logErrors.Add(fs.pathname, 1)
log.Println(err)
}
return err
}
// glog.V(2).Infof("%v: seeked to end", fd)
}
b := make([]byte, defaultReadBufferSize)
var lastBytes []byte
partial := bytes.NewBufferString("")
started := make(chan struct{})
var total int
wg.Add(1)
go func() {
defer wg.Done()
defer func() {
// glog.V(2).Infof("%v: read total %d bytes from %s", fd, total, fs.pathname)
// glog.V(2).Infof("%v: closing file descriptor", fd)
if err := fd.Close(); err != nil {
logErrors.Add(fs.pathname, 1)
log.Println(err)
}
logCloses.Add(fs.pathname, 1)
}()
close(started)
for {
// Blocking read but regular files will return EOF straight away.
count, err := fd.Read(b)
// glog.V(2).Infof("%v: read %d bytes, err is %v", fd, count, err)
if count > 0 {
total += count
// glog.V(2).Infof("%v: decode and send", fd)
needSend := lastBytes
needSend = append(needSend, b[:count]...)
sendCount := decodeAndSend(ctx, fs.lines, fs.pathname, len(needSend), needSend, partial)
if sendCount < len(needSend) {
lastBytes = append([]byte{}, needSend[sendCount:]...)
} else {
lastBytes = []byte{}
}
fs.mu.Lock()
fs.lastReadTime = time.Now()
fs.mu.Unlock()
}
if err != nil && err != io.EOF {
logErrors.Add(fs.pathname, 1)
// TODO: This could be generalised to check for any retryable
// errors, and end on unretriables; e.g. ESTALE looks
// retryable.
if errors.Is(err, syscall.ESTALE) {
log.Printf("%v: reopening stream due to %s", fd, err)
if nerr := fs.stream(ctx, wg, waker, fi, true); nerr != nil {
log.Println(nerr)
}
// Close this stream.
return
}
log.Println(err)
}
// If we have read no bytes and are at EOF, check for truncation and rotation.
if err == io.EOF && count == 0 {
// glog.V(2).Infof("%v: eof an no bytes", fd)
// Both rotation and truncation need to stat, so check for
// rotation first. It is assumed that rotation is the more
// common change pattern anyway.
newfi, serr := os.Stat(fs.pathname)
if serr != nil {
log.Println(serr)
// If this is a NotExist error, then we should wrap up this
// goroutine. The Tailer will create a new logstream if the
// file is in the middle of a rotation and gets recreated
// in the next moment. We can't rely on the Tailer to tell
// us we're deleted because the tailer can only tell us to
// Stop, which ends up causing us to race here against
// detection of IsCompleted.
if os.IsNotExist(serr) {
// glog.V(2).Infof("%v: source no longer exists, exiting", fd)
if partial.Len() > 0 {
sendLine(ctx, fs.pathname, partial, fs.lines)
}
fs.mu.Lock()
fs.completed = true
fs.mu.Unlock()
return
}
logErrors.Add(fs.pathname, 1)
goto Sleep
}
if !os.SameFile(fi, newfi) {
// glog.V(2).Infof("%v: adding a new file routine", fd)
if err := fs.stream(ctx, wg, waker, newfi, true); err != nil {
log.Println(err)
}
// We're at EOF so there's nothing left to read here.
return
}
currentOffset, serr := fd.Seek(0, io.SeekCurrent)
if serr != nil {
logErrors.Add(fs.pathname, 1)
log.Println(serr)
continue
}
// glog.V(2).Infof("%v: current seek is %d", fd, currentOffset)
// glog.V(2).Infof("%v: new size is %d", fd, newfi.Size())
// We know that newfi is from the current file. Truncation can
// only be detected if the new file is currently shorter than
// the current seek offset. In test this can be a race, but in
// production it's unlikely that a new file writes more bytes
// than the previous after rotation in the time it takes for
// mtail to notice.
if newfi.Size() < currentOffset {
// glog.V(2).Infof("%v: truncate? currentoffset is %d and size is %d", fd, currentOffset, newfi.Size())
// About to lose all remaining data because of the truncate so flush the accumulator.
if partial.Len() > 0 {
sendLine(ctx, fs.pathname, partial, fs.lines)
}
_, serr := fd.Seek(0, io.SeekStart)
if serr != nil {
logErrors.Add(fs.pathname, 1)
log.Println(serr)
}
// glog.V(2).Infof("%v: Seeked to %d", fd, p)
fileTruncates.Add(fs.pathname, 1)
continue
}
}
// No error implies there is more to read in this file so go
// straight back to read unless it looks like context is Done.
if err == nil && ctx.Err() == nil {
continue
}
Sleep:
// If we get here it's because we've stalled. First test to see if it's
// time to exit.
if err == io.EOF || ctx.Err() != nil {
select {
case <-fs.stopChan:
// glog.V(2).Infof("%v: stream has been stopped, exiting", fd)
if partial.Len() > 0 {
sendLine(ctx, fs.pathname, partial, fs.lines)
}
fs.mu.Lock()
fs.completed = true
fs.mu.Unlock()
return
case <-ctx.Done():
// glog.V(2).Infof("%v: stream has been cancelled, exiting", fd)
if partial.Len() > 0 {
sendLine(ctx, fs.pathname, partial, fs.lines)
}
fs.mu.Lock()
fs.completed = true
fs.mu.Unlock()
return
default:
// keep going
}
}
// Don't exit, instead yield and wait for a termination signal or
// wakeup.
// glog.V(2).Infof("%v: waiting", fd)
select {
case <-fs.stopChan:
// We may have started waiting here when the stop signal
// arrives, but since that wait the file may have been
// written to. The file is not technically yet at EOF so
// we need to go back and try one more read. We'll exit
// the stream in the select stanza above.
// glog.V(2).Infof("%v: Stopping after next read", fd)
case <-ctx.Done():
// Same for cancellation; this makes tests stable, but
// could argue exiting immediately is less surprising.
// Assumption is that this doesn't make a difference in
// production.
// glog.V(2).Infof("%v: Cancelled after next read", fd)
case <-waker.Wake():
// sleep until next Wake()
// glog.V(2).Infof("%v: Wake received", fd)
}
}
}()
<-started
return nil
}
func (fs *fileStream) IsComplete() bool {
fs.mu.RLock()
defer fs.mu.RUnlock()
return fs.completed
}
// Stop implements the LogStream interface.
func (fs *fileStream) Stop() {
fs.stopOnce.Do(func() {
log.Println("signalling stop at next EOF")
close(fs.stopChan)
})
}