categraf/inputs/mtail/internal/runtime/compiler/codegen/codegen.go

// Copyright 2016 Google Inc. All Rights Reserved.
// This file is available under the Apache license.

package codegen

import (
	"fmt"
	"log"
	"math"
	"regexp"
	"time"

	"flashcat.cloud/categraf/inputs/mtail/internal/metrics"
	"flashcat.cloud/categraf/inputs/mtail/internal/metrics/datum"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/code"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/ast"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/errors"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/parser"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/position"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/symbol"
	"flashcat.cloud/categraf/inputs/mtail/internal/runtime/compiler/types"
)

// codegen represents a code generator.
type codegen struct {
	name string // Name of the program.

	errors errors.ErrorList // Any compile errors detected are accumulated here.
	obj    code.Object      // The object to return, if successful.

	l     []int           // Label table for recording jump destinations.
	decos []*ast.DecoStmt // Decorator stack to unwind when entering decorated blocks.
}

// CodeGen is the function that compiles the program to bytecode and data.
func CodeGen(name string, n ast.Node) (*code.Object, error) {
	c := &codegen{name: name}
	_ = ast.Walk(c, n)
	c.writeJumps()
	if len(c.errors) > 0 {
		return nil, c.errors
	}
	return &c.obj, nil
}

func (c *codegen) errorf(pos *position.Position, format string, args ...interface{}) {
	e := "Internal compiler error, aborting compilation: " + fmt.Sprintf(format, args...)
	c.errors.Add(pos, e)
}

func (c *codegen) emit(n ast.Node, opcode code.Opcode, operand interface{}) {
	// glog.V(2).Infof("emitting `%s %v' from line %d node %#v\n", opcode, operand, n.Pos().Line, n)
	c.obj.Program = append(c.obj.Program, code.Instr{opcode, operand, n.Pos().Line})
}

// newLabel creates a new label to jump to.
func (c *codegen) newLabel() (l int) {
	l = len(c.l)
	c.l = append(c.l, -1)
	return
}

// setLabel points a label to the next instruction.
func (c *codegen) setLabel(l int) {
	c.l[l] = c.pc() + 1
}

// pc returns the program offset of the last instruction.
func (c *codegen) pc() int {
	return len(c.obj.Program) - 1
}

func (c *codegen) VisitBefore(node ast.Node) (ast.Visitor, ast.Node) {
	switch n := node.(type) {

	case *ast.VarDecl:
		var name string
		if n.ExportedName != "" {
			name = n.ExportedName
		} else {
			name = n.Name
		}
		// If the Type is not in the map, then default to metrics.Int.  This is
		// a hack for metrics that no type can be inferred, retaining
		// historical behaviour.
		t := n.Type()
		if types.IsDimension(t) {
			t = t.(*types.Operator).Args[len(t.(*types.Operator).Args)-1]
		}
		var dtyp metrics.Type
		switch {
		case types.Equals(types.Float, t):
			dtyp = metrics.Float
		case types.Equals(types.String, t):
			dtyp = metrics.String
		case types.Equals(types.Buckets, t):
			dtyp = metrics.Buckets
		default:
			if !types.IsComplete(t) {
				log.Printf("Incomplete type %v for %#v", t, n)
			}
			dtyp = metrics.Int
		}
		m := metrics.NewMetric(name, c.name, n.Kind, dtyp, n.Keys...)
		m.SetSource(n.Pos().String())
		// Scalar counters can be initialized to zero.  Dimensioned counters we
		// don't know the values of the labels yet.  Gauges and Timers we can't
		// assume start at zero.
		if len(n.Keys) == 0 && n.Kind == metrics.Counter {
			// Calling GetDatum here causes the storage to be allocated.
			d, err := m.GetDatum()
			if err != nil {
				c.errorf(n.Pos(), "%s", err)
				return nil, n
			}
			// Initialize to zero at the zero time.
			switch dtyp {
			case metrics.Int:
				datum.SetInt(d, 0, time.Unix(0, 0))
			case metrics.Float:
				datum.SetFloat(d, 0, time.Unix(0, 0))
			default:
				c.errorf(n.Pos(), "Can't initialize to zero a %#v", n)
				return nil, n
			}
		}

		if n.Kind == metrics.Histogram {
			if len(n.Buckets) < 2 {
				c.errorf(n.Pos(), "a histogram need at least two boundaries")
				return nil, n
			}
			if n.Buckets[0] >= n.Buckets[1] {
				c.errorf(n.Pos(), "buckets boundaries must be sorted")
				return nil, n
			}

			if n.Buckets[0] > 0 {
				m.Buckets = append(m.Buckets, datum.Range{0, n.Buckets[0]})
			}
			m.Buckets = append(m.Buckets, datum.Range{n.Buckets[0], n.Buckets[1]})
			min := n.Buckets[1]
			for _, max := range n.Buckets[2:] {
				if max <= min {
					c.errorf(n.Pos(), "buckets boundaries must be sorted")
					return nil, n
				}
				m.Buckets = append(m.Buckets, datum.Range{min, max})
				min = max
			}
			m.Buckets = append(m.Buckets, datum.Range{min, math.Inf(+1)})

			if len(n.Keys) == 0 {
				// Calling GetDatum here causes the storage to be allocated.
				_, err := m.GetDatum()
				if err != nil {
					c.errorf(n.Pos(), "%s", err)
					return nil, n
				}
			}
		}

		m.Hidden = n.Hidden
		// int is int64 only on 64bit platforms.  To be fair MaxInt is a
		// ridiculously excessive size for this anyway, you're going to use 2GiB
		// x sizeof(datum) in a single metric.
		if n.Limit > math.MaxInt {
			c.errorf(n.Pos(), "limit %d too large; max %d", n.Limit, math.MaxInt)
			return nil, n
		}
		m.Limit = int(n.Limit)

		n.Symbol.Binding = m
		n.Symbol.Addr = len(c.obj.Metrics)
		c.obj.Metrics = append(c.obj.Metrics, m)
		return nil, n

	case *ast.CondStmt:
		lElse := c.newLabel()
		lEnd := c.newLabel()
		if n.Cond != nil {
			n.Cond = ast.Walk(c, n.Cond)
			c.emit(n, code.Jnm, lElse)
		}
		// Set matched flag false for children.
		c.emit(n, code.Setmatched, false)
		n.Truth = ast.Walk(c, n.Truth)
		// Re-set matched flag to true for rest of current block.
		c.emit(n, code.Setmatched, true)
		if n.Else != nil {
			c.emit(n, code.Jmp, lEnd)
		}
		c.setLabel(lElse)
		if n.Else != nil {
			n.Else = ast.Walk(c, n.Else)
		}
		c.setLabel(lEnd)
		return nil, n

	case *ast.PatternExpr:
		re, err := regexp.Compile(n.Pattern)
		if err != nil {
			c.errorf(n.Pos(), "%s", err)
			return nil, n
		}
		c.obj.Regexps = append(c.obj.Regexps, re)
		// Store the location of this regular expression in the PatternExpr
		n.Index = len(c.obj.Regexps) - 1
		return nil, n

	case *ast.PatternFragment:
		// Skip, const pattern fragments are concatenated into PatternExpr storage, not executable.
		return nil, n

	case *ast.StringLit:
		c.obj.Strings = append(c.obj.Strings, n.Text)
		c.emit(n, code.Str, len(c.obj.Strings)-1)

	case *ast.IntLit:
		c.emit(n, code.Push, n.I)

	case *ast.FloatLit:
		c.emit(n, code.Push, n.F)

	case *ast.StopStmt:
		c.emit(n, code.Stop, nil)

	case *ast.IDTerm:
		if n.Symbol == nil || n.Symbol.Kind != symbol.VarSymbol {
			break
		}
		if n.Symbol.Binding == nil {
			c.errorf(n.Pos(), "No metric bound to identifier %q", n.Name)
			return nil, n
		}
		c.emit(n, code.Mload, n.Symbol.Addr)
		m := n.Symbol.Binding.(*metrics.Metric)
		c.emit(n, code.Dload, len(m.Keys))

		if !n.Lvalue {
			t := n.Type()
			if types.IsDimension(t) {
				l := len(t.(*types.Operator).Args)
				t = t.(*types.Operator).Args[l-1]
			}

			switch {
			case types.Equals(t, types.Float):
				c.emit(n, code.Fget, nil)
			case types.Equals(t, types.Int):
				c.emit(n, code.Iget, nil)
			case types.Equals(t, types.String):
				c.emit(n, code.Sget, nil)
			default:
				c.errorf(n.Pos(), "invalid type for get %q in %#v", n.Type(), n)
				return nil, n
			}
		}

	case *ast.CaprefTerm:
		if n.Symbol == nil || n.Symbol.Binding == nil {
			c.errorf(n.Pos(), "No regular expression bound to capref %q", n.Name)
			return nil, n
		}
		rn := n.Symbol.Binding.(*ast.PatternExpr)
		// rn.index contains the index of the compiled regular expression object
		// in the re slice of the object code
		c.emit(n, code.Push, rn.Index)
		// n.Symbol.Addr is the capture group offset
		c.emit(n, code.Capref, n.Symbol.Addr)
		if types.Equals(n.Type(), types.Float) {
			c.emit(n, code.S2f, nil)
		} else if types.Equals(n.Type(), types.Int) {
			c.emit(n, code.S2i, nil)
		}

	case *ast.IndexedExpr:
		if args, ok := n.Index.(*ast.ExprList); ok {
			for _, arg := range args.Children {
				_ = ast.Walk(c, arg)
				if types.Equals(arg.Type(), types.Float) {
					c.emit(n, code.F2s, nil)
				} else if types.Equals(arg.Type(), types.Int) {
					c.emit(n, code.I2s, nil)
				}
			}
		}
		ast.Walk(c, n.LHS)
		return nil, n

	case *ast.DecoDecl:
		// Do nothing, defs are inlined.
		return nil, n

	case *ast.DecoStmt:
		// Put the current block on the stack
		decoLen := len(c.decos)
		c.decos = append(c.decos, n)
		if n.Decl == nil {
			c.errorf(n.Pos(), "No definition found for decorator %q", n.Name)
			return nil, n
		}
		// then iterate over the decorator's nodes
		ast.Walk(c, n.Decl.Block)
		if len(c.decos) > decoLen {
			// glog.V(1).Info("Too many blocks on stack, was there no `next' in the last one?")
		}
		return nil, n

	case *ast.NextStmt:
		// Visit the 'next' block on the decorated block stack
		top := len(c.decos) - 1
		deco := c.decos[top]
		c.decos = c.decos[:top]
		ast.Walk(c, deco.Block)
		return nil, n

	case *ast.OtherwiseStmt:
		c.emit(n, code.Otherwise, nil)

	case *ast.DelStmt:
		if n.Expiry > 0 {
			c.emit(n, code.Push, n.Expiry)
		}
		ast.Walk(c, n.N)
		// overwrite the dload instruction
		pc := c.pc()
		c.obj.Program[pc].Opcode = code.Del
		if n.Expiry > 0 {
			c.obj.Program[pc].Opcode = code.Expire
		}

	case *ast.BinaryExpr:
		switch n.Op {
		case parser.AND:
			lFalse := c.newLabel()
			lEnd := c.newLabel()
			ast.Walk(c, n.LHS)
			c.emit(n, code.Jnm, lFalse)
			ast.Walk(c, n.RHS)
			c.emit(n, code.Jnm, lFalse)
			c.emit(n, code.Push, true)
			c.emit(n, code.Jmp, lEnd)
			c.setLabel(lFalse)
			c.emit(n, code.Push, false)
			c.setLabel(lEnd)
			return nil, n

		case parser.OR:
			lTrue := c.newLabel()
			lEnd := c.newLabel()
			ast.Walk(c, n.LHS)
			c.emit(n, code.Jm, lTrue)
			ast.Walk(c, n.RHS)
			c.emit(n, code.Jm, lTrue)
			c.emit(n, code.Push, false)
			c.emit(n, code.Jmp, lEnd)
			c.setLabel(lTrue)
			c.emit(n, code.Push, true)
			c.setLabel(lEnd)
			return nil, n

		case parser.ADD_ASSIGN:
			if !types.Equals(n.Type(), types.Int) {
				// Double-emit the lhs so that it can be assigned to
				ast.Walk(c, n.LHS)
			}

		default:
			// Didn't handle it, let normal walk proceed
			return c, n
		}
	}

	return c, node
}

var typedOperators = map[int]map[types.Type]code.Opcode{
	parser.PLUS: {
		types.Int:     code.Iadd,
		types.Float:   code.Fadd,
		types.String:  code.Cat,
		types.Pattern: code.Cat,
	},
	parser.MINUS: {
		types.Int:   code.Isub,
		types.Float: code.Fsub,
	},
	parser.MUL: {
		types.Int:   code.Imul,
		types.Float: code.Fmul,
	},
	parser.DIV: {
		types.Int:   code.Idiv,
		types.Float: code.Fdiv,
	},
	parser.MOD: {
		types.Int:   code.Imod,
		types.Float: code.Fmod,
	},
	parser.POW: {
		types.Int:   code.Ipow,
		types.Float: code.Fpow,
	},
	parser.ASSIGN: {
		types.Int:    code.Iset,
		types.Float:  code.Fset,
		types.String: code.Sset,
	},
}

func getOpcodeForType(op int, opT types.Type) (code.Opcode, error) {
	opmap, ok := typedOperators[op]
	if !ok {
		return -1, errors.Errorf("no typed operator for type %v", op)
	}
	for t, opcode := range opmap {
		if types.Equals(t, opT) {
			return opcode, nil
		}
	}
	return -1, errors.Errorf("no opcode for type %s in op %v", opT, op)
}

var builtin = map[string]code.Opcode{
	"getfilename": code.Getfilename,
	"len":         code.Length,
	"settime":     code.Settime,
	"strptime":    code.Strptime,
	"strtol":      code.S2i,
	"subst":       code.Subst,
	"timestamp":   code.Timestamp,
	"tolower":     code.Tolower,
}

func (c *codegen) VisitAfter(node ast.Node) ast.Node {
	switch n := node.(type) {
	case *ast.BuiltinExpr:
		arglen := 0
		if n.Args != nil {
			arglen = len(n.Args.(*ast.ExprList).Children)
		}
		switch n.Name {
		case "bool":
		// TODO(jaq): Nothing, no support in VM yet.

		case "int", "float", "string":
			// len args should be 1
			if arglen > 1 {
				c.errorf(n.Pos(), "too many arguments to builtin %q: %#v", n.Name, n)
				return n
			}
			if err := c.emitConversion(n, n.Args.(*ast.ExprList).Children[0].Type(), n.Type()); err != nil {
				c.errorf(n.Pos(), "%s on node %v", err.Error(), n)
				return n
			}
		case "subst":
			if types.Equals(n.Args.(*ast.ExprList).Children[0].Type(), types.Pattern) {
				index := n.Args.(*ast.ExprList).Children[0].(*ast.PatternExpr).Index
				c.emit(n, code.Push, index)
				c.emit(n, code.Rsubst, arglen)
			} else {
				c.emit(n, code.Subst, arglen)
			}

		default:
			c.emit(n, builtin[n.Name], arglen)
		}
	case *ast.UnaryExpr:
		switch n.Op {
		case parser.INC:
			c.emit(n, code.Inc, nil)
		case parser.DEC:
			c.emit(n, code.Dec, nil)
		case parser.NOT:
			c.emit(n, code.Neg, nil)
		case parser.MATCH:
			index := n.Expr.(*ast.PatternExpr).Index
			c.emit(n, code.Match, index)
		}
	case *ast.BinaryExpr:
		switch n.Op {
		case parser.LT, parser.GT, parser.LE, parser.GE, parser.EQ, parser.NE:
			lFail := c.newLabel()
			lEnd := c.newLabel()
			var cmpArg int
			var jumpOp code.Opcode
			switch n.Op {
			case parser.LT:
				cmpArg = -1
				jumpOp = code.Jnm
			case parser.GT:
				cmpArg = 1
				jumpOp = code.Jnm
			case parser.LE:
				cmpArg = 1
				jumpOp = code.Jm
			case parser.GE:
				cmpArg = -1
				jumpOp = code.Jm
			case parser.EQ:
				cmpArg = 0
				jumpOp = code.Jnm
			case parser.NE:
				cmpArg = 0
				jumpOp = code.Jm
			}
			cmpOp := code.Cmp
			if types.Equals(n.LHS.Type(), n.RHS.Type()) {
				switch n.LHS.Type() {
				case types.Float:
					cmpOp = code.Fcmp
				case types.Int:
					cmpOp = code.Icmp
				case types.String:
					cmpOp = code.Scmp
				default:
					cmpOp = code.Cmp
				}
			}
			c.emit(n, cmpOp, cmpArg)
			c.emit(n, jumpOp, lFail)
			c.emit(n, code.Push, true)
			c.emit(n, code.Jmp, lEnd)
			c.setLabel(lFail)
			c.emit(n, code.Push, false)
			c.setLabel(lEnd)
		case parser.ADD_ASSIGN:
			// When operand is not nil, inc pops the delta from the stack.
			switch {
			case types.Equals(n.Type(), types.Int):
				c.emit(n, code.Inc, 0)
			case types.Equals(n.Type(), types.Float), types.Equals(n.Type(), types.String):
				// Already walked the lhs and rhs of this expression
				opcode, err := getOpcodeForType(parser.PLUS, n.Type())
				if err != nil {
					c.errorf(n.Pos(), "%s", err)
					return n
				}
				c.emit(n, opcode, nil)
				// And a second lhs
				opcode, err = getOpcodeForType(parser.ASSIGN, n.Type())
				if err != nil {
					c.errorf(n.Pos(), "%s", err)
					return n
				}
				c.emit(n, opcode, nil)
			default:
				c.errorf(n.Pos(), "invalid type for add-assignment: %v", n.Type())
				return n
			}
		case parser.PLUS, parser.MINUS, parser.MUL, parser.DIV, parser.MOD, parser.POW, parser.ASSIGN:
			opcode, err := getOpcodeForType(n.Op, n.Type())
			if err != nil {
				c.errorf(n.Pos(), "%s", err)
				return n
			}
			c.emit(n, opcode, nil)
		case parser.BITAND:
			c.emit(n, code.And, nil)
		case parser.BITOR:
			c.emit(n, code.Or, nil)
		case parser.XOR:
			c.emit(n, code.Xor, nil)
		case parser.SHL:
			c.emit(n, code.Shl, nil)
		case parser.SHR:
			c.emit(n, code.Shr, nil)

		case parser.MATCH, parser.NOT_MATCH:
			switch v := n.RHS.(type) {
			case *ast.PatternExpr:
				index := v.Index
				c.emit(n, code.Smatch, index)
			default:
				c.errorf(n.Pos(), "unexpected rhs expression for match %#v", n.RHS)
				return n
			}

			if n.Op == parser.NOT_MATCH {
				c.emit(n, code.Not, nil)
			}

		default:
			c.errorf(n.Pos(), "unexpected op %v", n.Op)
		}

	case *ast.ConvExpr:
		if err := c.emitConversion(n, n.N.Type(), n.Type()); err != nil {
			c.errorf(n.Pos(), "internal error: %s on node %v", err.Error(), n)
			return n
		}
	}
	return node
}

func (c *codegen) emitConversion(n ast.Node, inType, outType types.Type) error {
	// glog.V(2).Infof("Conversion: %q to %q", inType, outType)
	switch {
	case types.Equals(types.Int, inType) && types.Equals(types.Float, outType):
		c.emit(n, code.I2f, nil)
	case types.Equals(types.String, inType) && types.Equals(types.Float, outType):
		c.emit(n, code.S2f, nil)
	case types.Equals(types.String, inType) && types.Equals(types.Int, outType):
		c.emit(n, code.S2i, nil)
	case types.Equals(types.Float, inType) && types.Equals(types.String, outType):
		c.emit(n, code.F2s, nil)
	case types.Equals(types.Int, inType) && types.Equals(types.String, outType):
		c.emit(n, code.I2s, nil)
	case types.Equals(types.Pattern, inType) && types.Equals(types.Bool, outType):
		// nothing, pattern is implicit bool
	case types.Equals(inType, outType):
		// Nothing; no-op.
	default:
		return errors.Errorf("can't convert %q to %q", inType, outType)
	}
	return nil
}

func (c *codegen) writeJumps() {
	for j, i := range c.obj.Program {
		switch i.Opcode {
		case code.Jmp, code.Jm, code.Jnm:
			index := i.Operand.(int)
			if index > len(c.l) {
				c.errorf(nil, "no jump at label %v, table is %v", i.Operand, c.l)
				continue
			}
			offset := c.l[index]
			if offset < 0 {
				c.errorf(nil, "offset for label %v is negative, table is %v", i.Operand, c.l)
				continue
			}
			c.obj.Program[j].Operand = c.l[index]
		}
	}
}