// Package envparse is a minimal environment variable parser. It handles empty // lines, comments, single quotes, double quotes, and JSON escape sequences. // // Non-empty or comment lines should be of the form: // // KEY=value // // While extraneous characters are discouraged, an "export" prefix, preceding // whitespace, and trailing whitespace are all removed: // // KEY = This is ok! # Parses to {"KEY": "This is ok!"} // KEY2= Also ok. # Parses to {"KEY2": "Also ok."} // export FOO=bar # Parses to {"FOO": "bar"} package envparse import ( "bufio" "bytes" "fmt" "io" "unicode/utf16" "unicode/utf8" ) var ( ErrMissingSeparator = fmt.Errorf("missing =") ErrEmptyKey = fmt.Errorf("empty key") ErrUnmatchedDouble = fmt.Errorf(`unmatched "`) ErrUnmatchedSingle = fmt.Errorf("unmatched '") ErrIncompleteEscape = fmt.Errorf("incomplete escape sequence") ErrIncompleteHex = fmt.Errorf("incomplete hex sequence") ErrIncompleteSur = fmt.Errorf("incomplete Unicode surrogate pair") ErrMultibyteEscape = fmt.Errorf("multibyte characters disallowed in escape sequences") ) // ParseError is returned whenever the Parse function encounters an error. It // includes the line number and underlying error. type ParseError struct { Line int Err error } func (e *ParseError) Error() string { if e.Line > 0 { return fmt.Sprintf("error on line %d: %v", e.Line, e.Err) } return fmt.Sprintf("error reading: %v", e.Err) } func parseError(line int, err error) error { return &ParseError{ Line: line, Err: err, } } // Parse environment variables from an io.Reader into a map or return a // ParseError. func Parse(r io.Reader) (map[string]string, error) { env := make(map[string]string) scanner := bufio.NewScanner(r) // Track line number i := 0 // Main scan loop for scanner.Scan() { i++ k, v, err := parseLine(scanner.Bytes()) if err != nil { return nil, parseError(i, err) } // Skip blank lines if len(k) > 0 { env[string(k)] = string(v) } } if err := scanner.Err(); err != nil { return nil, parseError(i, err) } return env, nil } const ( normalMode = iota doubleQuote = iota singleQuote = iota escapeMode = iota unicodeMode = iota ) var ( empty = []byte{} separator = []byte{'='} exportPrefix = []byte("export ") ) // parseLine parses the given line into a key and value or error. // // Empty lines are returned as zero length slices func parseLine(ln []byte) ([]byte, []byte, error) { ln = bytes.TrimSpace(ln) if len(ln) == 0 || ln[0] == '#' { return empty, empty, nil } parts := bytes.SplitN(ln, separator, 2) if len(parts) != 2 { return nil, nil, ErrMissingSeparator } // Trim whitespace key, value := bytes.TrimSpace(parts[0]), bytes.TrimSpace(parts[1]) // Ensure key is of the form [A-Za-z][A-Za-z0-9_]? with an optional // leading 'export ', but only trim leading export if there's another // key name. if len(key) > len(exportPrefix) { key = bytes.TrimPrefix(key, exportPrefix) } if len(key) == 0 { return nil, nil, ErrEmptyKey } if key[0] < 'A' { return nil, nil, fmt.Errorf("key must start with [A-Za-z_] but found %q", key[0]) } if key[0] > 'Z' && key[0] < 'a' && key[0] != '_' { return nil, nil, fmt.Errorf("key must start with [A-Za-z_] but found %q", key[0]) } if key[0] > 'z' { return nil, nil, fmt.Errorf("key must start with [A-Za-z_] but found %q", key[0]) } for _, v := range key[1:] { switch { case v == '_': case v == '.': case v >= 'A' && v <= 'Z': case v >= 'a' && v <= 'z': case v >= '0' && v <= '9': default: return nil, nil, fmt.Errorf("key characters must be [A-Za-z0-9_.] but found %q", v) } } // Evaluate the value if len(value) == 0 { // Empty values are ok! Shortcircuit return key, value, nil } // Scratch buffer for unescaped value newv := make([]byte, len(value)) newi := 0 // Track last significant character for trimming unquoted whitespace preceding a trailing comment lastSig := 0 // Parser State mode := normalMode for i := 0; i < len(value); i++ { v := value[i] // Control characters are always an error if v < 32 { return nil, nil, fmt.Errorf("0x%0.2x is an invalid value character", v) } // High bit set means it is part of a multibyte character, pass // it through as only ASCII characters have special meaning. if v > 127 { if mode == escapeMode { return nil, nil, ErrMultibyteEscape } // All multibyte characters are significant lastSig = newi newv[newi] = v newi++ continue } switch mode { case normalMode: switch v { case '"': mode = doubleQuote case '\'': mode = singleQuote case '#': // Start of a comment, nothing left to parse return key, newv[:lastSig], nil case ' ', '\t': // Make sure whitespace doesn't get tracked newv[newi] = v newi++ default: // Add the character to the new value newv[newi] = v newi++ // Track last non-WS char for trimming on trailing comments lastSig = newi } case doubleQuote: switch v { case '"': mode = normalMode case '\\': mode = escapeMode default: // Add the character to the new value newv[newi] = v newi++ // All quoted characters are significant lastSig = newi } case escapeMode: // We're in double quotes and the last character was a backslash switch v { case '"': newv[newi] = v case '\\': newv[newi] = v case '/': newv[newi] = v case 'b': newv[newi] = '\b' case 'f': newv[newi] = '\f' case 'r': newv[newi] = '\r' case 'n': newv[newi] = '\n' case 't': newv[newi] = '\t' case 'u': // Parse-ahead to capture unicode r, err := h2r(value[i+1:]) if err != nil { return nil, nil, err } // Bump index by width of hex chars i += 4 // Check if we need to get another rune if utf16.IsSurrogate(r) { if len(value) < i+6 { //TODO Use replacement character instead? return nil, nil, ErrIncompleteSur } if value[i+1] != '\\' || value[i+2] != 'u' { //TODO Use replacement character instead? return nil, nil, ErrIncompleteSur } r2, err := h2r(value[i+3:]) if err != nil { return nil, nil, err } // Bump index by width of \uXXXX i += 6 r = utf16.DecodeRune(r, r2) } n := utf8.EncodeRune(newv[newi:], r) newi += n - 1 // because it's incremented outside the switch default: return nil, nil, fmt.Errorf("invalid escape sequence: %q", string(v)) } // Add the character to the new value newi++ // All escaped characters are significant lastSig = newi // Switch back to quote mode mode = doubleQuote case singleQuote: switch v { case '\'': mode = normalMode default: // Add all other characters to the new value newv[newi] = v newi++ // All single quoted characters are significant lastSig = newi } default: panic(fmt.Errorf("BUG: invalid mode: %v", mode)) } } switch mode { case normalMode: // All escape sequences are complete and all quotes are matched return key, newv[:newi], nil case doubleQuote: return nil, nil, ErrUnmatchedDouble case singleQuote: return nil, nil, ErrUnmatchedSingle case escapeMode: return nil, nil, ErrIncompleteEscape default: panic(fmt.Errorf("BUG: invalid mode: %v", mode)) } } // convert hex characters into a rune func h2r(buf []byte) (rune, error) { if len(buf) < 4 { return 0, ErrIncompleteHex } var r rune for i := 0; i < 4; i++ { d := buf[i] switch { case '0' <= d && d <= '9': d = d - '0' case 'a' <= d && d <= 'f': d = d - 'a' + 10 case 'A' <= d && d <= 'F': d = d - 'A' + 10 default: return 0, fmt.Errorf("invalid hex character: %q", string(d)) } r *= 16 r += rune(d) } return r, nil }