From e84ebcc494eec511e43b21b71137521434d99ca8 Mon Sep 17 00:00:00 2001 From: Priya Wadhwa Date: Fri, 16 Mar 2018 12:15:53 -0700 Subject: [PATCH] Added dockerfile/shell package to vendor --- Gopkg.lock | 9 +- .../dockerfile/shell/equal_env_unix.go | 9 + .../dockerfile/shell/equal_env_windows.go | 9 + .../docker/builder/dockerfile/shell/lex.go | 344 +++++++++++++++ vendor/github.com/google/shlex/COPYING | 202 --------- vendor/github.com/google/shlex/shlex.go | 417 ------------------ 6 files changed, 364 insertions(+), 626 deletions(-) create mode 100644 vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_unix.go create mode 100644 vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_windows.go create mode 100644 vendor/github.com/docker/docker/builder/dockerfile/shell/lex.go delete mode 100644 vendor/github.com/google/shlex/COPYING delete mode 100644 vendor/github.com/google/shlex/shlex.go diff --git a/Gopkg.lock b/Gopkg.lock index 82b8ecd94..d6be583a7 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -161,6 +161,7 @@ "builder/dockerfile/command", "builder/dockerfile/instructions", "builder/dockerfile/parser", + "builder/dockerfile/shell", "client", "pkg/homedir", "pkg/idtools", @@ -225,12 +226,6 @@ revision = "925541529c1fa6821df4e44ce2723319eb2be768" version = "v1.0.0" -[[projects]] - branch = "master" - name = "github.com/google/shlex" - packages = ["."] - revision = "6f45313302b9c56850fc17f99e40caebce98c716" - [[projects]] name = "github.com/gorilla/context" packages = ["."] @@ -492,6 +487,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "0a35c48c8d6982331bd68de44349b310410a7cf987465b3429ddb11b67854de7" + inputs-digest = "eadec1feacc8473e54622d5f3a25fbc9c7fb1f9bd38776475c3e2d283bd80d2a" solver-name = "gps-cdcl" solver-version = 1 diff --git a/vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_unix.go b/vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_unix.go new file mode 100644 index 000000000..ac97a1e16 --- /dev/null +++ b/vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_unix.go @@ -0,0 +1,9 @@ +// +build !windows + +package shell // import "github.com/docker/docker/builder/dockerfile/shell" + +// EqualEnvKeys compare two strings and returns true if they are equal. On +// Windows this comparison is case insensitive. +func EqualEnvKeys(from, to string) bool { + return from == to +} diff --git a/vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_windows.go b/vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_windows.go new file mode 100644 index 000000000..4baa771bd --- /dev/null +++ b/vendor/github.com/docker/docker/builder/dockerfile/shell/equal_env_windows.go @@ -0,0 +1,9 @@ +package shell // import "github.com/docker/docker/builder/dockerfile/shell" + +import "strings" + +// EqualEnvKeys compare two strings and returns true if they are equal. On +// Windows this comparison is case insensitive. +func EqualEnvKeys(from, to string) bool { + return strings.ToUpper(from) == strings.ToUpper(to) +} diff --git a/vendor/github.com/docker/docker/builder/dockerfile/shell/lex.go b/vendor/github.com/docker/docker/builder/dockerfile/shell/lex.go new file mode 100644 index 000000000..bd3fac525 --- /dev/null +++ b/vendor/github.com/docker/docker/builder/dockerfile/shell/lex.go @@ -0,0 +1,344 @@ +package shell // import "github.com/docker/docker/builder/dockerfile/shell" + +import ( + "bytes" + "strings" + "text/scanner" + "unicode" + + "github.com/pkg/errors" +) + +// Lex performs shell word splitting and variable expansion. +// +// Lex takes a string and an array of env variables and +// process all quotes (" and ') as well as $xxx and ${xxx} env variable +// tokens. Tries to mimic bash shell process. +// It doesn't support all flavors of ${xx:...} formats but new ones can +// be added by adding code to the "special ${} format processing" section +type Lex struct { + escapeToken rune +} + +// NewLex creates a new Lex which uses escapeToken to escape quotes. +func NewLex(escapeToken rune) *Lex { + return &Lex{escapeToken: escapeToken} +} + +// ProcessWord will use the 'env' list of environment variables, +// and replace any env var references in 'word'. +func (s *Lex) ProcessWord(word string, env []string) (string, error) { + word, _, err := s.process(word, env) + return word, err +} + +// ProcessWords will use the 'env' list of environment variables, +// and replace any env var references in 'word' then it will also +// return a slice of strings which represents the 'word' +// split up based on spaces - taking into account quotes. Note that +// this splitting is done **after** the env var substitutions are done. +// Note, each one is trimmed to remove leading and trailing spaces (unless +// they are quoted", but ProcessWord retains spaces between words. +func (s *Lex) ProcessWords(word string, env []string) ([]string, error) { + _, words, err := s.process(word, env) + return words, err +} + +func (s *Lex) process(word string, env []string) (string, []string, error) { + sw := &shellWord{ + envs: env, + escapeToken: s.escapeToken, + } + sw.scanner.Init(strings.NewReader(word)) + return sw.process(word) +} + +type shellWord struct { + scanner scanner.Scanner + envs []string + escapeToken rune +} + +func (sw *shellWord) process(source string) (string, []string, error) { + word, words, err := sw.processStopOn(scanner.EOF) + if err != nil { + err = errors.Wrapf(err, "failed to process %q", source) + } + return word, words, err +} + +type wordsStruct struct { + word string + words []string + inWord bool +} + +func (w *wordsStruct) addChar(ch rune) { + if unicode.IsSpace(ch) && w.inWord { + if len(w.word) != 0 { + w.words = append(w.words, w.word) + w.word = "" + w.inWord = false + } + } else if !unicode.IsSpace(ch) { + w.addRawChar(ch) + } +} + +func (w *wordsStruct) addRawChar(ch rune) { + w.word += string(ch) + w.inWord = true +} + +func (w *wordsStruct) addString(str string) { + var scan scanner.Scanner + scan.Init(strings.NewReader(str)) + for scan.Peek() != scanner.EOF { + w.addChar(scan.Next()) + } +} + +func (w *wordsStruct) addRawString(str string) { + w.word += str + w.inWord = true +} + +func (w *wordsStruct) getWords() []string { + if len(w.word) > 0 { + w.words = append(w.words, w.word) + + // Just in case we're called again by mistake + w.word = "" + w.inWord = false + } + return w.words +} + +// Process the word, starting at 'pos', and stop when we get to the +// end of the word or the 'stopChar' character +func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) { + var result bytes.Buffer + var words wordsStruct + + var charFuncMapping = map[rune]func() (string, error){ + '\'': sw.processSingleQuote, + '"': sw.processDoubleQuote, + '$': sw.processDollar, + } + + for sw.scanner.Peek() != scanner.EOF { + ch := sw.scanner.Peek() + + if stopChar != scanner.EOF && ch == stopChar { + sw.scanner.Next() + break + } + if fn, ok := charFuncMapping[ch]; ok { + // Call special processing func for certain chars + tmp, err := fn() + if err != nil { + return "", []string{}, err + } + result.WriteString(tmp) + + if ch == rune('$') { + words.addString(tmp) + } else { + words.addRawString(tmp) + } + } else { + // Not special, just add it to the result + ch = sw.scanner.Next() + + if ch == sw.escapeToken { + // '\' (default escape token, but ` allowed) escapes, except end of line + ch = sw.scanner.Next() + + if ch == scanner.EOF { + break + } + + words.addRawChar(ch) + } else { + words.addChar(ch) + } + + result.WriteRune(ch) + } + } + + return result.String(), words.getWords(), nil +} + +func (sw *shellWord) processSingleQuote() (string, error) { + // All chars between single quotes are taken as-is + // Note, you can't escape ' + // + // From the "sh" man page: + // Single Quotes + // Enclosing characters in single quotes preserves the literal meaning of + // all the characters (except single quotes, making it impossible to put + // single-quotes in a single-quoted string). + + var result bytes.Buffer + + sw.scanner.Next() + + for { + ch := sw.scanner.Next() + switch ch { + case scanner.EOF: + return "", errors.New("unexpected end of statement while looking for matching single-quote") + case '\'': + return result.String(), nil + } + result.WriteRune(ch) + } +} + +func (sw *shellWord) processDoubleQuote() (string, error) { + // All chars up to the next " are taken as-is, even ', except any $ chars + // But you can escape " with a \ (or ` if escape token set accordingly) + // + // From the "sh" man page: + // Double Quotes + // Enclosing characters within double quotes preserves the literal meaning + // of all characters except dollarsign ($), backquote (`), and backslash + // (\). The backslash inside double quotes is historically weird, and + // serves to quote only the following characters: + // $ ` " \ . + // Otherwise it remains literal. + + var result bytes.Buffer + + sw.scanner.Next() + + for { + switch sw.scanner.Peek() { + case scanner.EOF: + return "", errors.New("unexpected end of statement while looking for matching double-quote") + case '"': + sw.scanner.Next() + return result.String(), nil + case '$': + value, err := sw.processDollar() + if err != nil { + return "", err + } + result.WriteString(value) + default: + ch := sw.scanner.Next() + if ch == sw.escapeToken { + switch sw.scanner.Peek() { + case scanner.EOF: + // Ignore \ at end of word + continue + case '"', '$', sw.escapeToken: + // These chars can be escaped, all other \'s are left as-is + // Note: for now don't do anything special with ` chars. + // Not sure what to do with them anyway since we're not going + // to execute the text in there (not now anyway). + ch = sw.scanner.Next() + } + } + result.WriteRune(ch) + } + } +} + +func (sw *shellWord) processDollar() (string, error) { + sw.scanner.Next() + + // $xxx case + if sw.scanner.Peek() != '{' { + name := sw.processName() + if name == "" { + return "$", nil + } + return sw.getEnv(name), nil + } + + sw.scanner.Next() + name := sw.processName() + ch := sw.scanner.Peek() + if ch == '}' { + // Normal ${xx} case + sw.scanner.Next() + return sw.getEnv(name), nil + } + if ch == ':' { + // Special ${xx:...} format processing + // Yes it allows for recursive $'s in the ... spot + + sw.scanner.Next() // skip over : + modifier := sw.scanner.Next() + + word, _, err := sw.processStopOn('}') + if err != nil { + return "", err + } + + // Grab the current value of the variable in question so we + // can use to to determine what to do based on the modifier + newValue := sw.getEnv(name) + + switch modifier { + case '+': + if newValue != "" { + newValue = word + } + return newValue, nil + + case '-': + if newValue == "" { + newValue = word + } + return newValue, nil + + default: + return "", errors.Errorf("unsupported modifier (%c) in substitution", modifier) + } + } + return "", errors.Errorf("missing ':' in substitution") +} + +func (sw *shellWord) processName() string { + // Read in a name (alphanumeric or _) + // If it starts with a numeric then just return $# + var name bytes.Buffer + + for sw.scanner.Peek() != scanner.EOF { + ch := sw.scanner.Peek() + if name.Len() == 0 && unicode.IsDigit(ch) { + ch = sw.scanner.Next() + return string(ch) + } + if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' { + break + } + ch = sw.scanner.Next() + name.WriteRune(ch) + } + + return name.String() +} + +func (sw *shellWord) getEnv(name string) string { + for _, env := range sw.envs { + i := strings.Index(env, "=") + if i < 0 { + if EqualEnvKeys(name, env) { + // Should probably never get here, but just in case treat + // it like "var" and "var=" are the same + return "" + } + continue + } + compareName := env[:i] + if !EqualEnvKeys(name, compareName) { + continue + } + return env[i+1:] + } + return "" +} diff --git a/vendor/github.com/google/shlex/COPYING b/vendor/github.com/google/shlex/COPYING deleted file mode 100644 index d64569567..000000000 --- a/vendor/github.com/google/shlex/COPYING +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/vendor/github.com/google/shlex/shlex.go b/vendor/github.com/google/shlex/shlex.go deleted file mode 100644 index 3cb37b7e4..000000000 --- a/vendor/github.com/google/shlex/shlex.go +++ /dev/null @@ -1,417 +0,0 @@ -/* -Copyright 2012 Google Inc. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -/* -Package shlex implements a simple lexer which splits input in to tokens using -shell-style rules for quoting and commenting. - -The basic use case uses the default ASCII lexer to split a string into sub-strings: - - shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"} - -To process a stream of strings: - - l := NewLexer(os.Stdin) - for ; token, err := l.Next(); err != nil { - // process token - } - -To access the raw token stream (which includes tokens for comments): - - t := NewTokenizer(os.Stdin) - for ; token, err := t.Next(); err != nil { - // process token - } - -*/ -package shlex - -import ( - "bufio" - "fmt" - "io" - "strings" -) - -// TokenType is a top-level token classification: A word, space, comment, unknown. -type TokenType int - -// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape. -type runeTokenClass int - -// the internal state used by the lexer state machine -type lexerState int - -// Token is a (type, value) pair representing a lexographical token. -type Token struct { - tokenType TokenType - value string -} - -// Equal reports whether tokens a, and b, are equal. -// Two tokens are equal if both their types and values are equal. A nil token can -// never be equal to another token. -func (a *Token) Equal(b *Token) bool { - if a == nil || b == nil { - return false - } - if a.tokenType != b.tokenType { - return false - } - return a.value == b.value -} - -// Named classes of UTF-8 runes -const ( - spaceRunes = " \t\r\n" - escapingQuoteRunes = `"` - nonEscapingQuoteRunes = "'" - escapeRunes = `\` - commentRunes = "#" -) - -// Classes of rune token -const ( - unknownRuneClass runeTokenClass = iota - spaceRuneClass - escapingQuoteRuneClass - nonEscapingQuoteRuneClass - escapeRuneClass - commentRuneClass - eofRuneClass -) - -// Classes of lexographic token -const ( - UnknownToken TokenType = iota - WordToken - SpaceToken - CommentToken -) - -// Lexer state machine states -const ( - startState lexerState = iota // no runes have been seen - inWordState // processing regular runes in a word - escapingState // we have just consumed an escape rune; the next rune is literal - escapingQuotedState // we have just consumed an escape rune within a quoted string - quotingEscapingState // we are within a quoted string that supports escaping ("...") - quotingState // we are within a string that does not support escaping ('...') - commentState // we are within a comment (everything following an unquoted or unescaped # -) - -// tokenClassifier is used for classifying rune characters. -type tokenClassifier map[rune]runeTokenClass - -func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) { - for _, runeChar := range runes { - typeMap[runeChar] = tokenType - } -} - -// newDefaultClassifier creates a new classifier for ASCII characters. -func newDefaultClassifier() tokenClassifier { - t := tokenClassifier{} - t.addRuneClass(spaceRunes, spaceRuneClass) - t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass) - t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass) - t.addRuneClass(escapeRunes, escapeRuneClass) - t.addRuneClass(commentRunes, commentRuneClass) - return t -} - -// ClassifyRune classifiees a rune -func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass { - return t[runeVal] -} - -// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped. -type Lexer Tokenizer - -// NewLexer creates a new lexer from an input stream. -func NewLexer(r io.Reader) *Lexer { - - return (*Lexer)(NewTokenizer(r)) -} - -// Next returns the next word, or an error. If there are no more words, -// the error will be io.EOF. -func (l *Lexer) Next() (string, error) { - for { - token, err := (*Tokenizer)(l).Next() - if err != nil { - return "", err - } - switch token.tokenType { - case WordToken: - return token.value, nil - case CommentToken: - // skip comments - default: - return "", fmt.Errorf("Unknown token type: %v", token.tokenType) - } - } -} - -// Tokenizer turns an input stream into a sequence of typed tokens -type Tokenizer struct { - input bufio.Reader - classifier tokenClassifier -} - -// NewTokenizer creates a new tokenizer from an input stream. -func NewTokenizer(r io.Reader) *Tokenizer { - input := bufio.NewReader(r) - classifier := newDefaultClassifier() - return &Tokenizer{ - input: *input, - classifier: classifier} -} - -// scanStream scans the stream for the next token using the internal state machine. -// It will panic if it encounters a rune which it does not know how to handle. -func (t *Tokenizer) scanStream() (*Token, error) { - state := startState - var tokenType TokenType - var value []rune - var nextRune rune - var nextRuneType runeTokenClass - var err error - - for { - nextRune, _, err = t.input.ReadRune() - nextRuneType = t.classifier.ClassifyRune(nextRune) - - if err == io.EOF { - nextRuneType = eofRuneClass - err = nil - } else if err != nil { - return nil, err - } - - switch state { - case startState: // no runes read yet - { - switch nextRuneType { - case eofRuneClass: - { - return nil, io.EOF - } - case spaceRuneClass: - { - } - case escapingQuoteRuneClass: - { - tokenType = WordToken - state = quotingEscapingState - } - case nonEscapingQuoteRuneClass: - { - tokenType = WordToken - state = quotingState - } - case escapeRuneClass: - { - tokenType = WordToken - state = escapingState - } - case commentRuneClass: - { - tokenType = CommentToken - state = commentState - } - default: - { - tokenType = WordToken - value = append(value, nextRune) - state = inWordState - } - } - } - case inWordState: // in a regular word - { - switch nextRuneType { - case eofRuneClass: - { - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - case spaceRuneClass: - { - t.input.UnreadRune() - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - case escapingQuoteRuneClass: - { - state = quotingEscapingState - } - case nonEscapingQuoteRuneClass: - { - state = quotingState - } - case escapeRuneClass: - { - state = escapingState - } - default: - { - value = append(value, nextRune) - } - } - } - case escapingState: // the rune after an escape character - { - switch nextRuneType { - case eofRuneClass: - { - err = fmt.Errorf("EOF found after escape character") - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - default: - { - state = inWordState - value = append(value, nextRune) - } - } - } - case escapingQuotedState: // the next rune after an escape character, in double quotes - { - switch nextRuneType { - case eofRuneClass: - { - err = fmt.Errorf("EOF found after escape character") - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - default: - { - state = quotingEscapingState - value = append(value, nextRune) - } - } - } - case quotingEscapingState: // in escaping double quotes - { - switch nextRuneType { - case eofRuneClass: - { - err = fmt.Errorf("EOF found when expecting closing quote") - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - case escapingQuoteRuneClass: - { - state = inWordState - } - case escapeRuneClass: - { - state = escapingQuotedState - } - default: - { - value = append(value, nextRune) - } - } - } - case quotingState: // in non-escaping single quotes - { - switch nextRuneType { - case eofRuneClass: - { - err = fmt.Errorf("EOF found when expecting closing quote") - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - case nonEscapingQuoteRuneClass: - { - state = inWordState - } - default: - { - value = append(value, nextRune) - } - } - } - case commentState: // in a comment - { - switch nextRuneType { - case eofRuneClass: - { - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } - case spaceRuneClass: - { - if nextRune == '\n' { - state = startState - token := &Token{ - tokenType: tokenType, - value: string(value)} - return token, err - } else { - value = append(value, nextRune) - } - } - default: - { - value = append(value, nextRune) - } - } - } - default: - { - return nil, fmt.Errorf("Unexpected state: %v", state) - } - } - } -} - -// Next returns the next token in the stream. -func (t *Tokenizer) Next() (*Token, error) { - return t.scanStream() -} - -// Split partitions a string into a slice of strings. -func Split(s string) ([]string, error) { - l := NewLexer(strings.NewReader(s)) - subStrings := make([]string, 0) - for { - word, err := l.Next() - if err != nil { - if err == io.EOF { - return subStrings, nil - } - return subStrings, err - } - subStrings = append(subStrings, word) - } -}