#! /bin/sed -nf ####### DESCRIPTION ############################################################ # # config.sed # version 1.0 # # This script processes a configuration file. # # The input to the script is a configuration file. Each line of the # configuration file should be one of the following: # # 1) a blank line (with or without whitespace); # 2) a comment; # 3) an assignment via equals ('='); # 4) an assignment via here document ('<<' or '<<-'). # # The syntax of a comment is: # # [] '#' [] # # The syntax of an assignment via equals is: # # [] [] '=' [] [] [] # # The name must begin with a letter or underscore and can contain any # combination of digits, letters, dashes, periods, slashes, colons, and # underscores. # # If the value is not present, the entire line will be treated as a comment. If # the value is present, it must be either unquoted, single-quoted, or # double-quoted. The value may begin on a subsequent line if each newline after # the operator (and before the value) is immediately preceded by a backslash, # and the backslash is the first non-whitespace character after the operator (or # the first non-whitespace character of the line, in the case of intermediate # lines). The value may continue over multiple lines if each newline within the # value is immediately preceded by a backslash. Such backslash-newline pairs # will be removed and will not be included as part of the value. A newline # cannot be part of a value assigned via equals. # # An unquoted value extends from the first non-whitespace character to the last # non-whitespace character (or to the last non-whitespace character before a # comment, if a comment is present). A pound sign may be included as part of the # value if escaped by a backslash; the backslash will be removed and will not be # included as part of the value. 
Each single quote and double quote (other than # as the first character) will be treated as if escaped by a backslash (that is, # no split tokenization will be performed). Each dollar sign, grave accent, and # backslash (other than a backslash used to escape another character or a # newline) will be treated as if escaped by a backslash (that is, no parameter # substitution or command substitution will be performed); if such a backslash # is present, it will be removed and will not be included as part of the value. # Otherwise, each backslash (other than a backslash used to escape a newline) # will be treated literally. # # A single-quoted value extends from the first character after the opening quote # to the last character before the closing quote. A single quote cannot be # included as part of the value. Each backslash (other than a backslash used to # escape a newline) will be treated literally. # # A double-quoted value extends from the first character after the opening quote # to the last character before the closing quote. A double quote may be included # as part of the value if escaped by a backslash; the backslash will be removed # and will not be included as part of the value. Each dollar sign, grave accent, # and backslash (other than a backslash used to escape another character or a # newline) will be treated as if escaped by a backslash (that is, no parameter # substitution or command substitution will be performed); if such a backslash # is present, it will be removed and will not be included as part of the value. # Otherwise, each backslash (other than a backslash used to escape a newline) # will be treated literally. # # The syntax of an assignment via here document is: # # [] [] { '<<' | '<<-' } [] [ [] # [...] # ] # # The name must adhere to the same syntax as specified for an assignment by # equals. 
# # If the first occurrence of the word is not present, the entire line will be # treated as a comment and processing will continue with the next line, # interpreting it outside the context of a here document. If the first # occurrence of the word is present, it extends from the first non-whitespace # character after the operator to the last non-whitespace character, and will be # treated literally (that is, no parameter substitution or command substitution # will be performed, either in the word or in any of the value lines). # # If the operator is '<<' (meaning the here document is unindented), the value # will be the concatenation of value lines, read literally, until the second # occurrence of the word is encountered on a line by itself. The value will # include each newline that separates a pair of value lines, but will not # include the newline that separates the last value line from the second # occurrence of the word. # # If the operator is '<<-' (meaning the here document is indented), the result # will be the same as if the operator was '<<' except that leading tabs will be # stripped from each line (including the second occurrence of the word). # # Since the here document is read literally, comments that appear in the here # document will be included as part of the value. Also, note that this is not a # true here document, since it is stored by sed and not in a temporary file. # # Because each value is stored by sed, the maximum length of a value is # approximately eight kilobytes. # # The output of the script is a series of lines, one line per assignment in the # configuration file. The syntax of each output line is: # # <name> '=' <encoded-value> # # Note that each name is guaranteed not to contain an equals sign, and each # output line will contain no whitespace, unless whitespace is part of the # value. 
Thus, the name of the assignment is the first character of the line to # the last character before the first equals sign, and the encoded value of the # assignment is the first character after the first equals sign to the last # character of the line (not including the newline). # # In each encoded value, each double quote, dollar sign, grave accent, and # backslash will be escaped by a backslash, and each newline (present only if # assigned via here document) will be represented by the escape sequence '\n'. # This is necessary for representing a newline and useful for assigning both an # encoded value and a decoded value to shell parameters. Assuming the shell # parameter 'LINE' contains an output line, consider the following: # # NAME=`echo $LINE | cut -f 1 -d "="` # VALUE_ENCODED=`echo $LINE | cut -f 2- -d "="` # eval "VALUE_DECODED=\"$VALUE_ENCODED\"" # # The script will output a line as each assignment in the configuration file is # processed, until the end of the configuration file is reached or until a # syntax error is encountered in the configuration file. If the end of the # configuration file is reached, the last output line will be a status code that # indicates success. If a syntax error is encountered in the configuration file, # the last output line will be a status code that indicates the error. The # status code will be one of the following: # # 0 success; # 1 invalid name; # 2 beginning of value is folded beyond end-of-file; # 3 unquoted value is folded beyond end-of-file; # 4 single-quoted value is folded beyond end-of-file; # 5 single-quoted value is missing closing quote; # 6 single-quoted value contains extraneous quote; # 7 double-quoted value is folded beyond end-of-file; # 8 double-quoted value is missing closing quote; # 9 double-quoted value contains extraneous quote; # 10 unindented here document continues beyond end-of-file; # 11 indented here document continues beyond end-of-file. 
#
# If the status code is greater than 0, the second-to-last output line will be
# the line number at which the syntax error was encountered. If the status code
# is 1, the third-to-last output line will be the line of the configuration file
# with the invalid name. If the status code is greater than 1, the third-to-last
# output line will be the name that was to be assigned an invalid value.
#
#
####### HISTORY ################################################################
#
# 2001.11.28 Nathan D. Ryan
#     Initial implementation. This probably would have been easier in
#     perl, but I got carried away...
#
#

# NOTE(review): the labels 'is_eof', 'quit', 'decode' and 'encode' that the
# rules below branch to are not defined in this part of the file; they are
# presumably defined further down -- confirm.
#
# NOTE(review): every '[ ]' class below contains only a space as shown here.
# The description speaks of "whitespace", so the classes may originally have
# held a space and a tab -- confirm against a pristine copy.

# process a blank line
#
/^[ ]*$/ b is_eof

# process a comment
#
/^[ ]*#/ b is_eof

# process an assignment via equals ('=')
#
# On entry the pattern space holds the raw input line. On the way out (to
# 'decode' or 'encode') the hold space holds the name and the pattern space
# holds the raw value; on the way to 'quit' the pattern space holds the status
# code instead.
#
/^[ ]*[A-Za-z_][0-9A-Za-z\.\/:_-]*[ ]*=/ {

    # copy the name into the hold space, removing both extraneous whitespace
    # and the operator: 'h' saves the line, the first 's' reduces the pattern
    # space to the name, 'x' swaps (hold = name, pattern = line), and the
    # second 's' reduces the pattern space to everything after the operator
    #
    h; s/^[ ]*\([^ =]*\).*$/\1/
    x; s/^[^=]*=[ ]*\(.*\)$/\1/

    # unfold lines before the value: a lone backslash means the value starts
    # on the next line; at end-of-file that is error 2
    #
    : unfold
    /^\\$/ {
        $ {
            s/^.*$/2/; b quit
        }
        N; s/\\\n[ ]*//; b unfold
    }

    # process a null value
    #
    /^$/ b is_eof

    # process a null value with a comment
    #
    /^#/ b is_eof

    # process an unquoted value
    #
    /^[^"']/ {
        : unfold0

        # reset the replacement test ('t' to the very next label only clears
        # the "a substitution succeeded" flag)
        #
        t reset0
        : reset0

        # replace a trailing comment with a space (the space prevents
        # unintended line folding); '\(\\*\)\2' matches an even number of
        # backslashes, so the pound sign that follows is unescaped
        #
        s/\([^\\]\)\(\\*\)\2#.*$/\1\2\2 /; t trim0

        # unfold lines within the value: an odd number of trailing
        # backslashes folds the line; at end-of-file that is error 3
        #
        /[^\\]\(\\*\)\1\\$/ {
            $ {
                s/^.*$/3/; b quit
            }
            N; s/\\\n//; b unfold0
        }

        : trim0

        # remove trailing whitespace
        #
        s/[ ]*$//

        # remove each backslash used to escape a pound sign or a single
        # quote; the first command handles an escape at the very start of
        # the value, the second an escape preceded by some other character.
        # In the second command the even run of backslashes is group 2, so
        # the back-reference must be '\2' (it previously read '\1', which
        # repeated the preceding character instead and corrupted the value).
        #
        s/^\(\\*\)\1\\\([#']\)/\1\1\2/g
        s/\([^\\]\)\(\\*\)\2\\\([#']\)/\1\2\2\3/g

        b decode
    }

    # process a single-quoted value
    #
    /^'/ {
        : unfold1

        # reset the replacement test
        #
        t reset1
        : reset1

        # remove a trailing comment
        #
        s/\(.'[ ]*\)#.*$/\1/; t trim1

        # handle an extraneous quote (a quote after the opening one that is
        # followed by anything other than whitespace): error 6
        #
        /.'[ ]*[^ ]/ {
            s/^.*$/6/; b quit
        }

        # unfold lines within the value; at end-of-file that is error 4
        #
        /\\$/ {
            $ {
                s/^.*$/4/; b quit
            }
            N; s/\\\n//; b unfold1
        }

        : trim1

        # remove trailing whitespace
        #
        s/[ ]*$//

        # handle a missing closing quote: error 5
        #
        /'$/ !{
            s/^.*$/5/; b quit
        }

        # remove the opening quote and the closing quote
        #
        s/^'\(.*\)'$/\1/

        b encode
    }

    # process a double-quoted value
    #
    /^"/ {
        : unfold2

        # reset the replacement test
        #
        t reset2
        : reset2

        # remove a trailing comment that follows an unescaped closing quote
        #
        s/\([^\\]\)\(\\*\)\2"[ ]*#.*$/\1\2\2"/; t trim2

        # handle an extraneous quote (an unescaped quote followed by anything
        # other than whitespace): error 9
        #
        /[^\\]\(\\*\)\1"[ ]*[^ ]/ {
            s/^.*$/9/; b quit
        }

        # unfold lines within the value; at end-of-file that is error 7
        #
        /[^\\]\(\\*\)\1\\$/ {
            $ {
                s/^.*$/7/; b quit
            }
            N; s/\\\n//; b unfold2
        }

        : trim2

        # remove trailing whitespace
        #
        s/[ ]*$//

        # handle a missing closing quote: error 8
        #
        /"$/ !{
            s/^.*$/8/; b quit
        }

        # remove the opening quote and the closing quote
        #
        s/^"\(.*\)"$/\1/

        b decode
    }
}

# process an assignment via here document ('<<' or '<<-')
#
/^[ ]*[A-Za-z_][0-9A-Za-z\.\/:_-]*[ ]*<