# $Id: getopt.py,v 1.3 2000/06/29 15:20:20 fms Exp $

"""Parser for command line options.

This module helps scripts to parse the command line arguments in
sys.argv.  It supports the same conventions as the Unix getopt()
function (including the special meanings of arguments of the form `-'
and `--').  Long options similar to those supported by GNU software
may be used as well via an optional third argument.  This module
provides two functions and a family of exceptions:

getopt() -- Parse command line options (classic)
getoptdict() -- As above, but return a dictionary instead of a list of pairs

GetoptError -- exception (class) raised with 'opt' attribute, which is
the option involved with the exception. Actual exceptions raised are
subclasses of this, so the calling program can figure out what the
problem was without having to (yecchh) inspect the text of the
message. This is of relevance because the options usually come from
the interactive user, to whom a programmer has a duty to offer
something nicer than a traceback even when totally broken input is
supplied.

"""

# Long option support added by Lars Wirzenius <liw@iki.fi>.

# Gerrit Holl <gerrit@nl.linux.org> moved the string-based exceptions
# to class-based exceptions.

# Frank Stajano ( http://www.uk.research.att.com/~fms/ )
# added dictionary output. While he was at it he also specialised the
# exceptions and retouched the doc strings.

class GetoptError(Exception):
    """Base class of every option-parsing error raised by this module.

    Attributes:
      msg -- the message; taken from the first constructor argument when
             the exception is built with one or two arguments.
      opt -- the option the error is about; taken from the second
             constructor argument when exactly two are given.

    With any other number of constructor arguments both attributes keep
    their class-level default, the empty string.
    """

    opt = ''
    msg = ''

    def __init__(self, *args):
        self.args = args
        count = len(args)
        if count == 1 or count == 2:
            self.msg = args[0]
        if count == 2:
            self.opt = args[1]

    def __str__(self):
        # The printable form is the message alone; the option name is
        # available separately through the 'opt' attribute.
        return self.msg

# Alias retained for backward compatibility.
error = GetoptError

class InternalError(SystemError):
    """Signals a broken internal invariant of this module (a "can't
    happen" situation), as opposed to bad user input. It is deliberately
    not a GetoptError."""

# Backwards compatibility: thanks to exception inheritance, all of the
# following may be caught as GetoptError if you are not interested in
# the details.
class RepeatedOption(GetoptError):
    """An option name occurred more than once while the REJECT policy
    was in force."""
class MissingArgument(GetoptError):
    """An option that requires an argument was the last word on the
    command line, so there was nothing left to use as its value."""
class UnexpectedArgument(GetoptError):
    """A long option declared without '=' was given a non-empty value
    in the '--name=value' form."""
class AmbiguousPrefix(GetoptError):
    """An abbreviated long option is a prefix of more than one of the
    declared long option names."""
class UnknownOption(GetoptError):
    """A short or long option on the command line matches none of the
    declared options."""

# Constants enumerating the possible values for the onRepeatedNames parameter:
# Raise RepeatedOption when an option name shows up a second time.
REJECT = "reject"
# Keep the value of the first occurrence and ignore the later ones.
KEEP_EARLIEST = "keep earliest"
# Let every later occurrence overwrite the value stored so far.
KEEP_LATEST = "keep latest"
# Gather the values of a repeated option into a list, in order of
# appearance (an option seen only once keeps its plain string value).
APPEND = "append"


def getoptdict(args, shortOptions="", longOptions=[], onRepeatedNames=REJECT):
    """Parse args like getopt(), but hand the options back as a dictionary.

    Parameters:
      args -- the argument list to parse, typically "sys.argv[1:]".
      shortOptions -- string of single-letter options to recognise; a
          letter followed by ':' takes an argument (as for getopt()).
      longOptions -- list of long option names to recognise, without the
          leading '--'; a name followed by '=' takes an argument (as
          for getopt()).
      onRepeatedNames -- what to do when the same option name occurs
          more than once in args. One of REJECT, KEEP_EARLIEST,
          KEEP_LATEST, APPEND (callers outside this module must qualify
          them: getopt.REJECT and so on):
            REJECT        -- raise RepeatedOption;
            KEEP_EARLIEST -- keep only the first value;
            KEEP_LATEST   -- keep only the last value;
            APPEND        -- collect the values of a repeated option in
                             a list; an option that occurs only once
                             still maps to a plain string.

    Returns a dictionary keyed by option name as it is reported by
    getopt() (that is, including the leading '-' or '--') and holding
    the option values. The extra key "" (empty string) holds the
    non-option arguments left over after the options, which is a
    trailing slice of args.

    Any exception raised by getopt() propagates unchanged.
    """
    pairs, leftovers = getopt(args, shortOptions, longOptions)
    table = _list2dict(pairs, onRepeatedNames)
    # Real option names always start with '-', so '' cannot clash.
    table[''] = leftovers
    return table


def getopt(args, shortopts, longopts = []):
    """getopt(args, options[, long_options]) -> opts, args

    Parse the leading options of a command line.

    args is the argument list to parse, without the name of the running
    program; typically "sys.argv[1:]". shortopts is the string of
    option letters to recognise, where a letter followed by ':'
    requires an argument (the format used by Unix getopt()). longopts,
    if given, is a list of long option names without the leading '--';
    a name followed by '=' requires an argument. A single string is
    accepted as shorthand for a one-element list.

    Parsing stops at the first word that does not start with '-', at a
    lone '-' (which is left in place as an ordinary argument), or at
    '--' (which is consumed).

    An option that requires an argument takes it greedily: from the
    rest of the current word if there is any, otherwise from the next
    word, even if that word starts with '-'. With short option "o:",
    both ['-o-2'] and ['-o', '-2'] give '-o' the value '-2'. An '=' is
    a separator for long options only: ['--opt=-2'] yields '-2', while
    ['-o=-2'] yields '=-2'.

    Returns a pair. The first element is the list of (option, value)
    tuples in the order found, so repeated options are preserved; the
    option keeps its '-' or '--' prefix and the value is '' when the
    option takes no argument. The second element is what remains of
    args after the options (a trailing slice of args). Long and short
    options may be mixed.

    """
    found = []
    # Work on a private, sorted copy so the caller's list is untouched.
    if type(longopts) == type(""):
        known_longs = [longopts]
    else:
        known_longs = list(longopts)
    known_longs.sort()

    remaining = args
    while remaining:
        word = remaining[0]
        if word[:1] != '-' or word == '-':
            # First non-option word: leave it (and the rest) alone.
            break
        remaining = remaining[1:]
        if word == '--':
            # Explicit end-of-options marker; it is swallowed.
            break
        if word[:2] == '--':
            found, remaining = _do_longs(found, word[2:], known_longs,
                                         remaining)
        else:
            found, remaining = _do_shorts(found, word[1:], shortopts,
                                          remaining)

    return found, remaining




def _list2dict(list, onRepeatedNames=REJECT):

    """Helper function. Convert a list of (name, value) pairs of
    strings to a dictionary.

    Parameters:
      list -- sequence of (name, value) pairs; both items must be
          plain strings (checked with assert).
      onRepeatedNames -- policy for names that occur more than once;
          one of REJECT, KEEP_EARLIEST, KEEP_LATEST, APPEND, with the
          semantics described in the documentation of getoptdict().

    Returns a dictionary indexed by name. A name that occurs once maps
    to its value. For a repeated name the result depends on the policy:
    the first value (KEEP_EARLIEST), the last value (KEEP_LATEST), or
    a list of all the values in order of appearance (APPEND).

    Raises:
      RepeatedOption -- a name is repeated and the policy is REJECT.
      ValueError -- a name is repeated and onRepeatedNames is not one
          of the four known policies. The policy is only examined when
          a repetition is actually found.
      InternalError -- under APPEND, the slot being extended holds
          neither a string nor a list (should be impossible).
    """

    result = {}
    for (name, value) in list:
        assert(type(name) == type(""))
        assert(type(value) == type(""))
        if name not in result:
            # First occurrence: store the bare value.
            result[name] = value
            continue

        if onRepeatedNames == REJECT:
            raise RepeatedOption("Repeated option %s" % name, name)
        elif onRepeatedNames == KEEP_EARLIEST:
            pass
        elif onRepeatedNames == KEEP_LATEST:
            result[name] = value
        elif onRepeatedNames == APPEND:
            current = result[name]
            if type(current) == type(""):
                # Second occurrence: promote the string to a list.
                result[name] = [current, value]
            elif type(current) == type([]):
                current.append(value)
            else:
                # This can't happen. Report what the slot really holds,
                # not the value that was about to be appended.
                raise InternalError(
                    ("Neither list nor string while "
                    "appending pair %s to slot %s containing %s" ) % (
                    repr((name, value)), repr(name), repr(current)))
        else:
            raise ValueError ("onRepeatedNames must be one of REJECT, "
                "KEEP_EARLIEST, KEEP_LATEST, APPEND")
    return result




def _do_longs(opts, opt, longopts, args):
    """Handle one long option word and record it in opts.

    Parameters:
      opts -- list of (option, value) pairs collected so far; the new
          pair is appended to it in place.
      opt -- the word with its leading '--' removed, possibly of the
          form 'name=value' and possibly abbreviated.
      longopts -- the declared long option names.
      args -- the words that follow the current one.

    Returns (opts, args), where args no longer contains the next word
    if that word was consumed as the option's value.

    Raises MissingArgument if the option needs a value, none was given
    with '=', and no word follows; UnexpectedArgument if a non-empty
    '=value' was given to an option that takes none. Errors raised by
    _long_has_args() while resolving the name propagate.
    """
    eq = opt.find('=')
    if eq < 0:
        optarg = None
    else:
        optarg = opt[eq+1:]
        opt = opt[:eq]

    # Resolve a possibly abbreviated name to the declared one.
    has_arg, opt = _long_has_args(opt, longopts)
    if not has_arg:
        if optarg:
            raise UnexpectedArgument('option --%s must not have an argument'
                                     % opt, opt)
    elif optarg is None:
        # No '=value' in this word: the value is the next word.
        if not args:
            raise MissingArgument('option --%s requires argument'
                                  % opt, opt)
        optarg = args[0]
        args = args[1:]
    opts.append(('--' + opt, optarg or ''))
    return opts, args

# Return:
#   has_arg?
#   full option name
def _long_has_args(opt, longopts):
    """Resolve a (possibly abbreviated) long option name.

    Parameters:
      opt -- the name as typed, without '--' and without any '=value'.
      longopts -- the declared long option names; a trailing '=' marks
          an option that takes an argument. The order does not matter.

    Returns a pair (has_arg, name): has_arg is 1 if the option takes an
    argument and 0 otherwise; name is the full declared option name
    without the trailing '='.

    An exact match always wins, even when opt is also a prefix of other
    declared names. Otherwise opt must be a prefix of exactly one
    declared name.

    Raises UnknownOption if opt is a prefix of no declared name, and
    AmbiguousPrefix if it is a prefix of several and matches none of
    them exactly.
    """
    optlen = len(opt)
    candidates = []
    for name in longopts:
        if name[:optlen] == opt:
            candidates.append(name)
    if not candidates:
        raise UnknownOption('option --%s not recognized' % opt, opt)

    # Exact matches take priority over abbreviations. Testing for them
    # explicitly (instead of trusting sort order) matters for names such
    # as 'foo-bar', which sort before 'foo='.
    if opt in candidates:
        return 0, opt
    if opt + '=' in candidates:
        return 1, opt

    # No exact match, so the abbreviation has to be unique.
    if len(candidates) > 1:
        raise AmbiguousPrefix('option --%s not a unique prefix'
                              % opt, opt)
    match = candidates[0]
    if match[-1:] == '=':
        return 1, match[:-1]
    return 0, match

def _do_shorts(opts, optstring, shortopts, args):
    """Handle one word of clustered short options and record them in opts.

    Parameters:
      opts -- list of (option, value) pairs collected so far; new pairs
          are appended to it in place.
      optstring -- the word with its leading '-' removed, e.g. 'abc'
          for '-abc'.
      shortopts -- the declared option letters (':' after a letter
          means the option takes an argument).
      args -- the words that follow the current one.

    Returns (opts, args), where args no longer contains the next word
    if that word was consumed as an option's value.

    An option that takes an argument uses the rest of the current word
    as its value, or the next word if nothing is left. Raises
    MissingArgument if neither exists; errors raised by
    _short_has_arg() for undeclared letters propagate.
    """
    pending = optstring
    while pending != '':
        letter = pending[0]
        pending = pending[1:]
        if not _short_has_arg(letter, shortopts):
            # Plain flag: carry on with the next letter of the cluster.
            opts.append(('-' + letter, ''))
            continue
        if pending == '':
            if not args:
                raise MissingArgument('option -%s requires argument'
                                      % letter, letter)
            pending = args[0]
            args = args[1:]
        # Everything still pending is the value; the cluster ends here.
        opts.append(('-' + letter, pending))
        pending = ''
    return opts, args

def _short_has_arg(opt, shortopts):
    """Tell whether the short option opt takes an argument.

    opt is a single option letter; shortopts is the declaration string.
    Returns true if the first occurrence of opt in shortopts is
    immediately followed by ':', false otherwise. Raises UnknownOption
    if opt does not occur in shortopts; ':' itself is never a valid
    option letter.
    """
    if opt != ':':
        for pos in range(len(shortopts)):
            if shortopts[pos] == opt:
                return shortopts[pos+1:pos+2] == ':'
    raise UnknownOption('option -%s not recognized' % opt, opt)


def _test_getoptdict():

    """More of a demo than a test, since the results are not
    automatically checked. But it gives you a good gallery of
    examples. Worth running this test and reading the output once, if
    nothing else as an alternative to reading the docs.

    For every combination below, print the getoptdict() call and then
    either the resulting dictionary (keys sorted) or the GetoptError
    that the call raised. Takes no arguments and returns None."""

    # Imported here so that the demo also works when it is called from
    # an importing module rather than from the command line.
    import sys

    args1 = "-a -b -c -d123 --echo=hi -q -q -q spam eggs".split()
    args2 = "-a -b -c -d123 --echo=hi -q4 -q5 -q6 --echo=bye".split()
    args3 = "-a -axxx --book=zen -c -qX --echo=hi --echo foo bar".split()
    args4 = "-a -axxx --book zen -q X --echo hi --echo foo bar".split()
    short1 = "abcq"
    short2 = "a:bcq:z"
    short3 = "abcd:q"
    short4 = "abcdq"
    short6 = "abcd:q:"
    long1 = ["echo"]
    long2 = ["echo=", "book="]
    long3 = ["echo="]

    cases = [
        (args1, short1, long1, REJECT),
        (args1, short4, long1, REJECT),
        (args1, short3, long1, REJECT),
        (args1, short3, long3, REJECT),
        (args1, short3, long3, KEEP_LATEST),
        (args1, short3, long3, APPEND),

        (args2, short1, long1, REJECT),
        (args2, short3, long1, REJECT),
        (args2, short3, long3, REJECT),
        (args2, short6, long3, REJECT),
        (args2, short6, long3, KEEP_EARLIEST),
        (args2, short6, long3, KEEP_LATEST),
        (args2, short6, long3, APPEND),

        (args3, short2, long1, REJECT),
        (args3, short2, long2, REJECT),
        (args3, short2, long2, APPEND),
        # Note what happens to -axxx and to foo! If an option accepts
        # a value, this value is either the rest of the current word
        # (after the '=' for a long option) or, if the current word
        # has nothing left, the NEXT WORD!

        # Further illustration of this (look ma, no '=' signs):
        (args4, short2, long2, APPEND),
        ]

    # Single-argument print(...) behaves the same whether print is a
    # statement or a function.
    for (args, shorts, longs, policy) in cases:
        print("getoptdict(\n%s,\n%s,\n%s,\n%s) -->" % (
            repr(args), repr(shorts), repr(longs), repr(policy)))
        try:
            d = getoptdict(args, shorts, longs, policy)
            names = [key for key in d.keys()]
            names.sort()
            print("{")
            for key in names:
                print("%s: %s" % (repr(key), repr(d[key])))
            print("}")
        except GetoptError:
            print("***Exception %s:\n%s" % sys.exc_info()[0:2])
        print("")
            
# When the module is run as a script (rather than imported), print the
# getoptdict() demo gallery.
if __name__ == '__main__':
    # Binds sys as a module-level name for the code run below.
    import sys
    #print getopt(sys.argv[1:], "a:b", ["alpha=", "beta"])
    _test_getoptdict()
