#!/usr/bin/env python

""" Simple parser for Minimal XML[1] (aka Simple Markup Language).

    Author:  Magnus Lie Hetland
    History:
        1.1 (2001-09-28) Minor bugfix
        1.0 (2001-05-20) Initial version

    [1] http://www.docuverse.com/smldev/minxml.html
"""

import re, sys

def parse(text):
    """Parse a Minimal XML document string into a nested tuple tree.

    Runs of whitespace in *text* are collapsed to a single space, the
    text is split into tags and character data, empty pieces are
    dropped, and the remaining tokens are handed to ``tree()``.

    Decimal character references (``&#NNN;``) are decoded only after
    the tree has been built, and only inside character data.  Decoding
    them up front would turn ``&#60;`` / ``&#62;`` into literal ``<`` /
    ``>`` that the tokenizer then mistakes for markup.  As a
    consequence, characters produced by a reference (including
    whitespace) are kept verbatim, and references inside tag names are
    left untouched.

    Returns the ``(tag, kids)`` tuple built by ``tree()`` for the root
    element; text nodes appear as ``(None, text)`` tuples.
    """
    text = re.sub(r'\s+', ' ', text)
    pieces = [piece.strip() for piece in re.split(r'(<[^>]+>)', text)]
    tokens = [piece for piece in pieces if piece]
    # tree() consumes tokens with pop(), i.e. from the end of the list.
    tokens.reverse()
    return _decode_refs(tree(tokens))

def _decode_refs(node):
    """Return a copy of *node* with character references decoded in text."""
    tag, content = node
    if tag is None:
        # Text leaf: content is the character data string.
        decoded = re.sub(r'&#(\d+);',
                         lambda m: chr(int(m.group(1))), content)
        return (None, decoded)
    # Element: content is the list of child nodes.
    return (tag, [_decode_refs(kid) for kid in content])

def tree(tokens, tag=None):
    """Build a ``(tag, kids)`` tree from a reversed list of tokens.

    *tokens* is consumed in place with ``pop()``, so the next token to
    read must be the last item of the list.  When *tag* is not given
    (or is empty), the first token popped is taken as the opening tag
    and its first and last characters (the angle brackets) are dropped.

    Each kid is either a nested ``(tag, kids)`` tuple for a child
    element or a ``(None, text)`` tuple for character data.  Any
    closing tag ends the current element; its name is not compared
    with *tag*.

    Raises IndexError if the tokens run out before the element is
    closed.
    """
    if not tag:
        if not tokens:
            raise IndexError('no tokens to parse')
        tag = tokens.pop()[1:-1]
    kids = []
    while True:
        if not tokens:
            raise IndexError('unexpected end of input: missing </%s>' % tag)
        tok = tokens.pop()
        # Only a real closing tag ends the element; text such as "1/2"
        # also has '/' as its second character and must stay text.
        if tok.startswith('</'):
            break
        if tok.startswith('<'):
            kids.append(tree(tokens, tok[1:-1]))
        else:
            kids.append((None, tok))
    return (tag, kids)

if __name__ == "__main__":
    # Script mode: parse the document given as the first command-line
    # argument and print the resulting tuple tree.
    if len(sys.argv) < 2:
        sys.exit('usage: %s "<minimal-xml-text>"' % sys.argv[0])
    # Parenthesised form works as both the Python 2 print statement
    # and the Python 3 print function.
    print(parse(sys.argv[1]))
