¿Cómo analizar una cadena y devolver una matriz anidada?

Quiero una función de Python que tome una cadena y devuelva una matriz, donde cada elemento de la matriz sea un carácter u otra matriz de este tipo. Las matrices anidadas se marcan en la cadena de entrada comenzando con ‘(‘ y terminando con ‘)’.

Así, la función actuaría así:

1) foo("abc") == ["a", "b", "c"] 2) foo("a(b)c") == ["a", ["b"], "c"] 3) foo("a(b(c))") == ["a", ["b", ["c"]]] 4) foo("a(b(c)") == error: closing bracket is missing 5) foo("a(b))c") == error: opening bracket is missing 6) foo("a)b(c") == error: opening bracket is missing 

Nota: Prefiero una solución que sea puramente funcional.

 def foo(s): def foo_helper(level=0): try: token = next(tokens) except StopIteration: if level != 0: raise Exception('missing closing paren') else: return [] if token == ')': if level == 0: raise Exception('missing opening paren') else: return [] elif token == '(': return [foo_helper(level+1)] + foo_helper(level) else: return [token] + foo_helper(level) tokens = iter(s) return foo_helper() 

Y,

 >>> foo('a((b(c))d)(e)') ['a', [['b', ['c']], 'd'], ['e']] 

Iterativo.

 def foo(xs): stack = [[]] for x in xs: if x == '(': stack[-1].append([]) stack.append(stack[-1][-1]) elif x == ')': stack.pop() if not stack: return 'error: opening bracket is missing' #raise ValueError('error: opening bracket is missing') else: stack[-1].append(x) if len(stack) > 1: return 'error: closing bracket is missing' #raise ValueError('error: closing bracket is missing') return stack.pop() assert foo("abc") == ["a", "b", "c"] assert foo("a(b)c") == ["a", ["b"], "c"] assert foo("a(b(c))") == ["a", ["b", ["c"]]] assert foo("a((b(c))d)(e)") == ['a', [['b', ['c']], 'd'], ['e']] assert foo("a(b(c)") == "error: closing bracket is missing" assert foo("a(b))c") == "error: opening bracket is missing" assert foo("a)b(c") == 'error: opening bracket is missing' 

Sugeriría dos maneras:

Programe su propio analizador de descenso recusivo como aquí , o use pyparsing , algo como

 import pyparsing as pp expr = pp.Forward() expr << pp.Word(pp.alphas) + pp.Optional('(' + expr + ')') + pp.Optional(pp.Word(pp.alphas)) 

Aquí se describe una expresión recursiva como una secuencia de alfas, que se puede intercalar entre paréntesis equilibrados. Cuando verifique este ejemplo para las salidas, verá cómo obtener la estructura de salida deseada (aunque requerirá algunos ajustes de su lado y requerirá algo de aprendizaje sobre la creación de parámetros).

saludos Markus

Usando regex y ast.literal_eval

 >>> import re >>> from ast import literal_eval >>> def listit(t): ... return list(map(listit, t)) if isinstance(t, (list, tuple)) else t ... def solve(strs): s = re.sub(r'[A-Za-z]', "'\g<0>',", strs) s = re.sub(r"\)", "\g<0>,", s) try: return listit( literal_eval('[' + s + ']') ) except : return "Invalid string! " ... >>> solve("abc") ['a', 'b', 'c'] >>> solve("a(b)c") ['a', ['b'], 'c'] >>> solve("a(b(c))") ['a', ['b', ['c']]] >>> solve("a(b(c)") 'Invalid string! ' >>> solve("a)b(c") 'Invalid string! ' >>> solve("a(b))c") 'Invalid string! ' >>> solve('a((b(c))d)(e)') ['a', [['b', ['c']], 'd'], ['e']] 

Un enfoque bastante rápido y desagradable (solo para algo diferente):

 import json, re def foo(x): # Split continuous strings # Match consecutive characters matches = re.findall('[az]{2,}', x) for m in matches: # Join with "," x = x.replace(m, '","'.join(y for y in list(m))) # Turn curvy brackets into square brackets x = x.replace(')', '"],"') x = x.replace('(', '",["') # Wrap whole string with square brackets x = '["'+x+'"]' # Remove empty entries x = x.replace('"",', '') x = x.replace(',""', '') try: # Load with JSON return json.loads(x) except: # TODO determine error type return "error" def main(): print foo("abc") # ['a', 'b', 'c'] print foo("a(b)c") # ['a', ['b'], 'c'] print foo("a(b(c))") # ['a', ['b', ['c']]] print foo("a(b))c") # error print foo('a((b(c))d)(e)') # ['a', [['b', ['c']], 'd'], ['e']] 
 def parse_nested(iterator, level=0): result = [] for c in iterator: if c == '(': result.append(parse_nested(iterator, level+1)) elif c == ')': if level: return result else: raise ValueError("Opening parenthesis missing") else: result.append(c) if level: raise ValueError("Closing parenthesis missing") else: return result print parse_nested(iter('a((b(c))d)(e)')) 

La recursión es algo muy poderoso que deberías intentar usar.

Aquí está mi código:

 # encoding: utf-8 # Python33 def check(s): cs = [c for c in s if c == '(' or c ==')'] flag = 0 for c in cs: if flag < 0: return 'opening bracket is missing' if c == '(': flag += 1 else: flag -= 1 if flag < 0: return 'opening bracket is missing' elif flag > 0: return 'closing bracket is missing' else: return '' def _foo(cs): result = [] while len(cs): c = cs.pop(0) if c == '(': result.append(_foo(cs)) elif c == ')': return result else: result.append(c) return result def foo(s): valiad = check(s) if valiad: return valiad cs = list(s) return _foo(cs) if __name__ == '__main__': ss = ["abc","a(b)c","a(b(c))","a(b(c)","a(b))c","a)b(c"] for s in ss: print(foo(s))