Dividir un archivo de texto en secciones mediante una línea delimitadora especial – Python

Tengo un archivo de entrada como tal:

This is a text block start
This is the end

And this is another
with more than one line
and another line.

La tarea deseada es leer el archivo por secciones delimitadas por alguna línea especial (en este caso, una línea vacía); por ejemplo, [out]:

 [['This is a text block start', 'This is the end'], ['And this is another','with more than one line', 'and another line.']] 

He estado obteniendo la salida deseada al hacerlo:

 def per_section(it):
     """Yield the sections of *it*, using empty lines as delimiters.

     Args:
         it: An iterable of lines (for example an open text file), each
             normally ending in a newline.

     Yields:
         Each section as one string: the consecutive non-empty lines found
         between two delimiters, concatenated with their original line
         endings.
     """
     section = []
     for line in it:
         if line.strip('\n'):
             section.append(line)
         elif section:
             # Emit only when something was collected, so leading or
             # consecutive empty lines do not produce empty sections.
             yield ''.join(section)
             section = []
     # Yield any remaining lines as a section too.
     if section:
         yield ''.join(section)

Pero si la línea especial es una línea que comienza con # por ejemplo:

 # Some comments, maybe the title of the following section
 This is a text block start
 This is the end
 # Some other comments and also the title
 And this is another
 with more than one line
 and another line.

Tengo que hacer esto:

 def per_section(it):
     """Yield the sections of *it*, using lines that start with '#' as delimiters.

     Args:
         it: An iterable of lines (for example an open text file).

     Yields:
         Each section as one string: the consecutive lines that do not start
         with '#', concatenated with their original line endings. The
         delimiter lines themselves are dropped.
     """
     section = []
     for line in it:
         # startswith() is safe on an empty string, unlike indexing line[0].
         if not line.startswith('#'):
             section.append(line)
         elif section:
             # Emit only when something was collected, so a leading '#' line
             # or two '#' lines in a row do not produce empty sections.
             yield ''.join(section)
             section = []
     # Yield any remaining lines as a section too.
     if section:
         yield ''.join(section)

Si permitiera que per_section() tuviera un parámetro delimitador, podría intentar esto:

 def per_section(it, delimiter='\n'):
     """Yield the sections of *it*, split on one of two hard-coded delimiters.

     Args:
         it: An iterable of lines (for example an open text file).
         delimiter: Which kind of line separates sections. The default
             (a newline character) means empty lines; '#' means lines that
             start with '#'. No other value is supported.

     Yields:
         Each section as one string: the consecutive non-delimiter lines,
         concatenated with their original line endings.

     Raises:
         ValueError: If *delimiter* is not one of the two supported values.
             Because this is a generator, the error surfaces when iteration
             starts, not when the function is called.
     """
     if delimiter not in ('\n', '#'):
         raise ValueError('unsupported delimiter: %r' % (delimiter,))
     section = []
     for line in it:
         if delimiter == '\n':
             is_delimiter = not line.strip('\n')
         else:
             # startswith() is safe on an empty string, unlike line[0].
             is_delimiter = line.startswith('#')
         if not is_delimiter:
             section.append(line)
         elif section:
             # Skip empty sections caused by leading or repeated delimiters.
             yield ''.join(section)
             section = []
     # Yield any remaining lines as a section too.
     if section:
         yield ''.join(section)

Pero, ¿hay alguna manera de no tener que codificar de forma fija (hard-code) todos los delimitadores posibles?

¿Qué hay de pasar un predicado?

 def per_section(it, is_delimiter=lambda x: x.isspace()):
     """Yield the sections of *it* as lists of lines.

     Args:
         it: An iterable of lines (for example an open text file).
         is_delimiter: Predicate called with each raw line; a true result
             marks the line as a section separator. The default treats
             whitespace-only lines as separators.

     Yields:
         One list per section, holding that section's lines with trailing
         whitespace (including the line ending) stripped. Empty sections
         are never yielded.
     """
     ret = []
     for line in it:
         if is_delimiter(line):
             if ret:
                 yield ret  # alternative: yield ''.join(ret)
                 ret = []
         else:
             ret.append(line.rstrip())  # alternative: ret.append(line)
     # Emit whatever is left after the last delimiter.
     if ret:
         yield ret

Uso:

 # Split on the default delimiter.
 with open('/path/to/file.txt') as f:
     sections = list(per_section(f))

 # Split on comment lines, i.e. lines that start with '#'.
 with open('/path/to/file.txt.txt') as f:
     sections = list(per_section(f, lambda line: line.startswith('#')))

Simplemente haga esto:

 with open('yorfileaname.txt') as f:  # open the desired file
     data = f.read()  # read the whole file into the variable data
     # Split the text wherever the '==========' delimiter occurs.
     # split() returns a list; unpacking it with * makes print() write the
     # pieces as plain text (separated by spaces) instead of a list repr.
     print(*(data.split('==========')))

Salida:

 content content more content content conclusion content again more of it content conclusion content content contend done 

¿Qué tal algo como esto?

 from itertools import groupby


 def per_section(s, delimiters=()):
     """Split the text *s* into sections of consecutive content lines.

     Args:
         s: The whole text as a single string; it is broken into lines with
             str.splitlines().
         delimiters: Iterable of prefix strings. A line that starts with any
             of them is a delimiter. Empty and whitespace-only lines are
             always delimiters.

     Yields:
         One list per section, holding that section's lines without their
         line endings. Runs of delimiter lines are dropped.
     """
     # str.startswith() accepts a tuple of prefixes. tuple() also copes with
     # a bare string such as '#', which becomes its individual characters,
     # and with one-shot iterators, which are consumed only once here.
     prefixes = tuple(delimiters)

     def is_delimiter(line):
         return not line or line.isspace() or line.startswith(prefixes)

     # groupby() batches adjacent lines that share the same key, so each
     # group is either a run of delimiters or one complete section.
     for delimiter_run, lines in groupby(s.splitlines(), key=is_delimiter):
         if not delimiter_run:
             yield list(lines)


 if __name__ == '__main__':
     print(list(per_section(
         'This is a text block start\n'
         'This is the end\n'
         '\n'
         'And this is another\n'
         'with more than one line\n'
         'and another line.'
     )))

     print(list(per_section(
         '# Some comments, maybe the title of the following section\n'
         'This is a text block start\n'
         'This is the end\n'
         '# Some other comments and also the title\n'
         'And this is another\n'
         'with more than one line\n'
         'and another line.',
         ('#',)
     )))

     print(list(per_section(
         '!! Some comments, maybe the title of the following section\n'
         'This is a text block start\n'
         'This is the end\n'
         '$$ Some other comments and also the title\n'
         'And this is another\n'
         'with more than one line\n'
         'and another line.',
         ('!', '$')
     )))

Salida:

 [['This is a text block start', 'This is the end'], ['And this is another', 'with more than one line', 'and another line.']]
 [['This is a text block start', 'This is the end'], ['And this is another', 'with more than one line', 'and another line.']]
 [['This is a text block start', 'This is the end'], ['And this is another', 'with more than one line', 'and another line.']]