# Patch summary (preamble text; everything before "diff --git" is commentary).
#
# Target: glossolalia/loader.py (blob 8cc22ca -> c794b9a).
# NOTE: in this copy the patch's line breaks are collapsed onto one line; the
# hunk headers (-1,5 +1,4 and -37,21 +36,21) still describe a multi-line layout.
#
# Hunk 1 (imports):
#   - Drops `import string`.
#
# Hunk 2 (load_seeds / main):
#   - load_seeds gains two keyword parameters, min_len=1 and max_len=2, that
#     bound the number of leading words kept from the chosen line:
#     nb_words = randint(min_len, min(max_len, len(split))), where the old
#     code used randint(1, len(split)).
#   - When corpus is None it is now loaded with load_text() instead of
#     load_texts(). load_text is not defined in the visible hunks;
#     presumably it lives elsewhere in loader.py -- TODO confirm.
#   - The candidate-line filter changes from `k != "\n"` to
#     `k not in "\n" and len(k) > 2`.
#   - main() now loads "../KoozDawa/data/genius.txt" via load_text rather
#     than calling load_texts("../").
#
# NOTE(review): assuming corpus items are str, `k not in "\n"` is a substring
#   test (False only for "" and "\n"); any k with len(k) > 2 already passes
#   it, so the first clause adds nothing to the second.
# NOTE(review): len(k) > 2 counts a trailing newline if lines keep one --
#   verify against get_lines.
# NOTE(review): random.randint(a, b) raises ValueError when a > b, i.e. when
#   min_len > max_len or the chosen line has fewer than min_len words.
# NOTE(review): random.choice raises IndexError if no line passes the filter.
# NOTE(review): the filter over corpus is rebuilt on every loop iteration
#   although corpus is not modified inside the loop.
diff --git a/glossolalia/loader.py b/glossolalia/loader.py index 8cc22ca..c794b9a 100644 --- a/glossolalia/loader.py +++ b/glossolalia/loader.py @@ -1,5 +1,4 @@ import os -import string from pprint import pprint from random import choice, randint @@ -37,21 +36,21 @@ def get_lines(filename): return all_lines -def load_seeds(corpus=None, nb_seeds=10): +def load_seeds(corpus=None, nb_seeds=10, min_len=1, max_len=2): if corpus is None: - corpus = load_texts() + corpus = load_text() seeds = [] for i in range(nb_seeds): - plain_lines = filter(lambda k: k != "\n", corpus) + plain_lines = filter(lambda k: k not in "\n" and len(k) > 2, corpus) chosen = choice(list(plain_lines)) split = chosen.split(" ") - nb_words = randint(1, len(split)) + nb_words = randint(min_len, min(max_len, len(split))) seeds.append(" ".join(split[:nb_words])) return seeds def main(): - lines = load_texts("../") + lines = load_text("../KoozDawa/data/genius.txt") print("Some seeds:") pprint(load_seeds(lines))