From 29a23ae761df9e04a54daecd3d592a825d08cdcf Mon Sep 17 00:00:00 2001 From: Paul-Louis NECH Date: Sat, 23 Nov 2019 00:05:43 +0100 Subject: [PATCH] feat(Kawa): tokenize " --- KoozDawa/dawa/lyrics.py | 17 +++++++++++++++++ KoozDawa/dawa/tokens.py | 4 +++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/KoozDawa/dawa/lyrics.py b/KoozDawa/dawa/lyrics.py index e69de29..27426d3 100644 --- a/KoozDawa/dawa/lyrics.py +++ b/KoozDawa/dawa/lyrics.py @@ -0,0 +1,17 @@ +import lyricsgenius + + +def fetch(): + genius = lyricsgenius.Genius("zUSpjfQ9ELXDqOjx9hGfAlJGYQFrNvHh3rlDV298_QSr5ScKf3qlHZtOO2KsXspQ") + response = genius.search_artist("Dooz-kawa") + + for hit in response["hits"]: + print(hit) + + +def main(): + fetch() + + +if __name__ == '__main__': + main() diff --git a/KoozDawa/dawa/tokens.py b/KoozDawa/dawa/tokens.py index bd275d2..1e05230 100644 --- a/KoozDawa/dawa/tokens.py +++ b/KoozDawa/dawa/tokens.py @@ -5,7 +5,9 @@ from KoozDawa.dawa.loader import load_kawa class PoemTokenizer(Tokenizer): def __init__(self, **kwargs) -> None: - super().__init__(lower=False, filters='"#$%&()*+,-/<=>@[\\]^_`{|}~\t\n', oov_token="😢", **kwargs) + super().__init__(lower=True, # TODO: Better generalization without? + filters='#$%&()*+/<=>@[\\]^_`{|}~\t\n', oov_token="😢", + **kwargs) def get_sequence_of_tokens(self, corpus): self.fit_on_texts(corpus) -- libgit2 0.27.0