diff --git a/KoozDawa/dawa/lyrics.py b/KoozDawa/dawa/lyrics.py
index e69de29..27426d3 100644
--- a/KoozDawa/dawa/lyrics.py
+++ b/KoozDawa/dawa/lyrics.py
@@ -0,0 +1,38 @@
+"""Print the songs Genius lists for Dooz-kawa."""
+import os
+
+import lyricsgenius
+
+# Environment variable holding the Genius API access token. The token is a
+# secret: it must never be hard-coded in a file tracked by version control.
+TOKEN_ENV_VAR = "GENIUS_ACCESS_TOKEN"
+
+
+def fetch(artist_name="Dooz-kawa"):
+    """Search Genius for `artist_name` and print each song found.
+
+    Raises RuntimeError when the access token is missing from the environment.
+    """
+    token = os.environ.get(TOKEN_ENV_VAR)
+    if not token:
+        raise RuntimeError(f"Set {TOKEN_ENV_VAR} to a Genius API access token")
+
+    genius = lyricsgenius.Genius(token)
+    # search_artist returns an Artist object (or None when nothing matches),
+    # not the raw search JSON, so there is no "hits" key to index into.
+    artist = genius.search_artist(artist_name)
+    if artist is None:
+        print(f"No artist found for {artist_name!r}")
+        return
+
+    for song in artist.songs:
+        print(song)
+
+
+def main():
+    """Script entry point."""
+    fetch()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/KoozDawa/dawa/tokens.py b/KoozDawa/dawa/tokens.py
index bd275d2..1e05230 100644
--- a/KoozDawa/dawa/tokens.py
+++ b/KoozDawa/dawa/tokens.py
@@ -5,7 +5,9 @@ from KoozDawa.dawa.loader import load_kawa
 
 class PoemTokenizer(Tokenizer):
     def __init__(self, **kwargs) -> None:
-        super().__init__(lower=False, filters='"#$%&()*+,-/<=>@[\\]^_`{|}~\t\n', oov_token="😢", **kwargs)
+        super().__init__(lower=True,  # TODO: Better generalization without?
+                         filters='#$%&()*+/<=>@[\\]^_`{|}~\t\n', oov_token="😢",
+                         **kwargs)
 
     def get_sequence_of_tokens(self, corpus):
         self.fit_on_texts(corpus)