Source code for wordle.vocab

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Wordle Vocabulary"""

###############################################################################

import json
from pathlib import Path
from collections import defaultdict
from typing import Iterable

from .defaults import (
    ALPHABET,
    WORD_LENGTH,
    WORDS_FILE,
    VOCAB_CACHE,
    INDEX_CACHE
)

###############################################################################


[docs]class Vocabulary: def __init__( self, alphabet: Iterable = ALPHABET, words_file: str or Path = WORDS_FILE, word_length: int = WORD_LENGTH, vocab_cache: str or Path = VOCAB_CACHE, index_cache: str or Path = INDEX_CACHE ): self.alphabet = set(alphabet) self.word_length = word_length self.words_file = Path(words_file) self.vocab_cache = Path(vocab_cache) self.index_cache = Path(index_cache) self.build_vocabulary() # creates self.vocab self.build_index() # creates self.index self.frequency = { k: { k1: len(v1) for k1, v1 in v.items() } for k, v in self.index.items() }
[docs] def build_vocabulary(self, use_cache=True): if use_cache and self.vocab_cache.is_file(): with open(self.vocab_cache, encoding="utf-8") as f: self.vocab = json.load(f) else: with open(self.words_file, encoding="utf-8") as f: self.vocab = { word.strip(): self.word_length for word in f.read().split() if len(word.strip()) == self.word_length } with open(self.vocab_cache, mode="w", encoding="utf-8") as f: json.dump(self.vocab, f, indent=2) return True
[docs] def build_index(self, use_cache=True): if use_cache and self.index_cache.is_file(): with open(self.index_cache, mode="r") as f: self.index = json.load(f) else: self.index = { "letter": defaultdict(dict), "letter_position": defaultdict(dict) } for word in self.vocab: letter_position = [ f"{ltr}{pos}" for ltr, pos in zip(word, range(self.word_length)) ] letters = set(word) unique_letters = len(letters) for letter in letters: self.index['letter'][letter][word] = unique_letters for l_p in letter_position: self.index['letter_position'][l_p][word] = unique_letters with open(self.index_cache, mode="w") as f: json.dump(self.index, f) return True
[docs] def is_word(self, text): return text in self.vocab
############################################################################### if __name__ == '__main__': vocab = Vocabulary() ###############################################################################