|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
The number of words containing a double c ("cc") appears to be larger than
the number of words containing a double u ("uu").
Number of English words that contain each doubled letter (466k words checked):
```
{
'aa': 250, 'bb': 1706, 'cc': 2370,
'dd': 1819, 'ee': 7231, 'ff': 3057,
'gg': 2107, 'hh': 112, 'ii': 574,
'jj': 6, 'kk': 132, 'll': 18613,
'mm': 3425, 'nn': 3944, 'pp': 3566,
'oo': 7363, 'qq': 4, 'rr': 5747,
'ss': 20062, 'tt': 5815, 'uu': 68,
'vv': 44, 'ww': 82, 'xx': 9,
'yy': 9, 'zz': 564
}
```
Generated with this word list[1] and the following Python script:
```python
def load_words():
    """Load the word list from ``words_alpha.txt`` in the working directory.

    The file is split on any whitespace, so blank lines are ignored and
    duplicate entries collapse into one.

    Returns:
        A set containing every distinct word found in the file.

    Raises:
        FileNotFoundError: If ``words_alpha.txt`` does not exist.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open('words_alpha.txt', encoding='utf-8') as word_file:
        valid_words = set(word_file.read().split())
    return valid_words
def count_double_letters(words, pairs, *, verbose=False):
    """Count, for each letter pair, how many words contain it.

    A word is counted at most once per pair, no matter how many times the
    pair occurs inside it (the check is a plain substring test).

    Args:
        words: Iterable of words to scan.
        pairs: Iterable of substrings to look for, e.g. ``'aa'``, ``'bb'``.
        verbose: When true, print a trace line for every word/pair check
            and for every hit. Off by default because it emits one or two
            lines per combination, which buries the final result.

    Returns:
        A dict mapping each pair to the number of words containing it,
        with keys in the order the pairs were given.
    """
    counts = dict.fromkeys(pairs, 0)
    for word in words:
        for pair in counts:
            if verbose:
                print("checking " + pair + " for existence in " + word)
            if pair in word:
                if verbose:
                    print(pair + " is in " + word)
                counts[pair] += 1
    return counts


if __name__ == '__main__':
    english_words = load_words()
    # Order is kept exactly as before (note 'pp' precedes 'oo') so the
    # printed dict keeps the same key order.
    d_chars = ['aa', 'bb', 'cc',
               'dd', 'ee', 'ff',
               'gg', 'hh', 'ii',
               'jj', 'kk', 'll',
               'mm', 'nn', 'pp',
               'oo', 'qq', 'rr',
               'ss', 'tt', 'uu',
               'vv', 'ww', 'xx',
               'yy', 'zz']
    hits = count_double_letters(english_words, d_chars)
    print(hits)
```
[1]: https://github.com/dwyl/english-words
|