-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmakeDictionary.html
189 lines (171 loc) · 5.72 KB
/
makeDictionary.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
<!DOCTYPE html>
<html lang="it">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Convertitore di lista di parole (txt) in un dizionario (json) per la conversione fonetica</title>
<style>
body {
font-family: Arial, sans-serif;
text-align: center;
margin: 20px;
background-color: #f4f4f4;
}
#file-input {
margin: 20px 0;
}
#download-btn {
display: none;
margin-top: 20px;
padding: 10px 20px;
font-size: 1.2em;
background: #4caf50;
color: white;
border: none;
cursor: pointer;
border-radius: 5px;
}
#download-btn:hover {
background: #43a047;
}
pre {
background: white;
padding: 10px;
border-radius: 5px;
text-align: left;
overflow: auto;
max-height: 300px;
margin-top: 20px;
}
</style>
</head>
<body>
<h1>Convertitore di lista di parole (txt) in un dizionario (json) per la conversione fonetica</h1>
<!-- File input for reading a .txt file -->
<input type="file" id="file-input" accept=".txt">
<!-- Button for downloading the resulting JSON -->
<button id="download-btn">Scarica JSON</button>
<!-- Output area for showing the generated JSON -->
<pre id="output"></pre>
<script>
/**
 * Main DOM references and event listeners.
 *
 * Looks up the three page elements this script works with and registers
 * the handlers that drive the page: choosing a file triggers the
 * conversion, clicking the button triggers the JSON download.
 */
// <input type="file"> the user picks the word list with.
const fileInput = document.getElementById('file-input');
// Download button; hidden by the page CSS until a result is available.
const downloadBtn = document.getElementById('download-btn');
// <pre> element that displays the generated JSON text.
const outputPre = document.getElementById('output');
// Both handlers are function declarations defined further down in this
// script, so they are hoisted and can be referenced here.
fileInput.addEventListener('change', handleFileSelection);
downloadBtn.addEventListener('click', downloadJson);
/**
 * Handles the file input's 'change' event.
 *
 * Reads the first selected file as text, splits it into lines, trims each
 * line, skips empty lines and duplicate words, and groups the remaining
 * words by the numeric key returned by `convertWord`. The resulting
 * `{ numericKey: [words...] }` object is shown as pretty-printed JSON in
 * the output element and the download button is made visible.
 *
 * If the file cannot be read, an error message is shown instead and the
 * download button is hidden, so a stale result from a previous file can
 * not be downloaded by mistake.
 *
 * @param {Event} event - 'change' event whose `target.files[0]` is the
 *   file to convert. Nothing happens when no file was selected.
 * @returns {void}
 */
function handleFileSelection(event) {
  const file = event.target.files[0];
  if (!file) return;

  const reader = new FileReader();

  reader.onload = function () {
    const dictionary = {};
    const seenWords = new Set();

    reader.result.split('\n').forEach((line) => {
      // trim() also removes the '\r' left behind by Windows line endings.
      const word = line.trim();
      if (!word || seenWords.has(word)) return;
      seenWords.add(word);

      const numberKey = convertWord(word);
      if (!dictionary[numberKey]) {
        dictionary[numberKey] = [];
      }
      dictionary[numberKey].push(word);
    });

    outputPre.textContent = JSON.stringify(dictionary, null, 2);
    downloadBtn.style.display = 'inline-block';
  };

  reader.onerror = function () {
    // Surface the failure instead of silently keeping the previous result.
    console.error('Could not read the selected file:', reader.error);
    outputPre.textContent = 'Errore durante la lettura del file.';
    downloadBtn.style.display = 'none';
  };

  reader.readAsText(file);
}
// Tables for phonetics

// Letters that carry no digit; they are dropped from the final code.
const VOWELS = ["a","e","i","o","u"];

// Spelling normalizations applied first: accented vowels are folded onto
// the plain vowel, "y" and "j" are treated as "i", and "x" is expanded
// to "ks" so the general table can handle it.
const ITALIAN_ADJUSTMENTS = [
  { original:"a", variants:["á","à"] },
  { original:"e", variants:["è","é"] },
  { original:"i", variants:["ì","í","y","j"] },
  { original:"o", variants:["ò","ó"] },
  { original:"u", variants:["ù","ú"] },
  { original:"ks", variants:["x"] }
];

// Letter groups that are replaced by a fixed digit sequence BEFORE the
// general table runs. They are applied in the listed order, so the more
// specific "siglio" / "rsigli" / "nsigli" win over the generic "sigli".
const EXCEPTIONS = [
  { number:"756", words:["glig","glic"] },
  { number:"05", words:["siglio"] },
  { number:"405", words:["rsigli"] },
  // "205" is n(2) + s(0) + gli(5) per CONVERSION_TABLE, so the pattern must
  // start with "n" (e.g. "consigli"). It was previously spelled "msigli",
  // which never matched such words and let them fall through to "sigli".
  { number:"205", words:["nsigli"] },
  { number:"075", words:["sigli"] }
];

// General letter-group -> digit table. Entries are applied from "0" to "9"
// and, inside an entry, patterns are tried in the listed order, so longer
// groups are listed before the single letters they contain.
const CONVERSION_TABLE = [
  { number:"0", phonetics:["ss","zz","sci","sce","s","z"] },
  { number:"1", phonetics:["tt","dd","t","d"] },
  { number:"2", phonetics:["nn","gn","n"] },
  { number:"3", phonetics:["mm","m"] },
  { number:"4", phonetics:["rr","r"] },
  { number:"5", phonetics:["gli","ll","l"] },
  { number:"6", phonetics:["cci","ci","cce","ce","ggi","gi","gge","ge"] },
  { number:"7", phonetics:["cc","gg","cq","kk","ck","q","k","c","g"] },
  { number:"8", phonetics:["ff","vv","f","v"] },
  { number:"9", phonetics:["pp","bb","p","b"] }
];

// Every replacement step, in application order, with its regular
// expression compiled once at load time instead of once per converted
// word. Reusing the global regexes is safe because String.prototype.replace
// resets lastIndex before matching.
const PHONETIC_REPLACEMENTS = (() => {
  const steps = [];
  const addSteps = (patterns, replacement) => {
    patterns.forEach((pattern) => {
      steps.push({ regex: new RegExp(pattern, "g"), replacement });
    });
  };
  ITALIAN_ADJUSTMENTS.forEach((adj) => addSteps(adj.variants, adj.original));
  EXCEPTIONS.forEach((ex) => addSteps(ex.words, ex.number));
  CONVERSION_TABLE.forEach((entry) => addSteps(entry.phonetics, entry.number));
  addSteps(VOWELS, "");
  return steps;
})();

/**
 * Converts a single word to its numeric code according to the phonetic
 * tables above.
 *
 * The word is lower-cased and trimmed, then the spelling adjustments, the
 * exceptions, the conversion table and the vowel removal are applied in
 * that order. Finally every character that is not a digit is discarded.
 * Digits that were already present in the input are kept as they are.
 *
 * @param {*} input - Word to convert; it is coerced with String().
 * @returns {string} The digit string for the word, or "99999999" when the
 *   input is falsy or no digit is produced.
 */
function convertWord(input) {
  if (!input) return "99999999";

  let word = String(input).toLowerCase().trim();
  PHONETIC_REPLACEMENTS.forEach((step) => {
    word = word.replace(step.regex, step.replacement);
  });

  // Anything left that did not map to a digit (h, w, punctuation, ...) is noise.
  word = word.replace(/[^0-9]/g, "");
  return (word === "") ? "99999999" : word;
}
/**
 * Downloads the JSON currently displayed in the output <pre> element as
 * 'dictionary.json'.
 *
 * The text is wrapped in a Blob, exposed through a temporary object URL
 * and downloaded by programmatically clicking a temporary <a download>
 * link. The link is always removed from the document again, and the
 * object URL is revoked afterwards so the Blob can be garbage-collected
 * instead of staying alive for the lifetime of the page.
 *
 * @returns {void}
 */
function downloadJson() {
  const jsonOutput = outputPre.textContent;
  const blob = new Blob([jsonOutput], { type: 'application/json' });
  const objectUrl = URL.createObjectURL(blob);

  const temporaryLink = document.createElement('a');
  temporaryLink.href = objectUrl;
  temporaryLink.download = 'dictionary.json';
  document.body.appendChild(temporaryLink);
  try {
    temporaryLink.click();
  } finally {
    document.body.removeChild(temporaryLink);
    // Revoke on the next task so the browser has already started the
    // download triggered by click() when the URL becomes invalid.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 0);
  }
}
</script>
</body>
</html>