1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package io.earcam.utilitarian.site.search.offline;
20
21 import static io.earcam.unexceptional.Closing.closeAfterAccepting;
22 import static io.earcam.unexceptional.Exceptional.apply;
23 import static io.earcam.utilitarian.site.search.offline.Component.getOrDefault;
24 import static io.earcam.utilitarian.site.search.offline.Component.mandatory;
25 import static io.earcam.utilitarian.site.search.offline.Javascript.createJavascriptEngine;
26 import static io.earcam.utilitarian.site.search.offline.Javascript.invokeFunction;
27 import static io.earcam.utilitarian.site.search.offline.Resources.SCRIPT_INDEX;
28 import static io.earcam.utilitarian.site.search.offline.Resources.SCRIPT_SEARCH;
29 import static io.earcam.utilitarian.site.search.offline.Resources.getResource;
30 import static java.nio.charset.StandardCharsets.UTF_8;
31 import static java.util.Arrays.stream;
32 import static java.util.Collections.emptyMap;
33 import static java.util.function.Function.identity;
34 import static java.util.stream.Collectors.joining;
35 import static java.util.stream.Collectors.toMap;
36
37 import java.io.FileOutputStream;
38 import java.io.IOException;
39 import java.io.InputStream;
40 import java.io.OutputStream;
41 import java.nio.charset.Charset;
42 import java.nio.file.Path;
43 import java.nio.file.Paths;
44 import java.util.HashMap;
45 import java.util.Map;
46 import java.util.Objects;
47 import java.util.SortedSet;
48 import java.util.TreeSet;
49 import java.util.function.BiConsumer;
50 import java.util.function.Consumer;
51 import java.util.stream.Stream;
52 import java.util.zip.GZIPOutputStream;
53
54 import javax.annotation.WillNotClose;
55 import javax.script.Invocable;
56 import javax.script.ScriptException;
57
58 import org.slf4j.Logger;
59 import org.slf4j.LoggerFactory;
60
61
62 public class DefaultIndexer implements Indexer {
63
64 private static final Logger LOG = LoggerFactory.getLogger(DefaultIndexer.class);
65
66 public static final String BASEDIR_WEBJARS_RESOURCES = "META-INF/resources/webjars/";
67 public static final String VERSION_LUNR_JS = "2.1.0";
68 public static final String LUNR = BASEDIR_WEBJARS_RESOURCES + "lunr.js/" + VERSION_LUNR_JS + "/lunr.js";
69
70 public static final String OUTPUT_FILE = "outputFile";
71 public static final String FIELDS = "fields";
72 public static final String MAP_TITLES = "mapTitles";
73 public static final String GENERATE_AUTOCOMPLETE = "generateAutocomplete";
74 public static final String OUTPUT_CHARSET = "outputCharset";
75
76 private Path outputFile;
77
78 @SuppressWarnings("squid:S1845")
79 private String[] fields;
80 private String refUrl;
81 private Charset outputCharset;
82
83 private Map<String, String> titlesMap = new HashMap<>();
84 private SortedSet<String> autocomplete = new TreeSet<>();
85
86 private BiConsumer<String, String> titleMapper = titlesMap::put;
87 private Consumer<String> autocompleter = autocomplete::add;
88
89 private Invocable engine;
90 private Object javascriptIndexBuilder;
91
92
93 @Override
94 public void configure(Map<String, String> configuration)
95 {
96 outputCharset = getOrDefault(configuration, OUTPUT_CHARSET, UTF_8);
97 refUrl = mandatory(configuration, Document.REF_URL);
98 outputFile = Paths.get(mandatory(configuration, OUTPUT_FILE));
99 fields = mandatory(configuration, FIELDS).split(",");
100
101 if(!getOrDefault(configuration, MAP_TITLES, true)) {
102 titleMapper = (u, t) -> { };
103 }
104
105 if(!getOrDefault(configuration, GENERATE_AUTOCOMPLETE, true)) {
106 autocompleter = d -> { };
107 }
108
109 initialize();
110 }
111
112
113 private void initialize()
114 {
115 engine = createSearchEngine(SCRIPT_INDEX);
116
117 Map<String, Map<Object, Object>> fieldConfigurations = stream(fields).collect(toMap(identity(), v -> emptyMap()));
118 javascriptIndexBuilder = invokeFunction(engine, "createIndexBuilder", refUrl, fieldConfigurations);
119 }
120
121
122 static Invocable createSearchEngine(String script)
123 {
124 InputStream lunr = getResource(LUNR);
125 InputStream indexScript = getResource(script);
126 Objects.requireNonNull(lunr, "Could not load lunrjs lib");
127 Objects.requireNonNull(indexScript, "Could not load indexScript");
128 return createJavascriptEngine(lunr, indexScript);
129 }
130
131
132 @Override
133 public synchronized Indexer add(Stream<Document> documents)
134 {
135 invokeFunction(engine, "addDocuments", javascriptIndexBuilder, documents
136 .filter(Document::hasTokens)
137 .peek(d -> titleMapper.accept(d.refUrl(), d.title()))
138 .peek(d -> d.tokens().forEach(autocompleter::accept))
139 .map(Document::asMap)
140 .iterator());
141 return this;
142 }
143
144
145 @Override
146 public void writeJson()
147 {
148 outputFile.getParent().toFile().mkdirs();
149
150 if(isGzip()) {
151 FileOutputStream fos = apply(FileOutputStream::new, outputFile.toFile());
152 closeAfterAccepting(GZIPOutputStream::new, fos, this::writeJson);
153 } else {
154 closeAfterAccepting(FileOutputStream::new, outputFile.toFile(), this::writeJson);
155 }
156 }
157
158
159 private boolean isGzip()
160 {
161 return outputFile.getFileName().toString().endsWith(".gz");
162 }
163
164
165 protected void writeJson(@WillNotClose OutputStream output) throws IOException
166 {
167 writeIndex(output);
168 writeAutocomplete(output);
169 writeTitleMap(output);
170 }
171
172
173 private void writeIndex(OutputStream output) throws IOException
174 {
175 output.write(bytes("{\n\n\"index\": "));
176 String indexJson = serializeIndex();
177 byte[] bytes = bytes(indexJson);
178 output.write(bytes);
179 String id = id();
180 LOG.debug("{} wrote {} bytes for index to {}", id, bytes.length, outputFile);
181 }
182
183
184 public byte[] bytes(String text)
185 {
186 return text.getBytes(outputCharset);
187 }
188
189
190 public String serializeIndex()
191 {
192 return (String) invokeFunction(engine, "buildSerializedIndex", javascriptIndexBuilder);
193 }
194
195
196 private void writeAutocomplete(OutputStream output) throws IOException
197 {
198 byte[] bytes = bytes(autocomplete.stream().collect(joining("\", \"", ",\n\n\"autocomplete\": [\"", "\"]")));
199 output.write(bytes);
200 String id = id();
201 LOG.debug("{} wrote {} bytes for {} words for autocomplete to {}", id, bytes.length, autocomplete.size(), outputFile);
202 }
203
204
205 private void writeTitleMap(OutputStream output) throws IOException
206 {
207 byte[] bytes = bytes(titlesMap.entrySet().stream().map(
208 e -> new StringBuilder()
209 .append('"').append(e.getKey()).append('"')
210 .append(':')
211 .append('"').append(e.getValue()).append('"'))
212 .collect(joining(", ", ",\n\n\"titleMap\": {", "}\n}")));
213 output.write(bytes);
214 String id = id();
215 LOG.debug("{} wrote {} bytes for {} entries for title map to {}", id, bytes.length, titlesMap.size(), outputFile);
216 }
217
218
219 public static String search(String indexJson, String query) throws ScriptException, NoSuchMethodException
220 {
221 Invocable engine = createSearchEngine(SCRIPT_SEARCH);
222 return engine.invokeFunction("jsonSearchIndex", indexJson, query).toString();
223 }
224 }