View Javadoc
1   /*-
2    * #%L
3    * io.earcam.utilitarian.site.search.offline
4    * %%
5    * Copyright (C) 2017 earcam
6    * %%
7    * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
8    *
9    * You <b>must</b> choose to accept, in full - any individual or combination of
10   * the following licenses:
11   * <ul>
12   * 	<li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li>
13   * 	<li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li>
14   * 	<li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li>
15   * 	<li><a href="https://opensource.org/licenses/MIT">MIT</a></li>
16   * </ul>
17   * #L%
18   */
19  package io.earcam.utilitarian.site.search.offline;
20  
21  import static io.earcam.unexceptional.Closing.closeAfterAccepting;
22  import static io.earcam.unexceptional.Exceptional.apply;
23  import static io.earcam.utilitarian.site.search.offline.Component.getOrDefault;
24  import static io.earcam.utilitarian.site.search.offline.Component.mandatory;
25  import static io.earcam.utilitarian.site.search.offline.Javascript.createJavascriptEngine;
26  import static io.earcam.utilitarian.site.search.offline.Javascript.invokeFunction;
27  import static io.earcam.utilitarian.site.search.offline.Resources.SCRIPT_INDEX;
28  import static io.earcam.utilitarian.site.search.offline.Resources.SCRIPT_SEARCH;
29  import static io.earcam.utilitarian.site.search.offline.Resources.getResource;
30  import static java.nio.charset.StandardCharsets.UTF_8;
31  import static java.util.Arrays.stream;
32  import static java.util.Collections.emptyMap;
33  import static java.util.function.Function.identity;
34  import static java.util.stream.Collectors.joining;
35  import static java.util.stream.Collectors.toMap;
36  
37  import java.io.FileOutputStream;
38  import java.io.IOException;
39  import java.io.InputStream;
40  import java.io.OutputStream;
41  import java.nio.charset.Charset;
42  import java.nio.file.Path;
43  import java.nio.file.Paths;
44  import java.util.HashMap;
45  import java.util.Map;
46  import java.util.Objects;
47  import java.util.SortedSet;
48  import java.util.TreeSet;
49  import java.util.function.BiConsumer;
50  import java.util.function.Consumer;
51  import java.util.stream.Stream;
52  import java.util.zip.GZIPOutputStream;
53  
54  import javax.annotation.WillNotClose;
55  import javax.script.Invocable;
56  import javax.script.ScriptException;
57  
58  import org.slf4j.Logger;
59  import org.slf4j.LoggerFactory;
60  
61  //TODO test maven SOURCE filtering to replace VERSION_* constants ... but will IDE/Eclipse do the replacement before test exec?
62  public class DefaultIndexer implements Indexer {
63  
64  	private static final Logger LOG = LoggerFactory.getLogger(DefaultIndexer.class);
65  
66  	public static final String BASEDIR_WEBJARS_RESOURCES = "META-INF/resources/webjars/";
67  	public static final String VERSION_LUNR_JS = "2.1.0";
68  	public static final String LUNR = BASEDIR_WEBJARS_RESOURCES + "lunr.js/" + VERSION_LUNR_JS + "/lunr.js";
69  
70  	public static final String OUTPUT_FILE = "outputFile";
71  	public static final String FIELDS = "fields";
72  	public static final String MAP_TITLES = "mapTitles";
73  	public static final String GENERATE_AUTOCOMPLETE = "generateAutocomplete";
74  	public static final String OUTPUT_CHARSET = "outputCharset";
75  
76  	private Path outputFile;
77  
78  	@SuppressWarnings("squid:S1845")
79  	private String[] fields;
80  	private String refUrl;
81  	private Charset outputCharset;
82  
83  	private Map<String, String> titlesMap = new HashMap<>();
84  	private SortedSet<String> autocomplete = new TreeSet<>();
85  
86  	private BiConsumer<String, String> titleMapper = titlesMap::put;
87  	private Consumer<String> autocompleter = autocomplete::add;
88  
89  	private Invocable engine;
90  	private Object javascriptIndexBuilder;
91  
92  
93  	@Override
94  	public void configure(Map<String, String> configuration)
95  	{
96  		outputCharset = getOrDefault(configuration, OUTPUT_CHARSET, UTF_8);
97  		refUrl = mandatory(configuration, Document.REF_URL);
98  		outputFile = Paths.get(mandatory(configuration, OUTPUT_FILE));
99  		fields = mandatory(configuration, FIELDS).split(",");
100 
101 		if(!getOrDefault(configuration, MAP_TITLES, true)) {
102 			titleMapper = (u, t) -> { /* noop */ };
103 		}
104 
105 		if(!getOrDefault(configuration, GENERATE_AUTOCOMPLETE, true)) {
106 			autocompleter = d -> { /* noop */ };
107 		}
108 
109 		initialize();
110 	}
111 
112 
113 	private void initialize()
114 	{
115 		engine = createSearchEngine(SCRIPT_INDEX);
116 
117 		Map<String, Map<Object, Object>> fieldConfigurations = stream(fields).collect(toMap(identity(), v -> emptyMap()));
118 		javascriptIndexBuilder = invokeFunction(engine, "createIndexBuilder", refUrl, fieldConfigurations);
119 	}
120 
121 
122 	static Invocable createSearchEngine(String script)
123 	{
124 		InputStream lunr = getResource(LUNR);
125 		InputStream indexScript = getResource(script);
126 		Objects.requireNonNull(lunr, "Could not load lunrjs lib");
127 		Objects.requireNonNull(indexScript, "Could not load indexScript");
128 		return createJavascriptEngine(lunr, indexScript);
129 	}
130 
131 
132 	@Override
133 	public synchronized Indexer add(Stream<Document> documents)
134 	{
135 		invokeFunction(engine, "addDocuments", javascriptIndexBuilder, documents
136 				.filter(Document::hasTokens)
137 				.peek(d -> titleMapper.accept(d.refUrl(), d.title()))
138 				.peek(d -> d.tokens().forEach(autocompleter::accept))
139 				.map(Document::asMap)
140 				.iterator());
141 		return this;
142 	}
143 
144 
145 	@Override
146 	public void writeJson()
147 	{
148 		outputFile.getParent().toFile().mkdirs();
149 
150 		if(isGzip()) {
151 			FileOutputStream fos = apply(FileOutputStream::new, outputFile.toFile());
152 			closeAfterAccepting(GZIPOutputStream::new, fos, this::writeJson);
153 		} else {
154 			closeAfterAccepting(FileOutputStream::new, outputFile.toFile(), this::writeJson);
155 		}
156 	}
157 
158 
159 	private boolean isGzip()
160 	{
161 		return outputFile.getFileName().toString().endsWith(".gz");
162 	}
163 
164 
165 	protected void writeJson(@WillNotClose OutputStream output) throws IOException
166 	{
167 		writeIndex(output);
168 		writeAutocomplete(output);
169 		writeTitleMap(output);
170 	}
171 
172 
173 	private void writeIndex(OutputStream output) throws IOException
174 	{
175 		output.write(bytes("{\n\n\"index\": "));
176 		String indexJson = serializeIndex();
177 		byte[] bytes = bytes(indexJson);
178 		output.write(bytes);
179 		String id = id();
180 		LOG.debug("{} wrote {} bytes for index to {}", id, bytes.length, outputFile);
181 	}
182 
183 
184 	public byte[] bytes(String text)
185 	{
186 		return text.getBytes(outputCharset);
187 	}
188 
189 
190 	public String serializeIndex()
191 	{
192 		return (String) invokeFunction(engine, "buildSerializedIndex", javascriptIndexBuilder);
193 	}
194 
195 
196 	private void writeAutocomplete(OutputStream output) throws IOException
197 	{
198 		byte[] bytes = bytes(autocomplete.stream().collect(joining("\", \"", ",\n\n\"autocomplete\": [\"", "\"]")));
199 		output.write(bytes);
200 		String id = id();
201 		LOG.debug("{} wrote {} bytes for {} words for autocomplete to {}", id, bytes.length, autocomplete.size(), outputFile);
202 	}
203 
204 
205 	private void writeTitleMap(OutputStream output) throws IOException
206 	{
207 		byte[] bytes = bytes(titlesMap.entrySet().stream().map(
208 				e -> new StringBuilder()
209 						.append('"').append(e.getKey()).append('"')
210 						.append(':')
211 						.append('"').append(e.getValue()).append('"'))
212 				.collect(joining(", ", ",\n\n\"titleMap\": {", "}\n}")));
213 		output.write(bytes);
214 		String id = id();
215 		LOG.debug("{} wrote {} bytes for {} entries for title map to {}", id, bytes.length, titlesMap.size(), outputFile);
216 	}
217 
218 
219 	public static String search(String indexJson, String query) throws ScriptException, NoSuchMethodException
220 	{
221 		Invocable engine = createSearchEngine(SCRIPT_SEARCH);
222 		return engine.invokeFunction("jsonSearchIndex", indexJson, query).toString();
223 	}
224 }