SimpleTokenizer.java

1
/*-
2
 * #%L
3
 * io.earcam.utilitarian.site.search.offline
4
 * %%
5
 * Copyright (C) 2017 earcam
6
 * %%
7
 * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
8
 *
9
 * You <b>must</b> choose to accept, in full - any individual or combination of
10
 * the following licenses:
11
 * <ul>
12
 * 	<li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li>
13
 * 	<li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li>
14
 * 	<li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li>
15
 * 	<li><a href="https://opensource.org/licenses/MIT">MIT</a></li>
16
 * </ul>
17
 * #L%
18
 */
19
package io.earcam.utilitarian.site.search.offline;
20
21
import static java.util.Collections.emptyList;
22
23
import java.io.IOException;
24
import java.io.StringReader;
25
import java.io.UncheckedIOException;
26
import java.util.ArrayList;
27
import java.util.List;
28
29
import org.apache.lucene.analysis.Analyzer;
30
import org.apache.lucene.analysis.TokenStream;
31
import org.apache.lucene.analysis.core.SimpleAnalyzer;
32
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
33
import org.slf4j.Logger;
34
import org.slf4j.LoggerFactory;
35
36
import io.earcam.unexceptional.Closing;
37
38
public class SimpleTokenizer implements Processor {
39
40
	private static final Logger LOG = LoggerFactory.getLogger(SimpleTokenizer.class);
41
42
43
	public List<String> tokenize(String input)
44
	{
45
		try {
46 1 1. tokenize : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokenize to ( if (x != null) null else throw new RuntimeException ) → KILLED
			return Closing.closeAfterApplying(createAnalyzer(), input, this::tokens);
47
		} catch(UncheckedIOException e) {
48
			LOG.warn("Failed to tokenize '{}', due to {}", input, e.getMessage());
49
		}
50 1 1. tokenize : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokenize to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE
		return emptyList();
51
	}
52
53
54
	private List<String> tokens(Analyzer analyzer, String input)
55
	{
56 1 1. tokens : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokens to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return Closing.closeAfterApplying(analyzer.tokenStream(null, new StringReader(input)), this::streamTokens);
57
	}
58
59
60
	private List<String> streamTokens(TokenStream stream) throws IOException
61
	{
62 1 1. streamTokens : removed call to org/apache/lucene/analysis/TokenStream::reset → KILLED
		stream.reset();
63
		List<String> tokens = new ArrayList<>();
64 1 1. streamTokens : negated conditional → KILLED
		while(stream.incrementToken()) {
65
			tokens.add(stream.getAttribute(CharTermAttribute.class).toString());
66
		}
67 1 1. streamTokens : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::streamTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return tokens;
68
	}
69
70
71
	/**
72
	 * <p>
73
	 * Override this method to return a custom {@link Analyzer}.
74
	 * </p>
75
	 * <p>
76
	 * Note; Use of Lucene for stemming, stopword filtering, etc must match
77
	 * whatever is configured for lunrjs.
78
	 * </p>
79
	 *
80
	 * @return an {@link Analyzer} for tokenizing
81
	 */
82
	protected Analyzer createAnalyzer()
83
	{
84 1 1. createAnalyzer : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::createAnalyzer to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return new SimpleAnalyzer();
85
	}
86
87
88
	@Override
89
	public void process(Document document)
90
	{
91 2 1. process : negated conditional → SURVIVED
2. process : negated conditional → SURVIVED
		if(document.hasRaw() && !document.hasTokens()) {
92
			List<String> tokenized = tokenize(document.raw());
93
			document.tokens().addAll(tokenized);
94
		}
95
	}
96
}

Mutations

46

1.1
Location : tokenize
Killed by : io.earcam.utilitarian.site.search.offline.SimpleTokenizerTest.simple()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokenize to ( if (x != null) null else throw new RuntimeException ) → KILLED

50

1.1
Location : tokenize
Killed by : none
mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokenize to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE

56

1.1
Location : tokens
Killed by : io.earcam.utilitarian.site.search.offline.SimpleTokenizerTest.simple()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokens to ( if (x != null) null else throw new RuntimeException ) → KILLED

62

1.1
Location : streamTokens
Killed by : io.earcam.utilitarian.site.search.offline.SimpleTokenizerTest.simple()
removed call to org/apache/lucene/analysis/TokenStream::reset → KILLED

64

1.1
Location : streamTokens
Killed by : io.earcam.utilitarian.site.search.offline.SimpleTokenizerTest.simple()
negated conditional → KILLED

67

1.1
Location : streamTokens
Killed by : io.earcam.utilitarian.site.search.offline.SimpleTokenizerTest.simple()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::streamTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED

84

1.1
Location : createAnalyzer
Killed by : io.earcam.utilitarian.site.search.offline.SimpleTokenizerTest.simple()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::createAnalyzer to ( if (x != null) null else throw new RuntimeException ) → KILLED

91

1.1
Location : process
Killed by : none
negated conditional → SURVIVED

2.2
Location : process
Killed by : none
negated conditional → SURVIVED

Active mutators

Tests examined


Report generated by PIT 1.4.3