1 | /*- | |
2 | * #%L | |
3 | * io.earcam.utilitarian.site.search.offline | |
4 | * %% | |
5 | * Copyright (C) 2017 earcam | |
6 | * %% | |
7 | * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT) | |
8 | * | |
9 | * You <b>must</b> choose to accept, in full - any individual or combination of | |
10 | * the following licenses: | |
11 | * <ul> | |
12 | * <li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li> | |
13 | * <li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li> | |
14 | * <li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li> | |
15 | * <li><a href="https://opensource.org/licenses/MIT">MIT</a></li> | |
16 | * </ul> | |
17 | * #L% | |
18 | */ | |
19 | package io.earcam.utilitarian.site.search.offline; | |
20 | ||
21 | import static java.util.Collections.emptyList; | |
22 | ||
23 | import java.io.IOException; | |
24 | import java.io.StringReader; | |
25 | import java.io.UncheckedIOException; | |
26 | import java.util.ArrayList; | |
27 | import java.util.List; | |
28 | ||
29 | import org.apache.lucene.analysis.Analyzer; | |
30 | import org.apache.lucene.analysis.TokenStream; | |
31 | import org.apache.lucene.analysis.core.SimpleAnalyzer; | |
32 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | |
33 | import org.slf4j.Logger; | |
34 | import org.slf4j.LoggerFactory; | |
35 | ||
36 | import io.earcam.unexceptional.Closing; | |
37 | ||
38 | public class SimpleTokenizer implements Processor { | |
39 | ||
40 | private static final Logger LOG = LoggerFactory.getLogger(SimpleTokenizer.class); | |
41 | ||
42 | ||
43 | public List<String> tokenize(String input) | |
44 | { | |
45 | try { | |
46 |
1
1. tokenize : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokenize to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return Closing.closeAfterApplying(createAnalyzer(), input, this::tokens); |
47 | } catch(UncheckedIOException e) { | |
48 | LOG.warn("Failed to tokenize '{}', due to {}", input, e.getMessage()); | |
49 | } | |
50 |
1
1. tokenize : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokenize to ( if (x != null) null else throw new RuntimeException ) → NO_COVERAGE |
return emptyList(); |
51 | } | |
52 | ||
53 | ||
54 | private List<String> tokens(Analyzer analyzer, String input) | |
55 | { | |
56 |
1
1. tokens : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::tokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return Closing.closeAfterApplying(analyzer.tokenStream(null, new StringReader(input)), this::streamTokens); |
57 | } | |
58 | ||
59 | ||
60 | private List<String> streamTokens(TokenStream stream) throws IOException | |
61 | { | |
62 |
1
1. streamTokens : removed call to org/apache/lucene/analysis/TokenStream::reset → KILLED |
stream.reset(); |
63 | List<String> tokens = new ArrayList<>(); | |
64 |
1
1. streamTokens : negated conditional → KILLED |
while(stream.incrementToken()) { |
65 | tokens.add(stream.getAttribute(CharTermAttribute.class).toString()); | |
66 | } | |
67 |
1
1. streamTokens : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::streamTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return tokens; |
68 | } | |
69 | ||
70 | ||
71 | /** | |
72 | * <p> | |
73 | * Override this method to return a custom {@link Analyzer}. | |
74 | * </p> | |
75 | * <p> | |
76 | * Note; Use of Lucene for stemming, stopword filtering, etc must match | |
77 | * whatever is configured for lunrjs. | |
78 | * </p> | |
79 | * | |
80 | * @return an {@link Analyzer} for tokenizing | |
81 | */ | |
82 | protected Analyzer createAnalyzer() | |
83 | { | |
84 |
1
1. createAnalyzer : mutated return of Object value for io/earcam/utilitarian/site/search/offline/SimpleTokenizer::createAnalyzer to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return new SimpleAnalyzer(); |
85 | } | |
86 | ||
87 | ||
88 | @Override | |
89 | public void process(Document document) | |
90 | { | |
91 |
2
1. process : negated conditional → SURVIVED 2. process : negated conditional → SURVIVED |
if(document.hasRaw() && !document.hasTokens()) { |
92 | List<String> tokenized = tokenize(document.raw()); | |
93 | document.tokens().addAll(tokenized); | |
94 | } | |
95 | } | |
96 | } | |
Mutations | ||
46 |
1.1 |
|
50 |
1.1 |
|
56 |
1.1 |
|
62 |
1.1 |
|
64 |
1.1 |
|
67 |
1.1 |
|
84 |
1.1 |
|
91 |
1.1 2.2 |