HtmlContentProcessor.java

1
/*-
2
 * #%L
3
 * io.earcam.utilitarian.site.search.offline
4
 * %%
5
 * Copyright (C) 2017 earcam
6
 * %%
7
 * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
8
 * 
9
 * You <b>must</b> choose to accept, in full - any individual or combination of 
10
 * the following licenses:
11
 * <ul>
12
 * 	<li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li>
13
 * 	<li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li>
14
 * 	<li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li>
15
 * 	<li><a href="https://opensource.org/licenses/MIT">MIT</a></li>
16
 * </ul>
17
 * #L%
18
 */
19
package io.earcam.utilitarian.site.search.offline;
20
21
import static java.nio.charset.StandardCharsets.UTF_8;
22
23
import java.io.FileInputStream;
24
import java.io.IOException;
25
26
import org.jsoup.Jsoup;
27
import org.slf4j.Logger;
28
import org.slf4j.LoggerFactory;
29
30
public class HtmlContentProcessor extends AbstractHtmlProcessor {
31
32
	private static final Logger LOG = LoggerFactory.getLogger(HtmlContentProcessor.class);
33
34
35
	@Override
36
	public void process(Document document)
37
	{
38 2 1. process : negated conditional → SURVIVED
2. process : negated conditional → SURVIVED
		if(isHtml(document) && !document.hasRaw()) {
39
			org.jsoup.nodes.Document html;
40
			try {
41
				html = Jsoup.parse(new FileInputStream(document.file().toFile()), UTF_8.toString(), "");
42 1 1. process : removed call to io/earcam/utilitarian/site/search/offline/HtmlContentProcessor::assignFields → SURVIVED
				assignFields(document, html);
43
			} catch(IOException e) {
44
				LOG.warn("Failed to process HTML {} due to: {}", document.file(), e.getMessage());
45
				LOG.debug("Failed to process HTML", e);
46
			}
47
		}
48
	}
49
50
51
	private void assignFields(Document document, org.jsoup.nodes.Document html)
52
	{
53 1 1. assignFields : removed call to io/earcam/utilitarian/site/search/offline/Document::field → SURVIVED
		document.field(Document.TITLE, html.getElementsByTag("title").text());
54 1 1. assignFields : removed call to io/earcam/utilitarian/site/search/offline/Document::field → SURVIVED
		document.field(Document.DESCRIPTION, html.getElementsByTag("meta").select("[name=description]").attr("content"));
55
56 1 1. assignFields : removed call to io/earcam/utilitarian/site/search/offline/Document::field → SURVIVED
		document.field(Document.RAW_TEXT, html.getElementsByTag("h1").text() + ' ' +
57
				html.getElementsByTag("h2").text() + ' ' +
58
				html.getElementsByTag("h3").text() + ' ' +
59
				html.getElementsByTag("h4").text() + ' ' +
60
				html.getElementsByTag("h5").text() + ' ' +
61
				html.getElementsByTag("h6").text() + ' ' +
62
				html.getElementsByTag("p").text());
63
	}
64
}

Mutations

38

1.1
Location : process
Killed by : none
negated conditional → SURVIVED

2.2
Location : process
Killed by : none
negated conditional → SURVIVED

42

1.1
Location : process
Killed by : none
removed call to io/earcam/utilitarian/site/search/offline/HtmlContentProcessor::assignFields → SURVIVED

53

1.1
Location : assignFields
Killed by : none
removed call to io/earcam/utilitarian/site/search/offline/Document::field → SURVIVED

54

1.1
Location : assignFields
Killed by : none
removed call to io/earcam/utilitarian/site/search/offline/Document::field → SURVIVED

56

1.1
Location : assignFields
Killed by : none
removed call to io/earcam/utilitarian/site/search/offline/Document::field → SURVIVED

Active mutators

Tests examined


Report generated by PIT 1.4.3