PdfContentProcessor.java

1
/*-
2
 * #%L
3
 * io.earcam.utilitarian.site.search.offline
4
 * %%
5
 * Copyright (C) 2017 earcam
6
 * %%
7
 * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
8
 *
9
 * You <b>must</b> choose to accept, in full - any individual or combination of
10
 * the following licenses:
11
 * <ul>
12
 * 	<li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li>
13
 * 	<li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li>
14
 * 	<li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li>
15
 * 	<li><a href="https://opensource.org/licenses/MIT">MIT</a></li>
16
 * </ul>
17
 * #L%
18
 */
19
package io.earcam.utilitarian.site.search.offline;
20
21
import java.io.IOException;
22
import java.io.UncheckedIOException;
23
24
import org.apache.pdfbox.pdmodel.PDDocument;
25
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
26
import org.apache.pdfbox.text.PDFTextStripper;
27
import org.slf4j.Logger;
28
import org.slf4j.LoggerFactory;
29
30
import io.earcam.unexceptional.Closing;
31
32
public class PdfContentProcessor implements Processor {
33
34
	private static final Logger LOG = LoggerFactory.getLogger(PdfContentProcessor.class);
35
36
	static {
37
		System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
38
	}
39
40
41
	@Override
42
	public void process(Document document)
43
	{
44 2 1. process : negated conditional → KILLED
2. process : negated conditional → KILLED
		if(isPdf(document) && !document.hasRaw()) {
45
46
			try {
47 1 1. process : removed call to io/earcam/unexceptional/Closing::closeAfterAccepting → KILLED
				Closing.closeAfterAccepting(PDDocument::load, document.file().toFile(), document, this::consume);
48
			} catch(UncheckedIOException e) {
49
				LOG.warn("Failed to process PDF {} due to: {}", document.file(), e.getMessage());
50
				LOG.debug("Failed to process PDF", e.getCause());
51
			}
52
		}
53
	}
54
55
56
	private void consume(PDDocument pdf, Document document) throws IOException
57
	{
58
		PDDocumentInformation information = pdf.getDocumentInformation();
59 1 1. consume : removed call to io/earcam/utilitarian/site/search/offline/Document::field → KILLED
		document.field(Document.TITLE, information.getTitle());
60
61
		PDFTextStripper stripper = new PDFTextStripper();
62
		String text = stripper.getText(pdf);
63 1 1. consume : removed call to io/earcam/utilitarian/site/search/offline/Document::field → KILLED
		document.field(Document.RAW_TEXT, text);
64
	}
65
66
67
	private boolean isPdf(Document document)
68
	{
69 1 1. isPdf : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED
		return "application/pdf".equals(document.contentType());
70
	}
71
}

Mutations

44

1.1
Location : process
Killed by : io.earcam.utilitarian.site.search.offline.PdfContentProcessorTest.processesContentAndMeta()
negated conditional → KILLED

2.2
Location : process
Killed by : io.earcam.utilitarian.site.search.offline.PdfContentProcessorTest.processesContentAndMeta()
negated conditional → KILLED

47

1.1
Location : process
Killed by : io.earcam.utilitarian.site.search.offline.PdfContentProcessorTest.processesContentAndMeta()
removed call to io/earcam/unexceptional/Closing::closeAfterAccepting → KILLED

59

1.1
Location : consume
Killed by : io.earcam.utilitarian.site.search.offline.PdfContentProcessorTest.processesContentAndMeta()
removed call to io/earcam/utilitarian/site/search/offline/Document::field → KILLED

63

1.1
Location : consume
Killed by : io.earcam.utilitarian.site.search.offline.PdfContentProcessorTest.processesContentAndMeta()
removed call to io/earcam/utilitarian/site/search/offline/Document::field → KILLED

69

1.1
Location : isPdf
Killed by : io.earcam.utilitarian.site.search.offline.PdfContentProcessorTest.processesContentAndMeta()
replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED

Active mutators

Tests examined


Report generated by PIT 1.4.3