Crawler.java

1
/*-
2
 * #%L
3
 * io.earcam.utilitarian.site.search.offline
4
 * %%
5
 * Copyright (C) 2017 earcam
6
 * %%
7
 * SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
8
 * 
9
 * You <b>must</b> choose to accept, in full - any individual or combination of 
10
 * the following licenses:
11
 * <ul>
12
 * 	<li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li>
13
 * 	<li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li>
14
 * 	<li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li>
15
 * 	<li><a href="https://opensource.org/licenses/MIT">MIT</a></li>
16
 * </ul>
17
 * #L%
18
 */
19
package io.earcam.utilitarian.site.search.offline;
20
21
import java.net.URI;
22
import java.nio.file.Files;
23
import java.nio.file.Path;
24
import java.util.Map;
25
import java.util.function.Predicate;
26
import java.util.stream.Stream;
27
28
import io.earcam.unexceptional.EmeticStream;
29
30
// @formatter:off
31
/**
32
 * Pipeline needs to be built according to definition, with names driven via SPI, e.g.
33
 *
34
 * Configuration is just a Map<String, String> passed to each component
35
 *
36
 * <pre>
37
 * <pipeline>
38
 *    <filter>
39
 *    	 <id>default-regex</id>
40
 *    	 <configuration>
41
 *          <include>regex</include>
42
 *          <exclude>regex</exclude>
43
 *       </configuration>
44
 *    </filter>
45
 *    <processor>
46
 *       <id>default-html</id>
47
 *    </processor>
48
 *    <processor>
49
 *       <id>default-pdf</id>
50
 *    </processor>
51
 *    <!-- ... filter based on content can go here ... -->
52
 *    <processor>
53
 *       <id>default-tokenizer</id>
54
 *    </processor>
55
 * </pipeline>
56
 * </pre>
57
 *
58
 * Therefore Filter and Processor both need to extend 'Component'
59
 *
60
 * Component{ String id; void configure(Map<String, String>) }
61
 *
62
 * Filter implements Predicate<Document>
63
 *
64
 * Processor{ process(Document); }
65
 *
66
 *
67
 * HtmlContentParser{ }
68
 *
69
 *
70
 */
71
// @formatter:on
72
public class Crawler {
73
74
	/**
	 * The document pipeline under construction. It is seeded by {@link #crawler(Map)}
	 * and reassigned each time {@link #filter(Predicate)} or {@link #processor(Processor)}
	 * appends a stage.
	 */
	private Stream<Document> documents;
75
76
77
	/**
	 * Static factory: creates a {@code Crawler} whose document stream is obtained by
	 * crawling the supplied directories.
	 *
	 * @param directories map whose keys are the base directories to walk and whose
	 *                    values are the base URIs paired with them
	 * @return a new crawler holding the stream of crawled documents
	 */
	public static Crawler crawler(Map<Path, URI> directories)
78
	{
79
		Crawler crawler = new Crawler();
80
		crawler.documents = crawl(directories);
81 1 1. crawler : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawler to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return crawler;
82
	}
83
84
85
	/**
	 * Crawls every (directory, URI) entry and concatenates the resulting documents
	 * into one stream. The entries are streamed with {@code parallelStream()}.
	 *
	 * @param directories base directories mapped to their base URIs
	 * @return a single stream of the documents found under all directories
	 */
	private static Stream<Document> crawl(Map<Path, URI> directories)
86
	{
87 1 1. crawl : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawl to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return directories.entrySet().parallelStream().flatMap(Crawler::crawl);
88
	}
89
90
91
	/**
	 * Adapter that unpacks a map entry and delegates to {@link #crawl(Path, URI)}.
	 *
	 * @param pair entry whose key is the base directory and whose value is the base URI
	 * @return the documents found under the entry's directory
	 */
	private static Stream<Document> crawl(Map.Entry<Path, URI> pair)
92
	{
93 1 1. crawl : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawl to ( if (x != null) null else throw new RuntimeException ) → SURVIVED
		return crawl(pair.getKey(), pair.getValue());
94
	}
95
96
97
	/**
	 * Walks {@code baseDir} with {@code Files.walk}, keeps only regular files, and
	 * converts each one with {@code Document.document(baseDir, baseUri, file)}.
	 *
	 * NOTE(review): {@code EmeticStream.emesis(...)} presumably exists so the checked
	 * {@code IOException} of {@code Files.walk} need not be declared here - confirm
	 * against the io.earcam.unexceptional library.
	 *
	 * @param baseDir directory to walk
	 * @param baseUri URI passed, together with {@code baseDir}, to the document factory
	 * @return a stream with one document per regular file found under {@code baseDir}
	 */
	private static Stream<Document> crawl(Path baseDir, URI baseUri)
98
	{
99 2 1. crawl : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawl to ( if (x != null) null else throw new RuntimeException ) → SURVIVED
2. lambda$crawl$0 : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::lambda$crawl$0 to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return EmeticStream.emesis(Files::walk, baseDir)
100
				.mapToStream()
101 1 1. lambda$crawl$1 : replaced return of integer sized value with (x == 0 ? 1 : 0) → SURVIVED
				.filter(Files::isRegularFile)
102 1 1. lambda$crawl$2 : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::lambda$crawl$2 to ( if (x != null) null else throw new RuntimeException ) → KILLED
				.map(f -> Document.document(baseDir, baseUri, f));
103
	}
104
105
106
	/**
	 * Appends a filtering stage to the pipeline; only documents accepted by the
	 * predicate are passed on to later stages.
	 *
	 * @param filter predicate deciding which documents are kept
	 * @return this crawler, so calls can be chained
	 */
	public Crawler filter(Predicate<Document> filter)
107
	{
108
		documents = documents.filter(filter);
109 1 1. filter : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::filter to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return this;
110
	}
111
112
113
	/**
	 * Appends a processing stage to the pipeline; the processor is applied to every
	 * document through {@code Stream.map}.
	 *
	 * @param processor stage applied to each document
	 * @return this crawler, so calls can be chained
	 */
	public Crawler processor(Processor processor)
114
	{
115
		documents = documents.map(processor);
116 1 1. processor : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::processor to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return this;
117
	}
118
119
120
	/**
	 * Returns the assembled pipeline, restricted to documents for which
	 * {@code Document::hasTokens} is true.
	 *
	 * The returned stream is derived from the single underlying stream held by this
	 * crawler, and a Java stream can be operated on only once, so this method is
	 * effectively single-use per crawler instance.
	 *
	 * @return the stream of documents that have tokens
	 */
	public Stream<Document> documents()
121
	{
122 1 1. documents : mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::documents to ( if (x != null) null else throw new RuntimeException ) → KILLED
		return documents.filter(Document::hasTokens);
123
	}
124
}

Mutations

81

1.1
Location : crawler
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawler to ( if (x != null) null else throw new RuntimeException ) → KILLED

87

1.1
Location : crawl
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawl to ( if (x != null) null else throw new RuntimeException ) → KILLED

93

1.1
Location : crawl
Killed by : none
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawl to ( if (x != null) null else throw new RuntimeException ) → SURVIVED

99

1.1
Location : crawl
Killed by : none
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::crawl to ( if (x != null) null else throw new RuntimeException ) → SURVIVED

2.2
Location : lambda$crawl$0
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::lambda$crawl$0 to ( if (x != null) null else throw new RuntimeException ) → KILLED

101

1.1
Location : lambda$crawl$1
Killed by : none
replaced return of integer sized value with (x == 0 ? 1 : 0) → SURVIVED

102

1.1
Location : lambda$crawl$2
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::lambda$crawl$2 to ( if (x != null) null else throw new RuntimeException ) → KILLED

109

1.1
Location : filter
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::filter to ( if (x != null) null else throw new RuntimeException ) → KILLED

116

1.1
Location : processor
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::processor to ( if (x != null) null else throw new RuntimeException ) → KILLED

122

1.1
Location : documents
Killed by : io.earcam.utilitarian.site.search.offline.SearchTest.equivalence()
mutated return of Object value for io/earcam/utilitarian/site/search/offline/Crawler::documents to ( if (x != null) null else throw new RuntimeException ) → KILLED

Active mutators

Tests examined


Report generated by PIT 1.4.3