Sitemaps.java
/*-
* #%L
* io.earcam.utilitarian.site.sitemap
* %%
* Copyright (C) 2017 earcam
* %%
* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)
*
* You <b>must</b> choose to accept, in full - any individual or combination of
* the following licenses:
* <ul>
* <li><a href="https://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a></li>
* <li><a href="https://www.eclipse.org/legal/epl-v10.html">EPL-1.0</a></li>
* <li><a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0</a></li>
* <li><a href="https://opensource.org/licenses/MIT">MIT</a></li>
* </ul>
* #L%
*/
package io.earcam.utilitarian.site.sitemap;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.AbstractMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.function.Consumer;
import java.util.stream.Stream;
import javax.annotation.WillNotClose;
import io.earcam.unexceptional.Closing;
import io.earcam.unexceptional.EmeticStream;
import io.earcam.unexceptional.Exceptional;
/**
* Static entry point for <a href="https://sitemaps.org">Sitemaps</a>
*
*/
public final class Sitemaps {

    /** Name of the cache file listing the sitemap files generated by {@link #create(SitemapParameters)}. */
    private static final String GENERATED_FILE_SITEMAPS = ".io.earcam.utilitarian.site.sitemap.list";

    /** Name of the file holding the serialized {@link SitemapParameters} used for generation. */
    private static final String GENERATED_FILE_SITEMAP_PARAMETERS = ".io.earcam.utilitarian.site.sitemap.parameters.ser";

    /** Name of the cache file listing the generated sitemap index files. */
    private static final String GENERATED_FILE_SITEMAP_INDICES = ".io.earcam.utilitarian.site.sitemap.index.list";

    /** Platform line separator, UTF-8 encoded; terminates each entry written to a cache list. */
    private static final byte[] NL = bytes(System.lineSeparator());


    private Sitemaps()
    {}


    /**
     * Based on the parameters, generates an arbitrary number of sitemap.xml files.
     * The parameters are also serialized to {@value #GENERATED_FILE_SITEMAP_PARAMETERS}
     * in the parameters' target directory, from where {@link #index(Path, Stream)} reads them back.
     *
     * @param parameters the sitemap generation parameters, including the target directory
     * @return path to a text file cache containing the filenames of generated sitemaps
     * @throws UncheckedIOException if unable to write the files or cache
     */
    public static Path create(SitemapParameters parameters)
    {
        File file = parameters.targetDir.resolve(GENERATED_FILE_SITEMAP_PARAMETERS).toFile();
        Closing.closeAfterAccepting(FileOutputStream::new, file, parameters::serialize);

        Path generated = parameters.targetDir.resolve(GENERATED_FILE_SITEMAPS);
        Closing.closeAfterAccepting(FileOutputStream::new, generated.toFile(), parameters, Sitemaps::doCreate);
        return generated;
    }


    /**
     * Runs the sitemap generation, recording each generated file to {@code output}.
     *
     * @param output the cache list stream; not closed here
     * @param parameters the sitemap generation parameters
     * @throws IOException if generation or recording fails
     */
    private static void doCreate(FileOutputStream output, SitemapParameters parameters) throws IOException
    {
        Consumer<Path> generatedFileRecorder = generatedFileRecorder(output);
        Sitemap sitemap = new Sitemap(parameters, generatedFileRecorder);
        sitemap.run();
    }


    /**
     * Creates a consumer that appends the absolute form of each accepted path,
     * followed by the platform line separator, to {@code output}.
     *
     * @param output the stream to append to; not closed by the returned consumer
     * @return a consumer recording paths one per line
     */
    private static Consumer<Path> generatedFileRecorder(OutputStream output)
    {
        return Exceptional.uncheckConsumer(p -> {
            output.write(bytes(p.toAbsolutePath()));
            output.write(NL);
        });
    }


    /**
     * @param instance any non-null object
     * @return the UTF-8 encoding of {@code instance.toString()}
     */
    private static byte[] bytes(Object instance)
    {
        return instance.toString().getBytes(UTF_8);
    }


    /**
     * Creates sitemap-index.xml file(s) for all sitemap.xml files listed in any cache
     * file ( {@value #GENERATED_FILE_SITEMAPS} ) in the {@code targetDirs}.
     * The parameters are read back from the {@value #GENERATED_FILE_SITEMAP_PARAMETERS}
     * file in {@code targetDir}; the index cache is then written to the target directory
     * held by those deserialized parameters.
     *
     * @param targetDir the directory holding the serialized parameters
     * @param targetDirs where to look for sitemap listing cache files
     * @return a path to a cache file ({@value #GENERATED_FILE_SITEMAP_INDICES}) listing the created index/indices
     * @throws UncheckedIOException if unable to write the files or read the caches
     */
    public static Path index(Path targetDir, Stream<Path> targetDirs)
    {
        SitemapParameters parameters = new SitemapParameters();
        File file = targetDir.resolve(GENERATED_FILE_SITEMAP_PARAMETERS).toFile();
        Closing.closeAfterAccepting(FileInputStream::new, file, parameters::deserialize);
        return index(parameters, targetDirs);
    }


    /**
     * Creates sitemap-index.xml file(s) for all sitemap.xml files listed in any cache
     * file ( {@value #GENERATED_FILE_SITEMAPS} ) in the {@code targetDirs}.
     *
     * @param parameters the parameters for the index; the cache file is written to their target directory
     * @param targetDirs where to look for sitemap listing cache files
     * @return a path to a cache file ({@value #GENERATED_FILE_SITEMAP_INDICES}) listing the created index/indices
     * @throws UncheckedIOException if unable to write the files or read the caches
     */
    public static Path index(SitemapParameters parameters, Stream<Path> targetDirs)
    {
        Path generated = parameters.targetDir.resolve(GENERATED_FILE_SITEMAP_INDICES);
        Closing.closeAfterAccepting(FileOutputStream::new, generated.toFile(), o -> doIndex(parameters, targetDirs, o));
        return generated;
    }


    /**
     * Pairs each target directory's deserialized parameters with the lines of its
     * sitemap cache list and hands the resulting stream to a {@link SitemapIndex}.
     *
     * @param parameters the parameters for the index
     * @param targetDirs directories containing sitemap cache lists and serialized parameters
     * @param output the index cache list stream; not closed here
     * @throws IOException if indexing or recording fails
     */
    private static void doIndex(SitemapParameters parameters, Stream<Path> targetDirs, OutputStream output) throws IOException
    {
        Consumer<Path> generatedFileRecorder = generatedFileRecorder(output);
        SitemapIndex index = new SitemapIndex(parameters, generatedFileRecorder);

        // NOTE(review): each entry's Files.lines stream is opened lazily while the
        // outer stream is consumed and is not closed here - presumably SitemapIndex
        // is responsible for closing them; confirm.
        Stream<Entry<SitemapParameters, Stream<String>>> map = targetDirs
                .map(d -> new AbstractMap.SimpleEntry<SitemapParameters, Stream<String>>(
                        Exceptional.apply(Sitemaps::hydrateSitemapParameters, d),
                        Exceptional.apply(Files::lines, d.resolve(GENERATED_FILE_SITEMAPS))));

        index.accept(map);
    }


    /**
     * Deserializes the parameters stored in {@code targetDir}.
     *
     * @param targetDir directory containing {@value #GENERATED_FILE_SITEMAP_PARAMETERS}
     * @return the deserialized parameters
     * @throws IOException if the file cannot be opened, read or closed
     */
    private static SitemapParameters hydrateSitemapParameters(Path targetDir) throws IOException
    {
        File file = targetDir.resolve(GENERATED_FILE_SITEMAP_PARAMETERS).toFile();
        // try-with-resources: the input stream was previously never closed
        try(FileInputStream input = new FileInputStream(file)) {
            return new SitemapParameters().deserialize(input);
        }
    }


    /**
     * Submits sitemap indices to a given list of search engines
     *
     * @param targetDir the directory containing the index cache list ({@value #GENERATED_FILE_SITEMAP_INDICES})
     * @param baseUrl the base URL, passed through to {@code SitemapSubmission}
     * @param hosts the hosts to submit to, passed through to {@code SitemapSubmission}
     * @return the result reported by {@code SitemapSubmission.submit}
     * @throws UncheckedIOException if unable to read the cache
     */
    public static String submit(Path targetDir, URI baseUrl, List<String> hosts)
    {
        // The stream is backed by an open file; release it once submission has returned.
        try(Stream<Path> indices = sitemapsIndicesIn(targetDir)) {
            return SitemapSubmission.submit(hosts, baseUrl, targetDir, indices);
        }
    }


    /**
     * Lazily reads the index cache list in {@code targetDir}, one path per line.
     *
     * @param targetDir the directory containing {@value #GENERATED_FILE_SITEMAP_INDICES}
     * @return a stream of the listed paths; backed by an open file, so the caller must close it
     */
    private static Stream<Path> sitemapsIndicesIn(Path targetDir)
    {
        Path list = targetDir.resolve(GENERATED_FILE_SITEMAP_INDICES);
        return Exceptional.apply(Files::lines, list)
                .map(Paths::get);
    }


    /**
     * <p>
     * Generates a {@code robots.txt} file in the {@code targetDir}, based on
     * the sitemap indices cache list there.
     * </p>
     *
     * <p>
     * If no sitemap indices exist then a permissive robots.txt will still be generated.
     * </p>
     *
     * @param targetDir the directory to read the cache list from, and write the robots.txt file to
     * @throws UncheckedIOException if unable to write the file or read the cache
     */
    public static void robotsTxt(Path targetDir)
    {
        File robotsTxt = targetDir.resolve("robots.txt").toFile();
        Closing.closeAfterAccepting(FileOutputStream::new, robotsTxt, targetDir, Sitemaps::writeRobotsTxt);
    }


    /**
     * Generates a {@code robots.txt} file, linking to sitemap indices for cache file in {@code targetDir}.
     * Each index is written as its own {@code sitemap:} line.
     *
     * @param output the stream for {@code robots.txt} content
     * @param targetDir where to load the cache file ({@value #GENERATED_FILE_SITEMAP_INDICES}) from
     * @throws IOException if unable to write the file or read the cache
     */
    private static void writeRobotsTxt(@WillNotClose OutputStream output, Path targetDir) throws IOException
    {
        output.write(bytes(
                "User-agent: *\n" +
                        "Allow: /\n" +
                        "Disallow:\n" +
                        "\n"));

        // Close the file-backed stream once all entries have been written.
        try(Stream<Path> indices = sitemapsIndicesIn(targetDir)) {
            EmeticStream.emesis(indices).forEach(s -> {
                output.write(bytes("sitemap: ./"));
                output.write(bytes(s.getFileName().toString()));
                // terminate the entry, otherwise multiple indices run together on one line
                output.write(bytes("\n"));
            });
        }
    }
}