1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
| /** * // This is the HtmlParser's API interface. * // You should not implement it, or speculate about its implementation * interface HtmlParser { * public List<String> getUrls(String url) {} * } */
class Solution { public List<String> crawl(String startUrl, HtmlParser htmlParser) { String hostname = getHostname(startUrl); Set<String> visited = ConcurrentHashMap.newKeySet(); visited.add(startUrl); return crawl(startUrl, htmlParser, hostname, visited).collect(Collectors.toList()); } private String getHostname(String url) { int index = url.indexOf('/', 7); return (index != -1) ? url.substring(0, index) : url; } private Stream<String> crawl(String startUrl, HtmlParser htmlParser, String hostname, Set<String> visited) { Stream<String> stream = htmlParser.getUrls(startUrl) .parallelStream() .filter(url -> isSameHostname(url, hostname)) .filter(url -> visited.add(url)) .flatMap(url -> crawl(url, htmlParser, hostname, visited)); return Stream.concat(Stream.of(startUrl), stream); } private boolean isSameHostname(String url, String hostname) { if (!url.startsWith(hostname)) return false; return url.length() == hostname.length() || url.charAt(hostname.length()) == '/'; } }
|