View Javadoc
1   package de.l3s.icrawl.api;
2   
3   import java.util.Map;
4   
5   import javax.annotation.Nullable;
6   
7   import de.l3s.icrawl.domain.api.ApiFetcherDocument;
8   import de.l3s.icrawl.domain.status.ApiStatus;
9   
10  /**
11   * Callback interface through which an API fetcher reports its results.
12   * <p>
13   * An API fetcher receives an instance of this interface together with each
14   * source it should fetch and must return its results through these methods.
15   * The crawler may hand out several {@link ApiFetcherContext}s for the same
16   * source; in that case the API fetcher should store all contexts and report
17   * results to each of them (subscriber pattern).
18   */
19  public interface ApiFetcherContext {
20      /** Report a document that the API fetcher has fetched. */
21      void writeDocument(ApiFetcherDocument doc);
22  
23      /**
24       * Report a new link that should be crawled.
25       *
26       * @param uri
27       *            the URI to crawl
28       *
29       * @param metadata
30       *            additional metadata for the link
31       */
32      void writeOutlink(String uri, Map<String, String> metadata);
33  
34      /**
35       * Report a resolved redirect, e.g. from a URL shortener service.
36       * <p>
37       * The crawler MAY skip downloading the {@code fromUri} to save bandwidth.
38       *
39       * @param fromUri
40       *            the original URI
41       * @param toUri
42       *            the URI that {@code fromUri} redirects to
43       * @param metadata
44       *            additional metadata for the source
45       */
46      void writeRedirect(String fromUri, String toUri, Map<String, String> metadata);
47  
48      /**
49       * Report an error while fetching from the API.
50       * <p>
51       * The ApiFetcher should do its best to discover the cause of the error and
52       * find a reasonable strategy for its future behavior.
53       *
54       * @param type
55       *            the error class
56       * @param behavior
57       *            how the ApiFetcher will continue for this URI
58       * @param message
59       *            a human-readable description of the error and/or the future
60       *            behavior (optional, may be {@code null})
61       * @param e
62       *            the actual exception, e.g. in case of parser errors (optional, may be {@code null})
63       */
64      void reportError(ApiStatus.ErrorType type, ApiStatus.ErrorBehavior behavior, @Nullable String message,
65              @Nullable Exception e);
66      // NOTE(review): equals/hashCode are redeclared from Object; presumably so stored contexts can be compared -- confirm.
67      @Override
68      boolean equals(Object obj);
69  
70      @Override
71      int hashCode();
72  
73  }