View Javadoc
1   package de.l3s.icrawl.api;
2   
3   import java.io.Closeable;
4   import java.util.Collection;
5   
6   import org.apache.hadoop.conf.Configuration;
7   
8   import de.l3s.icrawl.domain.api.ApiFetcherException;
9   import de.l3s.icrawl.domain.api.ApiRequest;
10  
11  /**
12   * Module to monitor streams of documents from an API.
13   *
14   * <p>An API fetcher is responsible for retrieving data from a (social media)
15   * platform API. It is given one or more URIs that define the data to be
16   * retrieved. Each API fetcher is started as a singleton and is responsible for
17   * maintaining quota and politeness restrictions itself.
18   *
19   * <p>The module needs to implement a Listener pattern, where other modules can
20   * subscribe to updates from a URI by passing in an {@link ApiFetcherContext}.
21   */
22  public interface ApiFetcher extends Closeable {
23      // TODO boolean supportsUri(...)
24      /**
25       * Current runtime status of the fetcher, as returned by {@link #getStatus()}.
26       */
27      enum Status {
28          /**
29           * The fetcher is waiting to be initialized.
30           */
31          INIT, RUNNING, FAILED, STOPPED, KILLED
32      }
33  
34      /**
35       * Hook for injecting the system configuration.
36       *
37       * <p>This method is called immediately after the fetcher is instantiated.
38       * After this method returns the fetcher MUST be ready to handle fetch
39       * requests.
40       *
41       * @param conf
42       *            the system configuration
43       * @throws IllegalArgumentException
44       *             if required configuration parameters are missing
45       */
46      void setConfiguration(Configuration conf);
47  
48      /**
49       * Return the URI prefix that this fetcher can handle.
50       *
51       * @return a string ending in ":" or "://", e.g. <code>twitter:</code> or
52       *         <code>atom:</code>. Must not be null or an empty string.
53       */
54      String getUriScheme();
55  
56      /**
57       * Start following updates for the given requests.
58       *
59       * <p>The processing of the requests SHOULD happen asynchronously. Retrieved
60       * documents should be passed back using the methods on <code>apiFetcherJob</code>.
61       *
62       * @param requests
63       *            the requests to monitor
64       * @param apiFetcherJob
65       *            the context used to report updates
66       * @throws IllegalArgumentException
67       *             if a request is not a valid fetch request for this fetcher
68       * @throws IllegalStateException
69       *             if the fetcher has been {@link #stop() stopped},
70       *             {@link #kill() killed} or if there is a permanent problem
71       *             with the API
72       * @throws ApiFetcherException NOTE(review): condition undocumented and not in the signature - confirm
73       */
74      void addRequests(Collection<ApiRequest> requests, ApiFetcherContext apiFetcherJob);
75  
76      /**
77       * Stop following updates for the given requests.
78       *
79       * @param requests
80       *            the previously monitored requests
81       * @param context
82       *            the context to unsubscribe
83       */
84      void removeRequests(Collection<ApiRequest> requests, ApiFetcherContext context);
85  
86      /**
87       * Stop monitoring all URIs for the given context.
88       */
89      void stop(ApiFetcherContext context);
90  
91      /**
92       * Request that the fetcher should finish retrieving data in the near
93       * future.
94       *
95       * <p>The fetcher should shut itself down afterwards. Any further method calls
96       * SHOULD cause an {@link IllegalStateException}.
97       */
98      void stop();
99  
100     /**
101      * Stop the fetcher immediately, interrupting any ongoing transfers.
102      *
103      * <p>The fetcher should shut itself down afterwards. Any further method calls
104      * SHOULD cause an {@link IllegalStateException}.
105      */
106     void kill();
107 
108     /**
109      * Get the current status of the fetcher.
110      *
111      * @return the current {@link Status} of this fetcher
112      */
113     Status getStatus();
114 
115 }