View Javadoc
1   package de.l3s.icrawl.api;
2   
3   import java.nio.charset.StandardCharsets;
4   import java.util.Map;
5   
6   import org.apache.commons.httpclient.HttpStatus;
7   
8   import com.google.common.base.Preconditions;
9   
10  /**
11   * A document retrieved from a (social media) API.
12   *
13   * See {@link Builder} for instructions on how to create instances.
14   */
15  public class ApiFetcherDocument {
16  
17      private final String uri;
18      private final int statusCode;
19      private final String statusMessage;
20      private final Map<String, String> headers;
21      private final String contentType;
22      private final byte[] content;
23      private final long fetchTime;
24      private final Long modifiedTime;
25      private final String sourceUrl;
26  
27      ApiFetcherDocument(String uri, int statusCode, String statusMessage,
28              Map<String, String> headers,
29              String contentType, byte[] content, long fetchTime, Long modifiedTime,
30              String sourceUrl) {
31          this.uri = Preconditions.checkNotNull(uri);
32          this.statusCode = statusCode;
33          this.statusMessage = statusMessage;
34          this.headers = Preconditions.checkNotNull(headers);
35          this.contentType = Preconditions.checkNotNull(contentType);
36          this.content = Preconditions.checkNotNull(content);
37          this.fetchTime = fetchTime;
38          this.modifiedTime = modifiedTime;
39          this.sourceUrl = sourceUrl;
40      }
41  
42      public static Builder builder() {
43          return new Builder();
44      }
45  
46      public String getUri() {
47          return uri;
48      }
49  
50      public int getStatusCode() {
51          return statusCode;
52      }
53  
54      public String getStatusMessage() {
55          return statusMessage;
56      }
57  
58      public Map<String, String> getHeaders() {
59          return headers;
60      }
61  
62      public String getContentType() {
63          return contentType;
64      }
65  
66      public byte[] getContent() {
67          return content;
68      }
69  
70      public long getFetchTime() {
71          return fetchTime;
72      }
73  
74      public Long getModifiedTime() {
75          return modifiedTime;
76      }
77  
78      public String getSourceUrl() {
79          return sourceUrl;
80      }
81  
82      /**
83       * Construct a ApiFetcherDocument step by step.
84       *
85       * Example:
86       *
87       * <pre>
88       * ApiFetcherDocument doc = ApiFetcherDocument.builder()
89       *                            .setUrl(url)
90       *                            .setStatusCode(200)
91       *                            ...
92       *                            .build();
93       * </pre>
94       *
95       * When this document is retrieved as part of a collection (e.g. a search
96       * result), than the values for the collection document can be used except
97       * where noted otherwise.
98       */
99      public static class Builder {
100         private String url;
101         private int statusCode = -1;
102         private String statusMessage;
103         private Map<String, String> headers;
104         private String contentType;
105         private byte[] content;
106         private long fetchTime = -1;
107         private Long modifiedTime;
108         private String sourceUrl;
109 
110         /**
111          * Add the URL of the document.
112          *
113          * @param url
114          *            a URL or URI of the document. If this document is
115          *            retrieved as part of e.g. a search result, than the
116          *            specified URL may be different than the URL used in the
117          *            request.
118          * @return the builder for chaining
119          */
120         public Builder setUrl(String url) {
121             this.url = url;
122             return this;
123         }
124 
125         /**
126          * Set the status code of the request.
127          *
128          * @param statusCode
129          *            the status code of the API request. If possible use or map
130          *            to the HTTP status code
131          * @return the builder for chaining
132          */
133         public Builder setStatusCode(int statusCode) {
134             this.statusCode = statusCode;
135             return this;
136         }
137 
138         /**
139          * Set the status message of the request.
140          *
141          * @param statusMessage
142          *            the status message of the API request. When this value is
143          *            not set, the default HTTP message for the status code is
144          *            used
145          * @return the builder for chaining
146          */
147         public Builder setStatusMessage(String statusMessage) {
148             this.statusMessage = statusMessage;
149             return this;
150         }
151 
152         /**
153          * Set the HTTP headers
154          *
155          * @param headers
156          *            the full HTTP headers returned for the request
157          * @return the builder for chaining
158          */
159         public Builder setHeaders(Map<String, String> headers) {
160             this.headers = headers;
161             return this;
162         }
163 
164         /**
165          * Set the MIME type of the content.
166          *
167          * @param contentType
168          *            the content MIME type. If the content is of an
169          *            API-specific type, than a application-specific type should
170          *            be used (e.g. application/twitter-tweet+json for a JSON
171          *            encoded tweet)
172          * @return the builder for chaining
173          */
174         public Builder setContentType(String contentType) {
175             this.contentType = contentType;
176             return this;
177         }
178 
179         /**
180          * Set the body of the request as a String.
181          *
182          * If the body is a binary format, use {@link #setContent(byte[])}
183          * instead.
184          *
185          * @param content
186          *            the payload of the document
187          * @return the builder for chaining
188          */
189         public Builder setContent(String content) {
190             if (content != null) {
191                 this.content = content.getBytes(StandardCharsets.UTF_8);
192             }
193             return this;
194         }
195 
196         /**
197          * Set the body of the request as a byte array.
198          *
199          * If the body is acually in a textual format, use
200          * {@link #setContent(String)} instead or make sure that the body is
201          * encoded in UTF-8.
202          *
203          * @param content
204          *            the payload of the document
205          * @return the builder for chaining
206          */
207         public Builder setContent(byte[] content) {
208             this.content = content;
209             return this;
210         }
211 
212         /**
213          * Set the timestamp of fetching the document.
214          *
215          * @param fetchTime
216          *            the time when the fetch was started or finished as a Unix
217          *            timestamp (in milliseconds)
218          * @return the builder for chaining
219          */
220         public Builder setFetchTime(long fetchTime) {
221             this.fetchTime = fetchTime;
222             return this;
223         }
224 
225         /**
226          * Set the last-modified time of the document (optional).
227          *
228          * This parameter should be set to the value provided by the API either
229          * through HTTP headers or as part of the data. Leave unset if the
230          * last-modified time is not provided.
231          *
232          * API fetchers MAY also discard last-modified times that are likely to
233          * be wrong (e.g. in the future).
234          *
235          * @param modifiedTime
236          *            the last modified time of this document
237          * @return the builder for chaining
238          */
239         public Builder setModifiedTime(long modifiedTime) {
240             this.modifiedTime = modifiedTime;
241             return this;
242         }
243 
244         public Builder setSourceUrl(String sourceUrl) {
245             this.sourceUrl = sourceUrl;
246             return this;
247         }
248 
249         /**
250          * Check the set values and create a new ApiFetcherDocument object.
251          *
252          * @return an {@link ApiFetcherDocument} instance with the provided
253          *         values
254          * @throws IllegalStateException
255          *             if one or more required fields are not set
256          */
257         public ApiFetcherDocument build() {
258             Preconditions.checkState(url != null, "url is not set");
259             Preconditions.checkState(statusCode >= 0, "statusCode is not set");
260             if (statusMessage == null) {
261                 statusMessage = HttpStatus.getStatusText(statusCode);
262             }
263             Preconditions.checkState(headers != null, "headers is not set");
264             Preconditions.checkState(contentType != null, "content type is not set");
265             Preconditions.checkState(content != null, "content is not set");
266             Preconditions.checkState(fetchTime >= 0, "fetchTime is not set");
267             return new ApiFetcherDocument(url, statusCode, statusMessage, headers, contentType,
268                 content,
269                 fetchTime, modifiedTime, sourceUrl);
270         }
271     }
272 }