1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.pipeline.stage;
19
20 import java.io.BufferedInputStream;
21 import java.io.BufferedOutputStream;
22 import java.io.File;
23 import java.io.FileOutputStream;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.OutputStream;
27 import java.net.HttpURLConnection;
28 import java.net.MalformedURLException;
29 import java.net.URL;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.commons.pipeline.StageException;
34 import org.apache.commons.pipeline.validation.ConsumedTypes;
35 import org.apache.commons.pipeline.validation.ProducedTypes;
36
37
38
39
40
41
42
43 @ConsumedTypes({URL.class, String.class})
44 @ProducedTypes({File.class})
45 public class HttpFileDownloadStage extends BaseStage {
46 private static final int BUFFER_SIZE = 10000;
47 private String workDir = null;
48 private Log log = LogFactory.getLog(HttpFileDownloadStage.class);
49
50 public HttpFileDownloadStage() { }
51
52
53
54
55
56
57 public HttpFileDownloadStage(String workDir) {
58 this.workDir = workDir;
59 }
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74 public void process(Object obj) throws StageException {
75
76
77 URL url;
78 try {
79 if (obj instanceof String) {
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96 url = new URL((String) obj);
97
98 } else if (obj instanceof URL) {
99 url = (URL) obj;
100 } else {
101 throw new IllegalArgumentException("Unrecognized parameter class to process() for HttpFileDownload: " + obj.getClass().getName() + "; must be URL or String");
102 }
103 } catch (MalformedURLException e) {
104 throw new StageException(this, "Malformed URL: " + obj, e);
105 }
106
107 log.debug("Retrieving data from " + url.toString());
108
109
110
111
112
113
114
115
116 HttpURLConnection con = null;
117 try {
118 con = (java.net.HttpURLConnection) url.openConnection();
119
120
121
122
123
124
125
126
127 File workDir = (this.workDir == null) ? null : new File(this.workDir);
128 File workFile = File.createTempFile("http-file-download","tmp", workDir);
129
130 InputStream in = new BufferedInputStream(con.getInputStream());
131 OutputStream out = new BufferedOutputStream(new FileOutputStream(workFile, false));
132 byte[] buffer = new byte[BUFFER_SIZE];
133 for (int results = 0; (results = in.read(buffer)) != -1;) {
134 out.write(buffer, 0, results);
135 }
136 out.close();
137 in.close();
138
139 this.emit(workFile);
140 } catch (IOException e) {
141 throw new StageException(this, "An error occurred downloading a data file from " + url.toString(), e);
142 } finally {
143 con.disconnect();
144 }
145 }
146
147
148
149
150
151
152
153
154 public void setWorkDir(String workDir) {
155 this.workDir = workDir;
156 }
157
158
159
160
161 public String getWorkDir() {
162 return this.workDir;
163 }
164
165
166
167
168
169
170
171
172 public URL handleRedirects(URL url) throws IOException, MalformedURLException {
173 java.net.HttpURLConnection.setFollowRedirects(false);
174 HttpURLConnection con = (HttpURLConnection) url.openConnection();
175 int response = con.getResponseCode();
176 log.debug("Response code for " + url + " = " + response);
177
178 if (response == java.net.HttpURLConnection.HTTP_MOVED_PERM || response == java.net.HttpURLConnection.HTTP_MOVED_TEMP) {
179 String location = con.getHeaderField("Location");
180 log.debug("Handling redirect to location: " + location);
181
182 if (location.startsWith("http:")) {
183 url = new URL(location);
184 } else if (location.startsWith("/")) {
185 url = new URL("http://" + url.getHost() + location);
186 } else {
187 url = new URL(con.getURL(), location);
188 }
189
190 url = handleRedirects(url);
191 }
192
193 return url;
194 }
195 }