1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.net.examples.mail;
19
20 import java.io.BufferedWriter;
21 import java.io.IOException;
22 import java.io.UncheckedIOException;
23 import java.net.URI;
24 import java.net.URISyntaxException;
25 import java.nio.charset.Charset;
26 import java.nio.file.Files;
27 import java.nio.file.Path;
28 import java.nio.file.Paths;
29 import java.nio.file.StandardOpenOption;
30 import java.text.ParseException;
31 import java.text.SimpleDateFormat;
32 import java.util.ArrayList;
33 import java.util.Date;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.TimeZone;
37 import java.util.concurrent.atomic.AtomicInteger;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40
41 import org.apache.commons.net.PrintCommandListener;
42 import org.apache.commons.net.ProtocolCommandEvent;
43 import org.apache.commons.net.imap.IMAP;
44 import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
45 import org.apache.commons.net.imap.IMAPClient;
46 import org.apache.commons.net.imap.IMAPReply;
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84 public final class IMAPExportMbox {
85
86 private static final class MboxListener implements IMAPChunkListener {
87
88 private final BufferedWriter bufferedWriter;
89 volatile AtomicInteger total = new AtomicInteger();
90 volatile String lastFetched;
91 volatile List<String> missingIds = new ArrayList<>();
92 volatile long lastSeq = -1;
93 private final String lineSeparator;
94 private final SimpleDateFormat DATE_FORMAT
95 = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");
96
97
98
99 private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
100 private final boolean printHash;
101 private final boolean printMarker;
102 private final boolean checkSequence;
103
104 MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker,
105 final boolean checkSequence) {
106 this.lineSeparator = lineSeparator;
107 this.printHash = printHash;
108 this.printMarker = printMarker;
109 DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
110 this.bufferedWriter = bufferedWriter;
111 this.checkSequence = checkSequence;
112 }
113
114 @Override
115 public boolean chunkReceived(final IMAP imap) {
116 final String[] replyStrings = imap.getReplyStrings();
117 Date received = new Date();
118 final String firstLine = replyStrings[0];
119 Matcher m = PATID.matcher(firstLine);
120 if (m.lookingAt()) {
121 final String date = m.group(PATID_DATE_GROUP);
122 try {
123 received = IDPARSE.parse(date);
124 } catch (final ParseException e) {
125 System.err.println(e);
126 }
127 } else {
128 System.err.println("No timestamp found in: " + firstLine + " - using current time");
129 }
130 String replyTo = "MAILER-DAEMON";
131 for (int i = 1; i < replyStrings.length - 1; i++) {
132 final String line = replyStrings[i];
133 if (line.startsWith("Return-Path: ")) {
134 final String[] parts = line.split(" ", 2);
135 if (!parts[1].equals("<>")) {
136 replyTo = parts[1];
137 if (replyTo.startsWith("<")) {
138 if (replyTo.endsWith(">")) {
139 replyTo = replyTo.substring(1, replyTo.length() - 1);
140 } else {
141 System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine);
142 }
143 }
144 }
145 break;
146 }
147 }
148 try {
149
150 bufferedWriter.append("From ");
151 bufferedWriter.append(replyTo);
152 bufferedWriter.append(' ');
153 bufferedWriter.append(DATE_FORMAT.format(received));
154 bufferedWriter.append(lineSeparator);
155
156 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
157 if (printMarker) {
158 System.err.println("[" + total + "] " + firstLine);
159 }
160
161 for (int i = 1; i < replyStrings.length - 1; i++) {
162 final String line = replyStrings[i];
163 if (startsWith(line, PATFROM)) {
164 bufferedWriter.append('>');
165 }
166 bufferedWriter.append(line);
167 bufferedWriter.append(lineSeparator);
168 }
169
170 final String lastLine = replyStrings[replyStrings.length - 1];
171 final int lastLength = lastLine.length();
172 if (lastLength > 1) {
173 bufferedWriter.append(lastLine, 0, lastLength - 1);
174 bufferedWriter.append(lineSeparator);
175 }
176 bufferedWriter.append(lineSeparator);
177 } catch (final IOException e) {
178 e.printStackTrace();
179 throw new UncheckedIOException(e);
180 }
181 lastFetched = firstLine;
182 total.incrementAndGet();
183 if (checkSequence) {
184 m = PATSEQ.matcher(firstLine);
185 if (m.lookingAt()) {
186 final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP));
187 if (lastSeq != -1) {
188 final long missing = msgSeq - lastSeq - 1;
189 if (missing != 0) {
190 for (long j = lastSeq + 1; j < msgSeq; j++) {
191 missingIds.add(String.valueOf(j));
192 }
193 System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
194 }
195 }
196 lastSeq = msgSeq;
197 }
198 }
199 if (printHash) {
200 System.err.print(".");
201 }
202 return true;
203 }
204
205 public void close() throws IOException {
206 if (bufferedWriter != null) {
207 bufferedWriter.close();
208 }
209 }
210 }
211
212 private static final String CRLF = "\r\n";
213 private static final String LF = "\n";
214
215 private static final String EOL_DEFAULT = System.lineSeparator();
216 private static final Pattern PATFROM = Pattern.compile(">*From ");
217
218 private static final Pattern PATID =
219 Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
220
221 private static final int PATID_DATE_GROUP = 1;
222 private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) ");
223
224 private static final int PATSEQ_SEQUENCE_GROUP = 1;
225
226
227 private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS");
228
229
230 private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
231 private static final int CONNECT_TIMEOUT = 10;
232
233 private static final int READ_TIMEOUT = 10;
234
235 public static void main(final String[] args) throws IOException, URISyntaxException {
236 int connect_timeout = CONNECT_TIMEOUT;
237 int read_timeout = READ_TIMEOUT;
238
239 int argIdx = 0;
240 String eol = EOL_DEFAULT;
241 boolean printHash = false;
242 boolean printMarker = false;
243 int retryWaitSecs = 0;
244
245 for (argIdx = 0; argIdx < args.length; argIdx++) {
246 if (args[argIdx].equals("-c")) {
247 connect_timeout = Integer.parseInt(args[++argIdx]);
248 } else if (args[argIdx].equals("-r")) {
249 read_timeout = Integer.parseInt(args[++argIdx]);
250 } else if (args[argIdx].equals("-R")) {
251 retryWaitSecs = Integer.parseInt(args[++argIdx]);
252 } else if (args[argIdx].equals("-LF")) {
253 eol = LF;
254 } else if (args[argIdx].equals("-CRLF")) {
255 eol = CRLF;
256 } else if (args[argIdx].equals("-.")) {
257 printHash = true;
258 } else if (args[argIdx].equals("-X")) {
259 printMarker = true;
260 } else {
261 break;
262 }
263 }
264
265 final int argCount = args.length - argIdx;
266
267 if (argCount < 2) {
268 System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
269 + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
270 System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
271 System.err.println("\t-c connect timeout in seconds (default 10)");
272 System.err.println("\t-r read timeout in seconds (default 10)");
273 System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
274 System.err.println("\t-. print a . for each complete message received");
275 System.err.println("\t-X print the X-IMAP line for each complete message received");
276 System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
277 System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
278 System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
279 System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
280 + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
281 System.exit(1);
282 }
283
284 final String uriString = args[argIdx++];
285 URI uri;
286 try {
287 uri = URI.create(uriString);
288 } catch (final IllegalArgumentException e) {
289 final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
290 if (!m.matches()) {
291 throw e;
292 }
293 uri = URI.create(m.group(1));
294 uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
295 }
296 final String file = args[argIdx++];
297 String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
298 final String itemNames;
299
300 if (argCount > 3) {
301 if (argCount > 4) {
302 final StringBuilder sb = new StringBuilder();
303 sb.append("(");
304 for (int i = 4; i <= argCount; i++) {
305 if (i > 4) {
306 sb.append(" ");
307 }
308 sb.append(args[argIdx++]);
309 }
310 sb.append(")");
311 itemNames = sb.toString();
312 } else {
313 itemNames = args[argIdx++];
314 }
315 } else {
316 itemNames = "(INTERNALDATE BODY.PEEK[])";
317 }
318
319 final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)");
320 final MboxListener mboxListener;
321 if (file.equals("-")) {
322 mboxListener = null;
323 } else if (file.startsWith("+")) {
324 final Path mboxPath = Paths.get(file.substring(1));
325 System.out.println("Appending to file " + mboxPath);
326 mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE, StandardOpenOption.APPEND),
327 eol, printHash, printMarker, checkSequence);
328 } else if (file.startsWith("-")) {
329 final Path mboxPath = Paths.get(file.substring(1));
330 System.out.println("Writing to file " + mboxPath);
331 mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
332 checkSequence);
333 } else {
334 final Path mboxPath = Paths.get(file);
335 if (Files.exists(mboxPath) && Files.size(mboxPath) > 0) {
336 throw new IOException("mailbox file: " + mboxPath + " already exists and is non-empty!");
337 }
338 System.out.println("Creating file " + mboxPath);
339 mboxListener = new MboxListener(Files.newBufferedWriter(mboxPath, Charset.defaultCharset(), StandardOpenOption.CREATE), eol, printHash, printMarker,
340 checkSequence);
341 }
342
343 final String path = uri.getPath();
344 if (path == null || path.length() < 1) {
345 throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
346 }
347 final String folder = path.substring(1);
348
349
350 final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
351 @Override
352 public void protocolReplyReceived(final ProtocolCommandEvent event) {
353 if (event.getReplyCode() != IMAPReply.PARTIAL) {
354 super.protocolReplyReceived(event);
355 }
356 }
357 };
358
359
360 final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
361
362 String maxIndexInFolder = null;
363
364 try {
365
366 imap.setSoTimeout(read_timeout * 1000);
367
368 if (!imap.select(folder)) {
369 throw new IOException("Could not select folder: " + folder);
370 }
371
372 for (final String line : imap.getReplyStrings()) {
373 maxIndexInFolder = matches(line, PATEXISTS, 1);
374 if (maxIndexInFolder != null) {
375 break;
376 }
377 }
378
379 if (mboxListener != null) {
380 imap.setChunkListener(mboxListener);
381 }
382
383 while (true) {
384 final boolean ok = imap.fetch(sequenceSet, itemNames);
385
386 if (ok || retryWaitSecs <= 0 || mboxListener == null || !checkSequence) {
387 break;
388 }
389 final String replyString = imap.getReplyString();
390 if (!startsWith(replyString, PATTEMPFAIL)) {
391 throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
392 }
393 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
394 sequenceSet = mboxListener.lastSeq + 1 + ":*";
395 try {
396 Thread.sleep(retryWaitSecs * 1000);
397 } catch (final InterruptedException e) {
398
399 }
400 }
401
402 } catch (final IOException ioe) {
403 final String count = mboxListener == null ? "?" : mboxListener.total.toString();
404 System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
405 if (mboxListener != null) {
406 System.err.println("Last complete response seen: " + mboxListener.lastFetched);
407 }
408 throw ioe;
409 } finally {
410
411 if (printHash) {
412 System.err.println();
413 }
414
415 if (mboxListener != null) {
416 mboxListener.close();
417 final Iterator<String> missingIds = mboxListener.missingIds.iterator();
418 if (missingIds.hasNext()) {
419 final StringBuilder sb = new StringBuilder();
420 for (;;) {
421 sb.append(missingIds.next());
422 if (!missingIds.hasNext()) {
423 break;
424 }
425 sb.append(",");
426 }
427 System.err.println("*** Missing ids: " + sb.toString());
428 }
429 }
430 imap.logout();
431 imap.disconnect();
432 }
433 if (mboxListener != null) {
434 System.out.println("Processed " + mboxListener.total + " messages.");
435 }
436 if (maxIndexInFolder != null) {
437 System.out.println("Folder contained " + maxIndexInFolder + " messages.");
438 }
439 }
440
441 private static String matches(final String input, final Pattern pat, final int index) {
442 final Matcher m = pat.matcher(input);
443 if (m.lookingAt()) {
444 return m.group(index);
445 }
446 return null;
447 }
448
449 private static boolean startsWith(final String input, final Pattern pat) {
450 final Matcher m = pat.matcher(input);
451 return m.lookingAt();
452 }
453 }