1
14
15 package com.liferay.portal.util;
16
17 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18 import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19 import com.liferay.portal.kernel.log.Log;
20 import com.liferay.portal.kernel.log.LogFactoryUtil;
21 import com.liferay.portal.kernel.util.ContentTypes;
22 import com.liferay.portal.kernel.util.FileComparator;
23 import com.liferay.portal.kernel.util.JavaProps;
24 import com.liferay.portal.kernel.util.MimeTypesUtil;
25 import com.liferay.portal.kernel.util.ServerDetector;
26 import com.liferay.portal.kernel.util.StreamUtil;
27 import com.liferay.portal.kernel.util.StringBundler;
28 import com.liferay.portal.kernel.util.StringPool;
29 import com.liferay.portal.kernel.util.StringUtil;
30 import com.liferay.portal.kernel.util.Time;
31 import com.liferay.portal.kernel.util.Validator;
32 import com.liferay.util.PwdGenerator;
33 import com.liferay.util.SystemProperties;
34 import com.liferay.util.lucene.JerichoHTMLTextExtractor;
35
36 import java.io.BufferedInputStream;
37 import java.io.File;
38 import java.io.FileInputStream;
39 import java.io.FileOutputStream;
40 import java.io.FileReader;
41 import java.io.IOException;
42 import java.io.InputStream;
43 import java.io.OutputStreamWriter;
44 import java.io.Reader;
45 import java.io.StringReader;
46 import java.io.Writer;
47
48 import java.util.ArrayList;
49 import java.util.Arrays;
50 import java.util.HashMap;
51 import java.util.List;
52 import java.util.Map;
53 import java.util.Properties;
54
55 import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
56 import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
57 import org.apache.jackrabbit.extractor.MsWordTextExtractor;
58 import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
59 import org.apache.jackrabbit.extractor.PdfTextExtractor;
60 import org.apache.jackrabbit.extractor.PlainTextExtractor;
61 import org.apache.jackrabbit.extractor.RTFTextExtractor;
62 import org.apache.jackrabbit.extractor.TextExtractor;
63 import org.apache.jackrabbit.extractor.XMLTextExtractor;
64 import org.apache.poi.POITextExtractor;
65 import org.apache.poi.extractor.ExtractorFactory;
66
67 import org.mozilla.intl.chardet.nsDetector;
68 import org.mozilla.intl.chardet.nsPSMDetector;
69
70
76 public class FileImpl implements com.liferay.portal.kernel.util.File {
77
78 public static FileImpl getInstance() {
79 return _instance;
80 }
81
82 public FileImpl() {
83 Class<?>[] textExtractorClasses = new Class[] {
84 JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
85 MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
86 OpenOfficeTextExtractor.class, PdfTextExtractor.class,
87 PlainTextExtractor.class, RTFTextExtractor.class,
88 XMLTextExtractor.class
89 };
90
91 for (Class<?> textExtractorClass : textExtractorClasses) {
92 try {
93 TextExtractor textExtractor =
94 (TextExtractor)textExtractorClass.newInstance();
95
96 String[] contentTypes = textExtractor.getContentTypes();
97
98 for (String contentType : contentTypes) {
99 _textExtractors.put(contentType, textExtractor);
100 }
101 }
102 catch (Exception e) {
103 _log.error(e, e);
104 }
105 }
106 }
107
108 public void copyDirectory(String sourceDirName, String destinationDirName) {
109 copyDirectory(new File(sourceDirName), new File(destinationDirName));
110 }
111
112 public void copyDirectory(File source, File destination) {
113 if (source.exists() && source.isDirectory()) {
114 if (!destination.exists()) {
115 destination.mkdirs();
116 }
117
118 File[] fileArray = source.listFiles();
119
120 for (int i = 0; i < fileArray.length; i++) {
121 if (fileArray[i].isDirectory()) {
122 copyDirectory(
123 fileArray[i],
124 new File(destination.getPath() + File.separator
125 + fileArray[i].getName()));
126 }
127 else {
128 copyFile(
129 fileArray[i],
130 new File(destination.getPath() + File.separator
131 + fileArray[i].getName()));
132 }
133 }
134 }
135 }
136
137 public void copyFile(String source, String destination) {
138 copyFile(source, destination, false);
139 }
140
141 public void copyFile(String source, String destination, boolean lazy) {
142 copyFile(new File(source), new File(destination), lazy);
143 }
144
145 public void copyFile(File source, File destination) {
146 copyFile(source, destination, false);
147 }
148
149 public void copyFile(File source, File destination, boolean lazy) {
150 if (!source.exists()) {
151 return;
152 }
153
154 if (lazy) {
155 String oldContent = null;
156
157 try {
158 oldContent = read(source);
159 }
160 catch (Exception e) {
161 return;
162 }
163
164 String newContent = null;
165
166 try {
167 newContent = read(destination);
168 }
169 catch (Exception e) {
170 }
171
172 if ((oldContent == null) || !oldContent.equals(newContent)) {
173 copyFile(source, destination, false);
174 }
175 }
176 else {
177 if ((destination.getParentFile() != null) &&
178 (!destination.getParentFile().exists())) {
179
180 destination.getParentFile().mkdirs();
181 }
182
183 try {
184 StreamUtil.transfer(
185 new FileInputStream(source),
186 new FileOutputStream(destination));
187 }
188 catch (IOException ioe) {
189 _log.error(ioe.getMessage());
190 }
191 }
192 }
193
194 public File createTempFile() {
195 return createTempFile(null);
196 }
197
198 public File createTempFile(String extension) {
199 return new File(createTempFileName(extension));
200 }
201
202 public String createTempFileName() {
203 return createTempFileName(null);
204 }
205
206 public String createTempFileName(String extension) {
207 StringBundler sb = new StringBundler();
208
209 sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
210 sb.append(StringPool.SLASH);
211 sb.append(Time.getTimestamp());
212 sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
213
214 if (Validator.isNotNull(extension)) {
215 sb.append(StringPool.PERIOD);
216 sb.append(extension);
217 }
218
219 return sb.toString();
220 }
221
222 public String decodeSafeFileName(String fileName) {
223 return StringUtil.replace(
224 fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
225 }
226
227 public boolean delete(String file) {
228 return delete(new File(file));
229 }
230
231 public boolean delete(File file) {
232 if ((file != null) && file.exists()) {
233 return file.delete();
234 }
235 else {
236 return false;
237 }
238 }
239
240 public void deltree(String directory) {
241 deltree(new File(directory));
242 }
243
244 public void deltree(File directory) {
245 if (directory.exists() && directory.isDirectory()) {
246 File[] fileArray = directory.listFiles();
247
248 for (int i = 0; i < fileArray.length; i++) {
249 if (fileArray[i].isDirectory()) {
250 deltree(fileArray[i]);
251 }
252 else {
253 fileArray[i].delete();
254 }
255 }
256
257 directory.delete();
258 }
259 }
260
261 public String encodeSafeFileName(String fileName) {
262 if (fileName == null) {
263 return StringPool.BLANK;
264 }
265
266 return StringUtil.replace(
267 fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
268 }
269
270 public boolean exists(String fileName) {
271 return exists(new File(fileName));
272 }
273
274 public boolean exists(File file) {
275 return file.exists();
276 }
277
278 public String extractText(InputStream is, String fileName) {
279 String text = null;
280
281 try {
282 if (!is.markSupported()) {
283 is = new BufferedInputStream(is);
284 }
285
286 String contentType = MimeTypesUtil.getContentType(is, fileName);
287
288 if (_log.isInfoEnabled()) {
289 _log.info(
290 "Attempting to extract text from " + fileName +
291 " of type " + contentType);
292 }
293
294 TextExtractor textExtractor = _textExtractors.get(contentType);
295
296 if (textExtractor != null) {
297 if (_log.isInfoEnabled()) {
298 _log.info(
299 "Using text extractor " +
300 textExtractor.getClass().getName());
301 }
302
303 StringBuilder sb = new StringBuilder();
304
305 Reader reader = null;
306
307 if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
308 contentType.equals(ContentTypes.APPLICATION_MSWORD)) {
309
310 if (_log.isWarnEnabled()) {
311 _log.warn(
312 "JOnAS 5 with JDK 6 has a known issue with text " +
313 "extraction of Word documents. Use JDK 5 if " +
314 "you require indexing of Word documents.");
315 }
316
317 if (_log.isDebugEnabled()) {
318
319
322 reader = textExtractor.extractText(
323 is, contentType, null);
324 }
325 else {
326 reader = new StringReader(StringPool.BLANK);
327 }
328 }
329 else {
330 reader = textExtractor.extractText(
331 is, contentType, null);
332 }
333
334 try{
335 char[] buffer = new char[1024];
336
337 int result = -1;
338
339 while ((result = reader.read(buffer)) != -1) {
340 sb.append(buffer, 0, result);
341 }
342 }
343 finally {
344 try {
345 reader.close();
346 }
347 catch (IOException ioe) {
348 }
349 }
350
351 text = sb.toString();
352 }
353 else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
354 contentType.startsWith(
355 "application/vnd.openxmlformats-officedocument.")) {
356
357 try {
358 POITextExtractor poiTextExtractor =
359 ExtractorFactory.createExtractor(is);
360
361 text = poiTextExtractor.getText();
362 }
363 catch (Exception e) {
364 if (_log.isInfoEnabled()) {
365 _log.info(e.getMessage());
366 }
367 }
368 }
369 }
370 catch (Exception e) {
371 _log.error(e, e);
372 }
373
374 if (_log.isInfoEnabled()) {
375 if (text == null) {
376 _log.info("No text extractor found for " + fileName);
377 }
378 else {
379 _log.info("Text was extracted for " + fileName);
380 }
381 }
382
383 if (_log.isDebugEnabled()) {
384 _log.debug("Extractor returned text:\n\n" + text);
385 }
386
387 if (text == null) {
388 text = StringPool.BLANK;
389 }
390
391 return text;
392 }
393
394 public String getAbsolutePath(File file) {
395 return StringUtil.replace(
396 file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
397 }
398
399 public byte[] getBytes(File file) throws IOException {
400 if ((file == null) || !file.exists()) {
401 return null;
402 }
403
404 FileInputStream is = new FileInputStream(file);
405
406 byte[] bytes = getBytes(is, (int)file.length());
407
408 is.close();
409
410 return bytes;
411 }
412
413 public byte[] getBytes(InputStream is) throws IOException {
414 return getBytes(is, -1);
415 }
416
417 public byte[] getBytes(InputStream inputStream, int bufferSize)
418 throws IOException {
419
420 UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
421 new UnsyncByteArrayOutputStream();
422
423 StreamUtil.transfer(
424 inputStream, unsyncByteArrayOutputStream, bufferSize);
425
426 return unsyncByteArrayOutputStream.toByteArray();
427 }
428
429 public String getExtension(String fileName) {
430 if (fileName == null) {
431 return null;
432 }
433
434 int pos = fileName.lastIndexOf(StringPool.PERIOD);
435
436 if (pos > 0) {
437 return fileName.substring(pos + 1, fileName.length()).toLowerCase();
438 }
439 else {
440 return StringPool.BLANK;
441 }
442 }
443
444 public String getPath(String fullFileName) {
445 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
446
447 if (pos == -1) {
448 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
449 }
450
451 String shortFileName = fullFileName.substring(0, pos);
452
453 if (Validator.isNull(shortFileName)) {
454 return StringPool.SLASH;
455 }
456
457 return shortFileName;
458 }
459
460 public String getShortFileName(String fullFileName) {
461 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
462
463 if (pos == -1) {
464 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
465 }
466
467 String shortFileName =
468 fullFileName.substring(pos + 1, fullFileName.length());
469
470 return shortFileName;
471 }
472
473 public boolean isAscii(File file) throws IOException {
474 boolean ascii = true;
475
476 nsDetector detector = new nsDetector(nsPSMDetector.ALL);
477
478 InputStream inputStream = new FileInputStream(file);
479
480 byte[] buffer = new byte[1024];
481
482 int len = 0;
483
484 while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
485
486 if (ascii) {
487 ascii = detector.isAscii(buffer, len);
488
489 if (!ascii) {
490 break;
491 }
492 }
493 }
494
495 detector.DataEnd();
496
497 inputStream.close();
498
499 return ascii;
500 }
501
502 public String[] listDirs(String fileName) {
503 return listDirs(new File(fileName));
504 }
505
506 public String[] listDirs(File file) {
507 List<String> dirs = new ArrayList<String>();
508
509 File[] fileArray = file.listFiles();
510
511 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
512 if (fileArray[i].isDirectory()) {
513 dirs.add(fileArray[i].getName());
514 }
515 }
516
517 return dirs.toArray(new String[dirs.size()]);
518 }
519
520 public String[] listFiles(String fileName) {
521 if (Validator.isNull(fileName)) {
522 return new String[0];
523 }
524
525 return listFiles(new File(fileName));
526 }
527
528 public String[] listFiles(File file) {
529 List<String> files = new ArrayList<String>();
530
531 File[] fileArray = file.listFiles();
532
533 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
534 if (fileArray[i].isFile()) {
535 files.add(fileArray[i].getName());
536 }
537 }
538
539 return files.toArray(new String[files.size()]);
540 }
541
542 public void mkdirs(String pathName) {
543 File file = new File(pathName);
544
545 file.mkdirs();
546 }
547
548 public boolean move(String sourceFileName, String destinationFileName) {
549 return move(new File(sourceFileName), new File(destinationFileName));
550 }
551
552 public boolean move(File source, File destination) {
553 if (!source.exists()) {
554 return false;
555 }
556
557 destination.delete();
558
559 return source.renameTo(destination);
560 }
561
562 public String read(String fileName) throws IOException {
563 return read(new File(fileName));
564 }
565
566 public String read(File file) throws IOException {
567 return read(file, false);
568 }
569
570 public String read(File file, boolean raw) throws IOException {
571 FileInputStream fis = new FileInputStream(file);
572
573 byte[] bytes = new byte[fis.available()];
574
575 fis.read(bytes);
576
577 fis.close();
578
579 String s = new String(bytes, StringPool.UTF8);
580
581 if (raw) {
582 return s;
583 }
584 else {
585 return StringUtil.replace(
586 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
587 }
588 }
589
590 public String replaceSeparator(String fileName) {
591 return StringUtil.replace(
592 fileName, StringPool.BACK_SLASH, StringPool.SLASH);
593 }
594
595 public File[] sortFiles(File[] files) {
596 if (files == null) {
597 return null;
598 }
599
600 Arrays.sort(files, new FileComparator());
601
602 List<File> directoryList = new ArrayList<File>();
603 List<File> fileList = new ArrayList<File>();
604
605 for (int i = 0; i < files.length; i++) {
606 if (files[i].isDirectory()) {
607 directoryList.add(files[i]);
608 }
609 else {
610 fileList.add(files[i]);
611 }
612 }
613
614 directoryList.addAll(fileList);
615
616 return directoryList.toArray(new File[directoryList.size()]);
617 }
618
619 public String stripExtension(String fileName) {
620 if (fileName == null) {
621 return null;
622 }
623
624 String ext = getExtension(fileName);
625
626 if (ext.length() > 0) {
627 return fileName.substring(0, fileName.length() - ext.length() - 1);
628 }
629 else {
630 return fileName;
631 }
632 }
633
634 public List<String> toList(Reader reader) {
635 List<String> list = new ArrayList<String>();
636
637 try {
638 UnsyncBufferedReader unsyncBufferedReader =
639 new UnsyncBufferedReader(reader);
640
641 String line = null;
642
643 while ((line = unsyncBufferedReader.readLine()) != null) {
644 list.add(line);
645 }
646
647 unsyncBufferedReader.close();
648 }
649 catch (IOException ioe) {
650 }
651
652 return list;
653 }
654
655 public List<String> toList(String fileName) {
656 try {
657 return toList(new FileReader(fileName));
658 }
659 catch (IOException ioe) {
660 return new ArrayList<String>();
661 }
662 }
663
664 public Properties toProperties(FileInputStream fis) {
665 Properties props = new Properties();
666
667 try {
668 props.load(fis);
669 }
670 catch (IOException ioe) {
671 }
672
673 return props;
674 }
675
676 public Properties toProperties(String fileName) {
677 try {
678 return toProperties(new FileInputStream(fileName));
679 }
680 catch (IOException ioe) {
681 return new Properties();
682 }
683 }
684
685 public void write(String fileName, String s) throws IOException {
686 write(new File(fileName), s);
687 }
688
689 public void write(String fileName, String s, boolean lazy)
690 throws IOException {
691
692 write(new File(fileName), s, lazy);
693 }
694
695 public void write(String fileName, String s, boolean lazy, boolean append)
696 throws IOException {
697
698 write(new File(fileName), s, lazy, append);
699 }
700
701 public void write(String pathName, String fileName, String s)
702 throws IOException {
703
704 write(new File(pathName, fileName), s);
705 }
706
707 public void write(String pathName, String fileName, String s, boolean lazy)
708 throws IOException {
709
710 write(new File(pathName, fileName), s, lazy);
711 }
712
713 public void write(
714 String pathName, String fileName, String s, boolean lazy,
715 boolean append)
716 throws IOException {
717
718 write(new File(pathName, fileName), s, lazy, append);
719 }
720
721 public void write(File file, String s) throws IOException {
722 write(file, s, false);
723 }
724
725 public void write(File file, String s, boolean lazy)
726 throws IOException {
727
728 write(file, s, lazy, false);
729 }
730
731 public void write(File file, String s, boolean lazy, boolean append)
732 throws IOException {
733
734 if (file.getParent() != null) {
735 mkdirs(file.getParent());
736 }
737
738 if (lazy && file.exists()) {
739 String content = read(file);
740
741 if (content.equals(s)) {
742 return;
743 }
744 }
745
746 Writer writer = new OutputStreamWriter(
747 new FileOutputStream(file, append), StringPool.UTF8);
748
749 writer.write(s);
750
751 writer.close();
752 }
753
754 public void write(String fileName, byte[] bytes) throws IOException {
755 write(new File(fileName), bytes);
756 }
757
758 public void write(File file, byte[] bytes) throws IOException {
759 write(file, bytes, 0, bytes.length);
760 }
761
762 public void write(File file, byte[] bytes, int offset, int length)
763 throws IOException {
764
765 if (file.getParent() != null) {
766 mkdirs(file.getParent());
767 }
768
769 FileOutputStream fos = new FileOutputStream(file);
770
771 fos.write(bytes, offset, length);
772
773 fos.close();
774 }
775
776 public void write(String fileName, InputStream is) throws IOException {
777 write(new File(fileName), is);
778 }
779
780 public void write(File file, InputStream is) throws IOException {
781 if (file.getParent() != null) {
782 mkdirs(file.getParent());
783 }
784
785 StreamUtil.transfer(is, new FileOutputStream(file));
786 }
787
788 private static final String[] _SAFE_FILE_NAME_1 = {
789 StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
790 StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
791 };
792
793 private static final String[] _SAFE_FILE_NAME_2 = {
794 "_AMP_", "_CP_", "_OP_", "_SEM_"
795 };
796
797 private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
798
799 private static FileImpl _instance = new FileImpl();
800
801 private Map<String, TextExtractor> _textExtractors =
802 new HashMap<String, TextExtractor>();
803
804 }