1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
30  import com.liferay.portal.kernel.log.Log;
31  import com.liferay.portal.kernel.log.LogFactoryUtil;
32  import com.liferay.portal.kernel.util.ArrayUtil;
33  import com.liferay.portal.kernel.util.MapUtil;
34  import com.liferay.portal.kernel.util.ObjectValuePair;
35  import com.liferay.portal.kernel.util.ProgressTracker;
36  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
37  import com.liferay.portal.kernel.util.StringPool;
38  import com.liferay.portal.kernel.util.StringUtil;
39  import com.liferay.portal.kernel.util.Validator;
40  import com.liferay.portal.kernel.xml.Document;
41  import com.liferay.portal.kernel.xml.DocumentException;
42  import com.liferay.portal.kernel.xml.Element;
43  import com.liferay.portal.kernel.xml.SAXReaderUtil;
44  import com.liferay.portal.kernel.zip.ZipReader;
45  import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
46  import com.liferay.portal.model.User;
47  import com.liferay.portal.service.ServiceContext;
48  import com.liferay.portal.service.UserLocalServiceUtil;
49  import com.liferay.portal.util.PropsValues;
50  import com.liferay.portlet.tags.NoSuchEntryException;
51  import com.liferay.portlet.tags.model.TagsEntry;
52  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
53  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
54  import com.liferay.portlet.tags.util.TagsUtil;
55  import com.liferay.portlet.wiki.ImportFilesException;
56  import com.liferay.portlet.wiki.NoSuchPageException;
57  import com.liferay.portlet.wiki.importers.WikiImporter;
58  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
59  import com.liferay.portlet.wiki.model.WikiNode;
60  import com.liferay.portlet.wiki.model.WikiPage;
61  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
62  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
63  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
64  
65  import java.io.File;
66  import java.io.FileReader;
67  import java.io.IOException;
68  
69  import java.util.ArrayList;
70  import java.util.Collections;
71  import java.util.HashMap;
72  import java.util.Iterator;
73  import java.util.List;
74  import java.util.Map;
75  import java.util.regex.Matcher;
76  import java.util.regex.Pattern;
77  
78  /**
79   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
80   *
81   * @author Alvaro del Castillo
82   * @author Jorge Ferrer
83   */
84  public class MediaWikiImporter implements WikiImporter {
85  
    /**
     * Content given to the shared images page when the importer has to create
     * it.
     */
    public static final String SHARED_IMAGES_CONTENT = "See attachments";

    /**
     * Title of the wiki page to which the imported images are added as
     * attachments.
     */
    public static final String SHARED_IMAGES_TITLE = "SharedImages";
89  
90      public void importPages(
91              long userId, WikiNode node, File[] files,
92              Map<String, String[]> options)
93          throws PortalException {
94  
95          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96              throw new PortalException("The pages file is mandatory");
97          }
98  
99          File pagesFile = files[0];
100         File usersFile = files[1];
101         File imagesFile = files[2];
102 
103         try {
104             Document doc = SAXReaderUtil.read(pagesFile);
105 
106             Map<String, String> usersMap = readUsersFile(usersFile);
107 
108             Element root = doc.getRootElement();
109 
110             List<String> specialNamespaces = readSpecialNamespaces(root);
111 
112             processSpecialPages(userId, node, root, specialNamespaces);
113             processRegularPages(
114                 userId, node, root, specialNamespaces, usersMap, imagesFile,
115                 options);
116             processImages(userId, node, imagesFile);
117 
118             moveFrontPage(userId, node, options);
119         }
120         catch (DocumentException de) {
121             throw new ImportFilesException("Invalid XML file provided");
122         }
123         catch (IOException de) {
124             throw new ImportFilesException("Error reading the files provided");
125         }
126         catch (PortalException e) {
127             throw e;
128         }
129         catch (Exception e) {
130             throw new PortalException(e);
131         }
132     }
133 
134     protected long getUserId(
135             long userId, WikiNode node, String author,
136             Map<String, String> usersMap)
137         throws PortalException, SystemException {
138 
139         User user = null;
140 
141         String emailAddress = usersMap.get(author);
142 
143         try {
144             if (Validator.isNull(emailAddress)) {
145                 user = UserLocalServiceUtil.getUserByScreenName(
146                     node.getCompanyId(), author.toLowerCase());
147             }
148             else {
149                 user = UserLocalServiceUtil.getUserByEmailAddress(
150                     node.getCompanyId(), emailAddress);
151             }
152         }
153         catch (NoSuchUserException nsue) {
154             user = UserLocalServiceUtil.getUserById(userId);
155         }
156 
157         return user.getUserId();
158     }
159 
160     protected void importPage(
161             long userId, String author, WikiNode node, String title,
162             String content, String summary, Map<String, String> usersMap)
163         throws PortalException {
164 
165         try {
166             long authorUserId = getUserId(userId, node, author, usersMap);
167             String parentTitle = readParentTitle(content);
168             String redirectTitle = readRedirectTitle(content);
169 
170             ServiceContext serviceContext = new ServiceContext();
171 
172             serviceContext.setAddCommunityPermissions(true);
173             serviceContext.setAddGuestPermissions(true);
174             serviceContext.setTagsEntries(
175                 readTagsEntries(userId, node, content));
176 
177             if (Validator.isNull(redirectTitle)) {
178                 content = _translator.translate(content);
179             }
180             else {
181                 content =
182                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
183                         StringPool.DOUBLE_CLOSE_BRACKET;
184             }
185 
186             WikiPage page = null;
187 
188             try {
189                 page = WikiPageLocalServiceUtil.getPage(
190                     node.getNodeId(), title);
191             }
192             catch (NoSuchPageException nspe) {
193                 page = WikiPageLocalServiceUtil.addPage(
194                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
195                     null, true, serviceContext);
196             }
197 
198             WikiPageLocalServiceUtil.updatePage(
199                 authorUserId, node.getNodeId(), title, page.getVersion(),
200                 content, summary, true, "creole", parentTitle, redirectTitle,
201                 serviceContext);
202         }
203         catch (Exception e) {
204             throw new PortalException("Error importing page " + title, e);
205         }
206     }
207 
208     protected boolean isSpecialMediaWikiPage(
209         String title, List<String> specialNamespaces) {
210 
211         for (String namespace: specialNamespaces) {
212             if (title.startsWith(namespace + StringPool.COLON)) {
213                 return true;
214             }
215         }
216 
217         return false;
218     }
219 
220     protected boolean isValidImage(String[] paths, byte[] bytes) {
221         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
222             return false;
223         }
224 
225         if ((paths.length > 1) &&
226             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
227 
228             return false;
229         }
230 
231         String fileName = paths[paths.length - 1];
232 
233         try {
234             DLLocalServiceUtil.validate(fileName, bytes);
235         }
236         catch (PortalException pe) {
237             return false;
238         }
239         catch (SystemException se) {
240             return false;
241         }
242 
243         return true;
244     }
245 
246     protected void moveFrontPage(
247         long userId, WikiNode node, Map<String, String[]> options) {
248 
249         String frontPageTitle = MapUtil.getString(
250             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
251 
252         if (Validator.isNotNull(frontPageTitle)) {
253             frontPageTitle = normalizeTitle(frontPageTitle);
254 
255             try {
256                 if (WikiPageLocalServiceUtil.getPagesCount(
257                         node.getNodeId(), frontPageTitle, true) > 0) {
258 
259                     ServiceContext serviceContext = new ServiceContext();
260 
261                     serviceContext.setAddCommunityPermissions(true);
262                     serviceContext.setAddGuestPermissions(true);
263 
264                     WikiPageLocalServiceUtil.movePage(
265                         userId, node.getNodeId(), frontPageTitle,
266                         WikiPageImpl.FRONT_PAGE, false, serviceContext);
267 
268                 }
269             }
270             catch (Exception e) {
271                 if (_log.isWarnEnabled()) {
272                     StringBuilder sb = new StringBuilder();
273 
274                     sb.append("Could not move ");
275                     sb.append(WikiPageImpl.FRONT_PAGE);
276                     sb.append(" to the title provided: ");
277                     sb.append(frontPageTitle);
278 
279                     _log.warn(sb.toString(), e);
280                 }
281             }
282 
283         }
284 
285     }
286 
287     protected String normalize(String categoryName, int length) {
288         categoryName = TagsUtil.toWord(categoryName.trim());
289 
290         return StringUtil.shorten(categoryName, length);
291     }
292 
293     protected String normalizeDescription(String description) {
294         description = description.replaceAll(
295             _categoriesPattern.pattern(), StringPool.BLANK);
296 
297         return normalize(description, 300);
298     }
299 
300     protected String normalizeTitle(String title) {
301         title = title.replaceAll(
302             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
303 
304         return StringUtil.shorten(title, 75);
305     }
306 
307     protected void processImages(long userId, WikiNode node, File imagesFile)
308         throws Exception {
309 
310         if ((imagesFile == null) || (!imagesFile.exists())) {
311             return;
312         }
313 
314         ProgressTracker progressTracker =
315             ProgressTrackerThreadLocal.getProgressTracker();
316 
317         int count = 0;
318 
319         ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
320 
321         List<String> entries = zipReader.getEntries();
322 
323         int total = entries.size();
324 
325         if (total > 0) {
326             try {
327                 WikiPageLocalServiceUtil.getPage(
328                     node.getNodeId(), SHARED_IMAGES_TITLE);
329             }
330             catch (NoSuchPageException nspe) {
331                 ServiceContext serviceContext = new ServiceContext();
332 
333                 serviceContext.setAddCommunityPermissions(true);
334                 serviceContext.setAddGuestPermissions(true);
335 
336                 WikiPageLocalServiceUtil.addPage(
337                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
338                     SHARED_IMAGES_CONTENT, null, true, serviceContext);
339             }
340         }
341 
342         List<ObjectValuePair<String, byte[]>> attachments =
343             new ArrayList<ObjectValuePair<String, byte[]>>();
344 
345         int percentage = 50;
346 
347         for (int i = 0; i < entries.size(); i++) {
348             String entry = entries.get(i);
349 
350             String key = entry;
351             byte[] value = zipReader.getEntryAsByteArray(entry);
352 
353             if (key.endsWith(StringPool.SLASH)) {
354                 if (_log.isInfoEnabled()) {
355                     _log.info("Ignoring " + key);
356                 }
357 
358                 continue;
359             }
360 
361             String[] paths = StringUtil.split(key, StringPool.SLASH);
362 
363             if (!isValidImage(paths, value)) {
364                 if (_log.isInfoEnabled()) {
365                     _log.info("Ignoring " + key);
366                 }
367 
368                 continue;
369             }
370 
371             String fileName = paths[paths.length - 1].toLowerCase();
372 
373             attachments.add(
374                 new ObjectValuePair<String, byte[]>(fileName, value));
375 
376             count++;
377 
378             if ((i % 5) == 0) {
379                 WikiPageLocalServiceUtil.addPageAttachments(
380                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
381 
382                 attachments.clear();
383 
384                 percentage = Math.min(50 + (i * 50) / total, 99);
385 
386                 progressTracker.updateProgress(percentage);
387             }
388         }
389 
390         if (!attachments.isEmpty()) {
391             WikiPageLocalServiceUtil.addPageAttachments(
392                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
393         }
394 
395         zipReader.close();
396 
397         if (_log.isInfoEnabled()) {
398             _log.info("Imported " + count + " images into " + node.getName());
399         }
400     }
401 
402     protected void processRegularPages(
403         long userId, WikiNode node, Element root,
404         List<String> specialNamespaces, Map<String, String> usersMap,
405         File imagesFile, Map<String, String[]> options) {
406 
407         boolean importLatestVersion = MapUtil.getBoolean(
408             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
409 
410         ProgressTracker progressTracker =
411             ProgressTrackerThreadLocal.getProgressTracker();
412 
413         int count = 0;
414 
415         List<Element> pages = root.elements("page");
416 
417         int total = pages.size();
418 
419         Iterator<Element> itr = root.elements("page").iterator();
420 
421         int percentage = 10;
422         int maxPercentage = 50;
423 
424         if ((imagesFile == null) || (!imagesFile.exists())) {
425             maxPercentage = 99;
426         }
427 
428         int percentageRange = maxPercentage - percentage;
429 
430         for (int i = 0; itr.hasNext(); i++) {
431             Element pageEl = itr.next();
432 
433             String title = pageEl.elementText("title");
434 
435             title = normalizeTitle(title);
436 
437             percentage = Math.min(
438                 10 + (i * percentageRange) / total, maxPercentage);
439 
440             progressTracker.updateProgress(percentage);
441 
442             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
443                 continue;
444             }
445 
446             List<Element> revisionEls = pageEl.elements("revision");
447 
448             if (importLatestVersion) {
449                 Element lastRevisionEl = revisionEls.get(
450                     revisionEls.size() - 1);
451 
452                 revisionEls = new ArrayList<Element>();
453 
454                 revisionEls.add(lastRevisionEl);
455             }
456 
457             for (Element curRevisionEl : revisionEls) {
458                 String author = curRevisionEl.element(
459                     "contributor").elementText("username");
460                 String content = curRevisionEl.elementText("text");
461                 String summary = curRevisionEl.elementText("comment");
462 
463                 try {
464                     importPage(
465                         userId, author, node, title, content, summary,
466                         usersMap);
467                 }
468                 catch (Exception e) {
469                     if (_log.isWarnEnabled()) {
470                         StringBuilder sb = new StringBuilder();
471 
472                         sb.append("Page with title ");
473                         sb.append(title);
474                         sb.append(" could not be imported");
475 
476                         _log.warn(sb.toString(), e);
477                     }
478                 }
479             }
480 
481             count++;
482         }
483 
484         if (_log.isInfoEnabled()) {
485             _log.info("Imported " + count + " pages into " + node.getName());
486         }
487     }
488 
489     protected void processSpecialPages(
490             long userId, WikiNode node, Element root,
491             List<String> specialNamespaces)
492         throws PortalException {
493 
494         ProgressTracker progressTracker =
495             ProgressTrackerThreadLocal.getProgressTracker();
496 
497         List<Element> pages = root.elements("page");
498 
499         int total = pages.size();
500 
501         Iterator<Element> itr = pages.iterator();
502 
503         for (int i = 0; itr.hasNext(); i++) {
504             Element page = itr.next();
505 
506             String title = page.elementText("title");
507 
508             if (!title.startsWith("Category:")) {
509                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
510                     root.remove(page);
511                 }
512 
513                 continue;
514             }
515 
516             String categoryName = title.substring("Category:".length());
517 
518             categoryName = normalize(categoryName, 75);
519 
520             String description = page.element("revision").elementText("text");
521 
522             description = normalizeDescription(description);
523 
524             try {
525                 TagsEntry tagsEntry = null;
526 
527                 try {
528                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
529                         node.getCompanyId(), categoryName);
530                 }
531                 catch (NoSuchEntryException nsee) {
532                     ServiceContext serviceContext = new ServiceContext();
533 
534                     serviceContext.setAddCommunityPermissions(true);
535                     serviceContext.setAddGuestPermissions(true);
536                     serviceContext.setScopeGroupId(node.getGroupId());
537 
538                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
539                         userId, null, categoryName, null, null, serviceContext);
540                 }
541 
542                 if (Validator.isNotNull(description)) {
543                     TagsPropertyLocalServiceUtil.addProperty(
544                         userId, tagsEntry.getEntryId(), "description",
545                         description);
546                 }
547             }
548             catch (SystemException se) {
549                  _log.error(se, se);
550             }
551 
552             if ((i % 5) == 0) {
553                 progressTracker.updateProgress((i * 10) / total);
554             }
555         }
556     }
557 
558     protected String readParentTitle(String content) {
559         Matcher matcher = _parentPattern.matcher(content);
560 
561         String redirectTitle = StringPool.BLANK;
562 
563         if (matcher.find()) {
564             redirectTitle = matcher.group(1);
565 
566             redirectTitle = normalizeTitle(redirectTitle);
567 
568             redirectTitle += " (disambiguation)";
569         }
570 
571         return redirectTitle;
572     }
573 
574     protected String readRedirectTitle(String content) {
575         Matcher matcher = _redirectPattern.matcher(content);
576 
577         String redirectTitle = StringPool.BLANK;
578 
579         if (matcher.find()) {
580             redirectTitle = matcher.group(1);
581 
582             redirectTitle = normalizeTitle(redirectTitle);
583         }
584 
585         return redirectTitle;
586     }
587 
588     protected List<String> readSpecialNamespaces(Element root)
589         throws ImportFilesException {
590 
591         List<String> namespaces = new ArrayList<String>();
592 
593         Element siteinfoEl = root.element("siteinfo");
594 
595         if (siteinfoEl == null) {
596             throw new ImportFilesException("Invalid pages XML file");
597         }
598 
599         Iterator<Element> itr = siteinfoEl.element(
600             "namespaces").elements("namespace").iterator();
601 
602         while (itr.hasNext()) {
603             Element namespace = itr.next();
604 
605             if (!namespace.attribute("key").getData().equals("0")) {
606                 namespaces.add(namespace.getText());
607             }
608         }
609 
610         return namespaces;
611     }
612 
613     protected String[] readTagsEntries(
614             long userId, WikiNode node, String content)
615         throws PortalException, SystemException {
616 
617         Matcher matcher = _categoriesPattern.matcher(content);
618 
619         List<String> tagsEntries = new ArrayList<String>();
620 
621         while (matcher.find()) {
622             String categoryName = matcher.group(1);
623 
624             categoryName = normalize(categoryName, 75);
625 
626             TagsEntry tagsEntry = null;
627 
628             try {
629                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
630                     node.getCompanyId(), categoryName);
631             }
632             catch (NoSuchEntryException nsee) {
633                 ServiceContext serviceContext = new ServiceContext();
634 
635                 serviceContext.setAddCommunityPermissions(true);
636                 serviceContext.setAddGuestPermissions(true);
637                 serviceContext.setScopeGroupId(node.getGroupId());
638 
639                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
640                     userId, null, categoryName, null, null, serviceContext);
641             }
642 
643             tagsEntries.add(tagsEntry.getName());
644         }
645 
646         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
647             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
648         }
649 
650         return tagsEntries.toArray(new String[tagsEntries.size()]);
651     }
652 
653     protected Map<String, String> readUsersFile(File usersFile)
654         throws IOException {
655 
656         if ((usersFile == null) || (!usersFile.exists())) {
657             return Collections.EMPTY_MAP;
658         }
659 
660         Map<String, String> usersMap = new HashMap<String, String>();
661 
662         UnsyncBufferedReader unsyncBufferedReader =
663             new UnsyncBufferedReader(new FileReader(usersFile));
664 
665         String line = unsyncBufferedReader.readLine();
666 
667         while (line != null) {
668             String[] array = StringUtil.split(line);
669 
670             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
671                 (Validator.isNotNull(array[1]))) {
672 
673                 usersMap.put(array[0], array[1]);
674             }
675             else {
676                 if (_log.isInfoEnabled()) {
677                     _log.info(
678                         "Ignoring line " + line +
679                             " because it does not contain exactly 2 columns");
680                 }
681             }
682 
683             line = unsyncBufferedReader.readLine();
684         }
685 
686         return usersMap;
687     }
688 
689     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
690         "thumb", "temp", "archive"
691     };
692 
693     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
694 
695     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
696 
697     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
698 
699     private static Pattern _categoriesPattern = Pattern.compile(
700         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
701     private static Pattern _parentPattern = Pattern.compile(
702         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
703     private static Pattern _redirectPattern = Pattern.compile(
704         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
705 
706     private MediaWikiToCreoleTranslator _translator =
707         new MediaWikiToCreoleTranslator();
708 
709 }