1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
30 import com.liferay.portal.kernel.log.Log;
31 import com.liferay.portal.kernel.log.LogFactoryUtil;
32 import com.liferay.portal.kernel.util.ArrayUtil;
33 import com.liferay.portal.kernel.util.MapUtil;
34 import com.liferay.portal.kernel.util.ObjectValuePair;
35 import com.liferay.portal.kernel.util.ProgressTracker;
36 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
37 import com.liferay.portal.kernel.util.StringPool;
38 import com.liferay.portal.kernel.util.StringUtil;
39 import com.liferay.portal.kernel.util.Validator;
40 import com.liferay.portal.kernel.xml.Document;
41 import com.liferay.portal.kernel.xml.DocumentException;
42 import com.liferay.portal.kernel.xml.Element;
43 import com.liferay.portal.kernel.xml.SAXReaderUtil;
44 import com.liferay.portal.kernel.zip.ZipReader;
45 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
46 import com.liferay.portal.model.User;
47 import com.liferay.portal.service.ServiceContext;
48 import com.liferay.portal.service.UserLocalServiceUtil;
49 import com.liferay.portal.util.PropsValues;
50 import com.liferay.portlet.tags.NoSuchEntryException;
51 import com.liferay.portlet.tags.model.TagsEntry;
52 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
53 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
54 import com.liferay.portlet.tags.util.TagsUtil;
55 import com.liferay.portlet.wiki.ImportFilesException;
56 import com.liferay.portlet.wiki.NoSuchPageException;
57 import com.liferay.portlet.wiki.importers.WikiImporter;
58 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
59 import com.liferay.portlet.wiki.model.WikiNode;
60 import com.liferay.portlet.wiki.model.WikiPage;
61 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
62 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
63 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
64
65 import java.io.File;
66 import java.io.FileReader;
67 import java.io.IOException;
68
69 import java.util.ArrayList;
70 import java.util.Collections;
71 import java.util.HashMap;
72 import java.util.Iterator;
73 import java.util.List;
74 import java.util.Map;
75 import java.util.regex.Matcher;
76 import java.util.regex.Pattern;
77
78
84 public class MediaWikiImporter implements WikiImporter {
85
/**
 * Value passed to <code>WikiPageLocalServiceUtil.addPage</code> (in the
 * argument position following the title) when this importer creates the
 * shared images page.
 */
86 public static final String SHARED_IMAGES_CONTENT = "See attachments";
87
/**
 * Title of the wiki page that imported images are attached to.
 */
88 public static final String SHARED_IMAGES_TITLE = "SharedImages";
89
90 public void importPages(
91 long userId, WikiNode node, File[] files,
92 Map<String, String[]> options)
93 throws PortalException {
94
95 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96 throw new PortalException("The pages file is mandatory");
97 }
98
99 File pagesFile = files[0];
100 File usersFile = files[1];
101 File imagesFile = files[2];
102
103 try {
104 Document doc = SAXReaderUtil.read(pagesFile);
105
106 Map<String, String> usersMap = readUsersFile(usersFile);
107
108 Element root = doc.getRootElement();
109
110 List<String> specialNamespaces = readSpecialNamespaces(root);
111
112 processSpecialPages(userId, node, root, specialNamespaces);
113 processRegularPages(
114 userId, node, root, specialNamespaces, usersMap, imagesFile,
115 options);
116 processImages(userId, node, imagesFile);
117
118 moveFrontPage(userId, node, options);
119 }
120 catch (DocumentException de) {
121 throw new ImportFilesException("Invalid XML file provided");
122 }
123 catch (IOException de) {
124 throw new ImportFilesException("Error reading the files provided");
125 }
126 catch (PortalException e) {
127 throw e;
128 }
129 catch (Exception e) {
130 throw new PortalException(e);
131 }
132 }
133
134 protected long getUserId(
135 long userId, WikiNode node, String author,
136 Map<String, String> usersMap)
137 throws PortalException, SystemException {
138
139 User user = null;
140
141 String emailAddress = usersMap.get(author);
142
143 try {
144 if (Validator.isNull(emailAddress)) {
145 user = UserLocalServiceUtil.getUserByScreenName(
146 node.getCompanyId(), author.toLowerCase());
147 }
148 else {
149 user = UserLocalServiceUtil.getUserByEmailAddress(
150 node.getCompanyId(), emailAddress);
151 }
152 }
153 catch (NoSuchUserException nsue) {
154 user = UserLocalServiceUtil.getUserById(userId);
155 }
156
157 return user.getUserId();
158 }
159
160 protected void importPage(
161 long userId, String author, WikiNode node, String title,
162 String content, String summary, Map<String, String> usersMap)
163 throws PortalException {
164
165 try {
166 long authorUserId = getUserId(userId, node, author, usersMap);
167 String parentTitle = readParentTitle(content);
168 String redirectTitle = readRedirectTitle(content);
169
170 ServiceContext serviceContext = new ServiceContext();
171
172 serviceContext.setAddCommunityPermissions(true);
173 serviceContext.setAddGuestPermissions(true);
174 serviceContext.setTagsEntries(
175 readTagsEntries(userId, node, content));
176
177 if (Validator.isNull(redirectTitle)) {
178 content = _translator.translate(content);
179 }
180 else {
181 content =
182 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
183 StringPool.DOUBLE_CLOSE_BRACKET;
184 }
185
186 WikiPage page = null;
187
188 try {
189 page = WikiPageLocalServiceUtil.getPage(
190 node.getNodeId(), title);
191 }
192 catch (NoSuchPageException nspe) {
193 page = WikiPageLocalServiceUtil.addPage(
194 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
195 null, true, serviceContext);
196 }
197
198 WikiPageLocalServiceUtil.updatePage(
199 authorUserId, node.getNodeId(), title, page.getVersion(),
200 content, summary, true, "creole", parentTitle, redirectTitle,
201 serviceContext);
202 }
203 catch (Exception e) {
204 throw new PortalException("Error importing page " + title, e);
205 }
206 }
207
208 protected boolean isSpecialMediaWikiPage(
209 String title, List<String> specialNamespaces) {
210
211 for (String namespace: specialNamespaces) {
212 if (title.startsWith(namespace + StringPool.COLON)) {
213 return true;
214 }
215 }
216
217 return false;
218 }
219
220 protected boolean isValidImage(String[] paths, byte[] bytes) {
221 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
222 return false;
223 }
224
225 if ((paths.length > 1) &&
226 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
227
228 return false;
229 }
230
231 String fileName = paths[paths.length - 1];
232
233 try {
234 DLLocalServiceUtil.validate(fileName, bytes);
235 }
236 catch (PortalException pe) {
237 return false;
238 }
239 catch (SystemException se) {
240 return false;
241 }
242
243 return true;
244 }
245
246 protected void moveFrontPage(
247 long userId, WikiNode node, Map<String, String[]> options) {
248
249 String frontPageTitle = MapUtil.getString(
250 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
251
252 if (Validator.isNotNull(frontPageTitle)) {
253 frontPageTitle = normalizeTitle(frontPageTitle);
254
255 try {
256 if (WikiPageLocalServiceUtil.getPagesCount(
257 node.getNodeId(), frontPageTitle, true) > 0) {
258
259 ServiceContext serviceContext = new ServiceContext();
260
261 serviceContext.setAddCommunityPermissions(true);
262 serviceContext.setAddGuestPermissions(true);
263
264 WikiPageLocalServiceUtil.movePage(
265 userId, node.getNodeId(), frontPageTitle,
266 WikiPageImpl.FRONT_PAGE, false, serviceContext);
267
268 }
269 }
270 catch (Exception e) {
271 if (_log.isWarnEnabled()) {
272 StringBuilder sb = new StringBuilder();
273
274 sb.append("Could not move ");
275 sb.append(WikiPageImpl.FRONT_PAGE);
276 sb.append(" to the title provided: ");
277 sb.append(frontPageTitle);
278
279 _log.warn(sb.toString(), e);
280 }
281 }
282
283 }
284
285 }
286
287 protected String normalize(String categoryName, int length) {
288 categoryName = TagsUtil.toWord(categoryName.trim());
289
290 return StringUtil.shorten(categoryName, length);
291 }
292
293 protected String normalizeDescription(String description) {
294 description = description.replaceAll(
295 _categoriesPattern.pattern(), StringPool.BLANK);
296
297 return normalize(description, 300);
298 }
299
300 protected String normalizeTitle(String title) {
301 title = title.replaceAll(
302 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
303
304 return StringUtil.shorten(title, 75);
305 }
306
307 protected void processImages(long userId, WikiNode node, File imagesFile)
308 throws Exception {
309
310 if ((imagesFile == null) || (!imagesFile.exists())) {
311 return;
312 }
313
314 ProgressTracker progressTracker =
315 ProgressTrackerThreadLocal.getProgressTracker();
316
317 int count = 0;
318
319 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
320
321 List<String> entries = zipReader.getEntries();
322
323 int total = entries.size();
324
325 if (total > 0) {
326 try {
327 WikiPageLocalServiceUtil.getPage(
328 node.getNodeId(), SHARED_IMAGES_TITLE);
329 }
330 catch (NoSuchPageException nspe) {
331 ServiceContext serviceContext = new ServiceContext();
332
333 serviceContext.setAddCommunityPermissions(true);
334 serviceContext.setAddGuestPermissions(true);
335
336 WikiPageLocalServiceUtil.addPage(
337 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
338 SHARED_IMAGES_CONTENT, null, true, serviceContext);
339 }
340 }
341
342 List<ObjectValuePair<String, byte[]>> attachments =
343 new ArrayList<ObjectValuePair<String, byte[]>>();
344
345 int percentage = 50;
346
347 for (int i = 0; i < entries.size(); i++) {
348 String entry = entries.get(i);
349
350 String key = entry;
351 byte[] value = zipReader.getEntryAsByteArray(entry);
352
353 if (key.endsWith(StringPool.SLASH)) {
354 if (_log.isInfoEnabled()) {
355 _log.info("Ignoring " + key);
356 }
357
358 continue;
359 }
360
361 String[] paths = StringUtil.split(key, StringPool.SLASH);
362
363 if (!isValidImage(paths, value)) {
364 if (_log.isInfoEnabled()) {
365 _log.info("Ignoring " + key);
366 }
367
368 continue;
369 }
370
371 String fileName = paths[paths.length - 1].toLowerCase();
372
373 attachments.add(
374 new ObjectValuePair<String, byte[]>(fileName, value));
375
376 count++;
377
378 if ((i % 5) == 0) {
379 WikiPageLocalServiceUtil.addPageAttachments(
380 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
381
382 attachments.clear();
383
384 percentage = Math.min(50 + (i * 50) / total, 99);
385
386 progressTracker.updateProgress(percentage);
387 }
388 }
389
390 if (!attachments.isEmpty()) {
391 WikiPageLocalServiceUtil.addPageAttachments(
392 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
393 }
394
395 zipReader.close();
396
397 if (_log.isInfoEnabled()) {
398 _log.info("Imported " + count + " images into " + node.getName());
399 }
400 }
401
402 protected void processRegularPages(
403 long userId, WikiNode node, Element root,
404 List<String> specialNamespaces, Map<String, String> usersMap,
405 File imagesFile, Map<String, String[]> options) {
406
407 boolean importLatestVersion = MapUtil.getBoolean(
408 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
409
410 ProgressTracker progressTracker =
411 ProgressTrackerThreadLocal.getProgressTracker();
412
413 int count = 0;
414
415 List<Element> pages = root.elements("page");
416
417 int total = pages.size();
418
419 Iterator<Element> itr = root.elements("page").iterator();
420
421 int percentage = 10;
422 int maxPercentage = 50;
423
424 if ((imagesFile == null) || (!imagesFile.exists())) {
425 maxPercentage = 99;
426 }
427
428 int percentageRange = maxPercentage - percentage;
429
430 for (int i = 0; itr.hasNext(); i++) {
431 Element pageEl = itr.next();
432
433 String title = pageEl.elementText("title");
434
435 title = normalizeTitle(title);
436
437 percentage = Math.min(
438 10 + (i * percentageRange) / total, maxPercentage);
439
440 progressTracker.updateProgress(percentage);
441
442 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
443 continue;
444 }
445
446 List<Element> revisionEls = pageEl.elements("revision");
447
448 if (importLatestVersion) {
449 Element lastRevisionEl = revisionEls.get(
450 revisionEls.size() - 1);
451
452 revisionEls = new ArrayList<Element>();
453
454 revisionEls.add(lastRevisionEl);
455 }
456
457 for (Element curRevisionEl : revisionEls) {
458 String author = curRevisionEl.element(
459 "contributor").elementText("username");
460 String content = curRevisionEl.elementText("text");
461 String summary = curRevisionEl.elementText("comment");
462
463 try {
464 importPage(
465 userId, author, node, title, content, summary,
466 usersMap);
467 }
468 catch (Exception e) {
469 if (_log.isWarnEnabled()) {
470 StringBuilder sb = new StringBuilder();
471
472 sb.append("Page with title ");
473 sb.append(title);
474 sb.append(" could not be imported");
475
476 _log.warn(sb.toString(), e);
477 }
478 }
479 }
480
481 count++;
482 }
483
484 if (_log.isInfoEnabled()) {
485 _log.info("Imported " + count + " pages into " + node.getName());
486 }
487 }
488
489 protected void processSpecialPages(
490 long userId, WikiNode node, Element root,
491 List<String> specialNamespaces)
492 throws PortalException {
493
494 ProgressTracker progressTracker =
495 ProgressTrackerThreadLocal.getProgressTracker();
496
497 List<Element> pages = root.elements("page");
498
499 int total = pages.size();
500
501 Iterator<Element> itr = pages.iterator();
502
503 for (int i = 0; itr.hasNext(); i++) {
504 Element page = itr.next();
505
506 String title = page.elementText("title");
507
508 if (!title.startsWith("Category:")) {
509 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
510 root.remove(page);
511 }
512
513 continue;
514 }
515
516 String categoryName = title.substring("Category:".length());
517
518 categoryName = normalize(categoryName, 75);
519
520 String description = page.element("revision").elementText("text");
521
522 description = normalizeDescription(description);
523
524 try {
525 TagsEntry tagsEntry = null;
526
527 try {
528 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
529 node.getCompanyId(), categoryName);
530 }
531 catch (NoSuchEntryException nsee) {
532 ServiceContext serviceContext = new ServiceContext();
533
534 serviceContext.setAddCommunityPermissions(true);
535 serviceContext.setAddGuestPermissions(true);
536 serviceContext.setScopeGroupId(node.getGroupId());
537
538 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
539 userId, null, categoryName, null, null, serviceContext);
540 }
541
542 if (Validator.isNotNull(description)) {
543 TagsPropertyLocalServiceUtil.addProperty(
544 userId, tagsEntry.getEntryId(), "description",
545 description);
546 }
547 }
548 catch (SystemException se) {
549 _log.error(se, se);
550 }
551
552 if ((i % 5) == 0) {
553 progressTracker.updateProgress((i * 10) / total);
554 }
555 }
556 }
557
558 protected String readParentTitle(String content) {
559 Matcher matcher = _parentPattern.matcher(content);
560
561 String redirectTitle = StringPool.BLANK;
562
563 if (matcher.find()) {
564 redirectTitle = matcher.group(1);
565
566 redirectTitle = normalizeTitle(redirectTitle);
567
568 redirectTitle += " (disambiguation)";
569 }
570
571 return redirectTitle;
572 }
573
574 protected String readRedirectTitle(String content) {
575 Matcher matcher = _redirectPattern.matcher(content);
576
577 String redirectTitle = StringPool.BLANK;
578
579 if (matcher.find()) {
580 redirectTitle = matcher.group(1);
581
582 redirectTitle = normalizeTitle(redirectTitle);
583 }
584
585 return redirectTitle;
586 }
587
588 protected List<String> readSpecialNamespaces(Element root)
589 throws ImportFilesException {
590
591 List<String> namespaces = new ArrayList<String>();
592
593 Element siteinfoEl = root.element("siteinfo");
594
595 if (siteinfoEl == null) {
596 throw new ImportFilesException("Invalid pages XML file");
597 }
598
599 Iterator<Element> itr = siteinfoEl.element(
600 "namespaces").elements("namespace").iterator();
601
602 while (itr.hasNext()) {
603 Element namespace = itr.next();
604
605 if (!namespace.attribute("key").getData().equals("0")) {
606 namespaces.add(namespace.getText());
607 }
608 }
609
610 return namespaces;
611 }
612
613 protected String[] readTagsEntries(
614 long userId, WikiNode node, String content)
615 throws PortalException, SystemException {
616
617 Matcher matcher = _categoriesPattern.matcher(content);
618
619 List<String> tagsEntries = new ArrayList<String>();
620
621 while (matcher.find()) {
622 String categoryName = matcher.group(1);
623
624 categoryName = normalize(categoryName, 75);
625
626 TagsEntry tagsEntry = null;
627
628 try {
629 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
630 node.getCompanyId(), categoryName);
631 }
632 catch (NoSuchEntryException nsee) {
633 ServiceContext serviceContext = new ServiceContext();
634
635 serviceContext.setAddCommunityPermissions(true);
636 serviceContext.setAddGuestPermissions(true);
637 serviceContext.setScopeGroupId(node.getGroupId());
638
639 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
640 userId, null, categoryName, null, null, serviceContext);
641 }
642
643 tagsEntries.add(tagsEntry.getName());
644 }
645
646 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
647 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
648 }
649
650 return tagsEntries.toArray(new String[tagsEntries.size()]);
651 }
652
653 protected Map<String, String> readUsersFile(File usersFile)
654 throws IOException {
655
656 if ((usersFile == null) || (!usersFile.exists())) {
657 return Collections.EMPTY_MAP;
658 }
659
660 Map<String, String> usersMap = new HashMap<String, String>();
661
662 UnsyncBufferedReader unsyncBufferedReader =
663 new UnsyncBufferedReader(new FileReader(usersFile));
664
665 String line = unsyncBufferedReader.readLine();
666
667 while (line != null) {
668 String[] array = StringUtil.split(line);
669
670 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
671 (Validator.isNotNull(array[1]))) {
672
673 usersMap.put(array[0], array[1]);
674 }
675 else {
676 if (_log.isInfoEnabled()) {
677 _log.info(
678 "Ignoring line " + line +
679 " because it does not contain exactly 2 columns");
680 }
681 }
682
683 line = unsyncBufferedReader.readLine();
684 }
685
686 return usersMap;
687 }
688
689 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
690 "thumb", "temp", "archive"
691 };
692
693 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
694
695 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
696
697 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
698
699 private static Pattern _categoriesPattern = Pattern.compile(
700 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
701 private static Pattern _parentPattern = Pattern.compile(
702 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
703 private static Pattern _redirectPattern = Pattern.compile(
704 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
705
706 private MediaWikiToCreoleTranslator _translator =
707 new MediaWikiToCreoleTranslator();
708
709 }