1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.log.Log;
30 import com.liferay.portal.kernel.log.LogFactoryUtil;
31 import com.liferay.portal.kernel.util.ArrayUtil;
32 import com.liferay.portal.kernel.util.MapUtil;
33 import com.liferay.portal.kernel.util.ObjectValuePair;
34 import com.liferay.portal.kernel.util.ProgressTracker;
35 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
36 import com.liferay.portal.kernel.util.StringPool;
37 import com.liferay.portal.kernel.util.StringUtil;
38 import com.liferay.portal.kernel.util.Validator;
39 import com.liferay.portal.kernel.xml.Document;
40 import com.liferay.portal.kernel.xml.DocumentException;
41 import com.liferay.portal.kernel.xml.Element;
42 import com.liferay.portal.kernel.xml.SAXReaderUtil;
43 import com.liferay.portal.kernel.zip.ZipReader;
44 import com.liferay.portal.model.User;
45 import com.liferay.portal.service.UserLocalServiceUtil;
46 import com.liferay.portal.util.PropsValues;
47 import com.liferay.portlet.tags.NoSuchEntryException;
48 import com.liferay.portlet.tags.model.TagsEntry;
49 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
50 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
51 import com.liferay.portlet.tags.util.TagsUtil;
52 import com.liferay.portlet.wiki.ImportFilesException;
53 import com.liferay.portlet.wiki.NoSuchPageException;
54 import com.liferay.portlet.wiki.importers.WikiImporter;
55 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
56 import com.liferay.portlet.wiki.model.WikiNode;
57 import com.liferay.portlet.wiki.model.WikiPage;
58 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
59 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
60 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
61
62 import java.io.BufferedReader;
63 import java.io.File;
64 import java.io.FileReader;
65 import java.io.IOException;
66
67 import java.util.ArrayList;
68 import java.util.Collections;
69 import java.util.HashMap;
70 import java.util.Iterator;
71 import java.util.List;
72 import java.util.Map;
73 import java.util.regex.Matcher;
74 import java.util.regex.Pattern;
75
76
83 public class MediaWikiImporter implements WikiImporter {
84
85 public static final String SHARED_IMAGES_CONTENT = "See attachments";
86
87 public static final String SHARED_IMAGES_TITLE = "SharedImages";
88
89 public void importPages(
90 long userId, WikiNode node, File[] files,
91 Map<String, String[]> options)
92 throws PortalException {
93
94 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
95 throw new PortalException("The pages file is mandatory");
96 }
97
98 File pagesFile = files[0];
99 File usersFile = files[1];
100 File imagesFile = files[2];
101
102 try {
103 Document doc = SAXReaderUtil.read(pagesFile);
104
105 Map<String, String> usersMap = readUsersFile(usersFile);
106
107 Element root = doc.getRootElement();
108
109 List<String> specialNamespaces = readSpecialNamespaces(root);
110
111 processSpecialPages(userId, node, root, specialNamespaces);
112 processRegularPages(
113 userId, node, root, specialNamespaces, usersMap, imagesFile,
114 options);
115 processImages(userId, node, imagesFile);
116
117 moveFrontPage(userId, node, options);
118 }
119 catch (DocumentException de) {
120 throw new ImportFilesException("Invalid XML file provided");
121 }
122 catch (IOException de) {
123 throw new ImportFilesException("Error reading the files provided");
124 }
125 catch (PortalException e) {
126 throw e;
127 }
128 catch (Exception e) {
129 throw new PortalException(e);
130 }
131 }
132
133 protected long getUserId(
134 long userId, WikiNode node, String author,
135 Map<String, String> usersMap)
136 throws PortalException, SystemException {
137
138 User user = null;
139
140 String emailAddress = usersMap.get(author);
141
142 try {
143 if (Validator.isNull(emailAddress)) {
144 user = UserLocalServiceUtil.getUserByScreenName(
145 node.getCompanyId(), author.toLowerCase());
146 }
147 else {
148 user = UserLocalServiceUtil.getUserByEmailAddress(
149 node.getCompanyId(), emailAddress);
150 }
151 }
152 catch (NoSuchUserException nsue) {
153 user = UserLocalServiceUtil.getUserById(userId);
154 }
155
156 return user.getUserId();
157 }
158
159 protected void importPage(
160 long userId, String author, WikiNode node, String title,
161 String content, String summary, Map<String, String> usersMap)
162 throws PortalException {
163
164 try {
165 long authorUserId = getUserId(userId, node, author, usersMap);
166 String parentTitle = readParentTitle(content);
167 String redirectTitle = readRedirectTitle(content);
168 String[] tagsEntries = readTagsEntries(userId, node, content);
169
170 if (Validator.isNull(redirectTitle)) {
171 content = _translator.translate(content);
172 }
173 else {
174 content =
175 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
176 StringPool.DOUBLE_CLOSE_BRACKET;
177 }
178
179 WikiPage page = null;
180
181 try {
182 page = WikiPageLocalServiceUtil.getPage(
183 node.getNodeId(), title);
184 }
185 catch (NoSuchPageException nspe) {
186 page = WikiPageLocalServiceUtil.addPage(
187 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
188 null, true, null, null);
189 }
190
191 WikiPageLocalServiceUtil.updatePage(
192 authorUserId, node.getNodeId(), title, page.getVersion(),
193 content, summary, true, "creole", parentTitle,
194 redirectTitle, tagsEntries, null, null);
195 }
196 catch (Exception e) {
197 throw new PortalException("Error importing page " + title, e);
198 }
199 }
200
201 protected boolean isSpecialMediaWikiPage(
202 String title, List<String> specialNamespaces) {
203
204 for (String namespace: specialNamespaces) {
205 if (title.startsWith(namespace + StringPool.COLON)) {
206 return true;
207 }
208 }
209
210 return false;
211 }
212
213 protected boolean isValidImage(String[] paths, byte[] bytes) {
214 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
215 return false;
216 }
217
218 if ((paths.length > 1) &&
219 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
220
221 return false;
222 }
223
224 String fileName = paths[paths.length - 1];
225
226 try {
227 DLLocalServiceUtil.validate(fileName, bytes);
228 }
229 catch (PortalException pe) {
230 return false;
231 }
232
233 return true;
234 }
235
236 protected void moveFrontPage(
237 long userId, WikiNode node, Map<String, String[]> options) {
238
239 String frontPageTitle = MapUtil.getString(
240 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
241
242 if (Validator.isNotNull(frontPageTitle)) {
243 frontPageTitle = normalizeTitle(frontPageTitle);
244
245 try {
246 if (WikiPageLocalServiceUtil.getPagesCount(
247 node.getNodeId(), frontPageTitle, true) > 0) {
248
249 WikiPageLocalServiceUtil.movePage(
250 userId, node.getNodeId(), frontPageTitle,
251 WikiPageImpl.FRONT_PAGE, false, null, null);
252
253 }
254 }
255 catch (Exception e) {
256 if (_log.isWarnEnabled()) {
257 StringBuilder sb = new StringBuilder();
258
259 sb.append("Could not move ");
260 sb.append(WikiPageImpl.FRONT_PAGE);
261 sb.append(" to the title provided: ");
262 sb.append(frontPageTitle);
263
264 _log.warn(sb.toString(), e);
265 }
266 }
267
268 }
269
270 }
271
272 protected String normalize(String categoryName, int length) {
273 categoryName = TagsUtil.toWord(categoryName.trim());
274
275 return StringUtil.shorten(categoryName, length);
276 }
277
278 protected String normalizeDescription(String description) {
279 description = description.replaceAll(
280 _categoriesPattern.pattern(), StringPool.BLANK);
281
282 return normalize(description, 300);
283 }
284
285 protected String normalizeTitle(String title) {
286 title = title.replaceAll(
287 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
288
289 return StringUtil.shorten(title, 75);
290 }
291
292 private void processImages(long userId, WikiNode node, File imagesFile)
293 throws Exception {
294
295 if ((imagesFile == null) || (!imagesFile.exists())) {
296 return;
297 }
298
299 ProgressTracker progressTracker =
300 ProgressTrackerThreadLocal.getProgressTracker();
301
302 int count = 0;
303
304 ZipReader zipReader = new ZipReader(imagesFile);
305
306 Map<String, byte[]> entries = zipReader.getEntries();
307
308 int total = entries.size();
309
310 if (total > 0) {
311 try {
312 WikiPageLocalServiceUtil.getPage(
313 node.getNodeId(), SHARED_IMAGES_TITLE);
314 }
315 catch (NoSuchPageException nspe) {
316 WikiPageLocalServiceUtil.addPage(
317 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
318 SHARED_IMAGES_CONTENT, null, true, null, null);
319 }
320 }
321
322 List<ObjectValuePair<String, byte[]>> attachments =
323 new ArrayList<ObjectValuePair<String, byte[]>>();
324
325 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
326
327 int percentage = 50;
328
329 for (int i = 0; itr.hasNext(); i++) {
330 Map.Entry<String, byte[]> entry = itr.next();
331
332 String key = entry.getKey();
333 byte[] value = entry.getValue();
334
335 if (key.endsWith(StringPool.SLASH)) {
336 if (_log.isInfoEnabled()) {
337 _log.info("Ignoring " + key);
338 }
339
340 continue;
341 }
342
343 String[] paths = StringUtil.split(key, StringPool.SLASH);
344
345 if (!isValidImage(paths, value)) {
346 if (_log.isInfoEnabled()) {
347 _log.info("Ignoring " + key);
348 }
349
350 continue;
351 }
352
353 String fileName = paths[paths.length - 1].toLowerCase();
354
355 attachments.add(
356 new ObjectValuePair<String, byte[]>(fileName, value));
357
358 count++;
359
360 if ((i % 5) == 0) {
361 WikiPageLocalServiceUtil.addPageAttachments(
362 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
363
364 attachments.clear();
365
366 percentage = Math.min(50 + (i * 50) / total, 99);
367
368 progressTracker.updateProgress(percentage);
369 }
370 }
371
372 if (!attachments.isEmpty()) {
373 WikiPageLocalServiceUtil.addPageAttachments(
374 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
375 }
376
377 if (_log.isInfoEnabled()) {
378 _log.info("Imported " + count + " images into " + node.getName());
379 }
380 }
381
382 protected void processRegularPages(
383 long userId, WikiNode node, Element root,
384 List<String> specialNamespaces, Map<String, String> usersMap,
385 File imagesFile, Map<String, String[]> options) {
386
387 boolean importLatestVersion = MapUtil.getBoolean(
388 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
389
390 ProgressTracker progressTracker =
391 ProgressTrackerThreadLocal.getProgressTracker();
392
393 int count = 0;
394
395 List<Element> pages = root.elements("page");
396
397 int total = pages.size();
398
399 Iterator<Element> itr = root.elements("page").iterator();
400
401 int percentage = 10;
402 int maxPercentage = 50;
403
404 if ((imagesFile == null) || (!imagesFile.exists())) {
405 maxPercentage = 99;
406 }
407
408 int percentageRange = maxPercentage - percentage;
409
410 for (int i = 0; itr.hasNext(); i++) {
411 Element pageEl = itr.next();
412
413 String title = pageEl.elementText("title");
414
415 title = normalizeTitle(title);
416
417 percentage = Math.min(
418 10 + (i * percentageRange) / total, maxPercentage);
419
420 progressTracker.updateProgress(percentage);
421
422 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
423 continue;
424 }
425
426 List<Element> revisionEls = pageEl.elements("revision");
427
428 if (importLatestVersion) {
429 Element lastRevisionEl = revisionEls.get(
430 revisionEls.size() - 1);
431
432 revisionEls = new ArrayList<Element>();
433
434 revisionEls.add(lastRevisionEl);
435 }
436
437 for (Element curRevisionEl : revisionEls) {
438 String author = curRevisionEl.element(
439 "contributor").elementText("username");
440 String content = curRevisionEl.elementText("text");
441 String summary = curRevisionEl.elementText("comment");
442
443 try {
444 importPage(
445 userId, author, node, title, content, summary,
446 usersMap);
447 }
448 catch (Exception e) {
449 if (_log.isWarnEnabled()) {
450 StringBuilder sb = new StringBuilder();
451
452 sb.append("Page with title ");
453 sb.append(title);
454 sb.append(" could not be imported");
455
456 _log.warn(sb.toString(), e);
457 }
458 }
459 }
460
461 count++;
462 }
463
464 if (_log.isInfoEnabled()) {
465 _log.info("Imported " + count + " pages into " + node.getName());
466 }
467 }
468
469 protected void processSpecialPages(
470 long userId, WikiNode node, Element root,
471 List<String> specialNamespaces)
472 throws PortalException {
473
474 ProgressTracker progressTracker =
475 ProgressTrackerThreadLocal.getProgressTracker();
476
477 List<Element> pages = root.elements("page");
478
479 int total = pages.size();
480
481 Iterator<Element> itr = pages.iterator();
482
483 for (int i = 0; itr.hasNext(); i++) {
484 Element page = itr.next();
485
486 String title = page.elementText("title");
487
488 if (!title.startsWith("Category:")) {
489 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
490 root.remove(page);
491 }
492
493 continue;
494 }
495
496 String categoryName = title.substring("Category:".length());
497
498 categoryName = normalize(categoryName, 75);
499
500 String description = page.element("revision").elementText("text");
501
502 description = normalizeDescription(description);
503
504 try {
505 TagsEntry tagsEntry = null;
506
507 try {
508 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
509 node.getCompanyId(), categoryName);
510 }
511 catch (NoSuchEntryException nsee) {
512 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
513 userId, categoryName);
514 }
515
516 if (Validator.isNotNull(description)) {
517 TagsPropertyLocalServiceUtil.addProperty(
518 userId, tagsEntry.getEntryId(), "description",
519 description);
520 }
521 }
522 catch (SystemException se) {
523 _log.error(se, se);
524 }
525
526 if ((i % 5) == 0) {
527 progressTracker.updateProgress((i * 10) / total);
528 }
529 }
530 }
531
532 protected String readParentTitle(String content) {
533 Matcher matcher = _parentPattern.matcher(content);
534
535 String redirectTitle = StringPool.BLANK;
536
537 if (matcher.find()) {
538 redirectTitle = matcher.group(1);
539
540 redirectTitle = normalizeTitle(redirectTitle);
541
542 redirectTitle += " (disambiguation)";
543 }
544
545 return redirectTitle;
546 }
547
548 protected String readRedirectTitle(String content) {
549 Matcher matcher = _redirectPattern.matcher(content);
550
551 String redirectTitle = StringPool.BLANK;
552
553 if (matcher.find()) {
554 redirectTitle = matcher.group(1);
555
556 redirectTitle = normalizeTitle(redirectTitle);
557 }
558
559 return redirectTitle;
560 }
561
562 protected List<String> readSpecialNamespaces(Element root)
563 throws ImportFilesException {
564
565 List<String> namespaces = new ArrayList<String>();
566
567 Element siteinfoEl = root.element("siteinfo");
568
569 if (siteinfoEl == null) {
570 throw new ImportFilesException("Invalid pages XML file");
571 }
572
573 Iterator<Element> itr = siteinfoEl.element(
574 "namespaces").elements("namespace").iterator();
575
576 while (itr.hasNext()) {
577 Element namespace = itr.next();
578
579 if (!namespace.attribute("key").getData().equals("0")) {
580 namespaces.add(namespace.getText());
581 }
582 }
583
584 return namespaces;
585 }
586
587 protected String[] readTagsEntries(
588 long userId, WikiNode node, String content)
589 throws PortalException, SystemException {
590
591 Matcher matcher = _categoriesPattern.matcher(content);
592
593 List<String> tagsEntries = new ArrayList<String>();
594
595 while (matcher.find()) {
596 String categoryName = matcher.group(1);
597
598 categoryName = normalize(categoryName, 75);
599
600 TagsEntry tagsEntry = null;
601
602 try {
603 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
604 node.getCompanyId(), categoryName);
605 }
606 catch (NoSuchEntryException nsee) {
607 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
608 userId, categoryName);
609 }
610
611 tagsEntries.add(tagsEntry.getName());
612 }
613
614 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
615 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
616 }
617
618 return tagsEntries.toArray(new String[tagsEntries.size()]);
619 }
620
621 protected Map<String, String> readUsersFile(File usersFile)
622 throws IOException {
623
624 if ((usersFile == null) || (!usersFile.exists())) {
625 return Collections.EMPTY_MAP;
626 }
627
628 Map<String, String> usersMap = new HashMap<String, String>();
629
630 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
631
632 String line = reader.readLine();
633
634 while (line != null) {
635 String[] array = StringUtil.split(line);
636
637 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
638 (Validator.isNotNull(array[1]))) {
639
640 usersMap.put(array[0], array[1]);
641 }
642 else {
643 if (_log.isInfoEnabled()) {
644 _log.info(
645 "Ignoring line " + line +
646 " because it does not contain exactly 2 columns");
647 }
648 }
649
650 line = reader.readLine();
651 }
652
653 return usersMap;
654 }
655
656 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
657 "thumb", "temp", "archive"};
658
659 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
660
661 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
662
663 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
664
665 private static Pattern _categoriesPattern = Pattern.compile(
666 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
667
668 private static Pattern _parentPattern = Pattern.compile(
669 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
670
671 private static Pattern _redirectPattern = Pattern.compile(
672 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
673
674 private MediaWikiToCreoleTranslator _translator =
675 new MediaWikiToCreoleTranslator();
676
677 }