<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extracting all links in published content in Alfresco Archive</title>
    <link>https://connect.hyland.com/t5/alfresco-archive/extracting-all-links-in-published-content/m-p/228436#M181566</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;I have found a way to do so by using HTML parser api at&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt; &lt;/SPAN&gt;&lt;A href="http://htmlparser.sourceforge.net/javadoc/overview-summary.html" rel="nofollow noopener noreferrer"&gt;http://htmlparser.sourceforge.net/javadoc/overview-summary.html&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;&amp;amp; UriExtractor class at &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;A href="http://svn.alfresco.com/repos/alfresco-open-mirror/alfresco/HEAD/root/projects/link-validation/source/java/org/alfresco/linkvalidation/UriExtractor.java" rel="nofollow noopener noreferrer"&gt;http://svn.alfresco.com/repos/alfresco-open-mirror/alfresco/HEAD/root/projects/link-validation/source/java/org/alfresco/linkvalidation/UriExtractor.java&lt;/A&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; You have to tweak the UriExtractor so that you can send the HTML content as String object to extractURIs() method &amp;amp; get all the http links as a Map.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Or you can use xml extractor for extracting http links from xml which is being published.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Mon, 20 Jul 2009 07:50:51 GMT</pubDate>
    <dc:creator>dynamolalit</dc:creator>
    <dc:date>2009-07-20T07:50:51Z</dc:date>
    <item>
      <title>Extracting all links in published content</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extracting-all-links-in-published-content/m-p/228435#M181565</link>
      <description>Hi, I am using Alfresco 3.1.1 over JBoss 4.2.3 AS. I have a requirement that I should be able to retrieve all the links that are present in a content (xml) which has been published &amp;amp; I need to update a table with the same. I have gone through http://wiki.alfresco.com/wiki/Metadata_Extraction#XML_Met</description>
      <pubDate>Tue, 14 Jul 2009 12:37:07 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extracting-all-links-in-published-content/m-p/228435#M181565</guid>
      <dc:creator>dynamolalit</dc:creator>
      <dc:date>2009-07-14T12:37:07Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting all links in published content</title>
      <link>https://connect.hyland.com/t5/alfresco-archive/extracting-all-links-in-published-content/m-p/228436#M181566</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;SPAN&gt;Hi,&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;I have found a way to do so by using HTML parser api at&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt; &lt;/SPAN&gt;&lt;A href="http://htmlparser.sourceforge.net/javadoc/overview-summary.html" rel="nofollow noopener noreferrer"&gt;http://htmlparser.sourceforge.net/javadoc/overview-summary.html&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;&amp;amp; UriExtractor class at &lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;A href="http://svn.alfresco.com/repos/alfresco-open-mirror/alfresco/HEAD/root/projects/link-validation/source/java/org/alfresco/linkvalidation/UriExtractor.java" rel="nofollow noopener noreferrer"&gt;http://svn.alfresco.com/repos/alfresco-open-mirror/alfresco/HEAD/root/projects/link-validation/source/java/org/alfresco/linkvalidation/UriExtractor.java&lt;/A&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; You have to tweak the UriExtractor so that you can send the HTML content as String object to extractURIs() method &amp;amp; get all the http links as a Map.&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Or you can use xml extractor for extracting http links from xml which is being published.&lt;/SPAN&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 20 Jul 2009 07:50:51 GMT</pubDate>
      <guid>https://connect.hyland.com/t5/alfresco-archive/extracting-all-links-in-published-content/m-p/228436#M181566</guid>
      <dc:creator>dynamolalit</dc:creator>
      <dc:date>2009-07-20T07:50:51Z</dc:date>
    </item>
  </channel>
</rss>

