cancel
Showing results for 
Search instead for 
Did you mean: 

Weird Search Result Issue

mcook
Champ in-the-making
Champ in-the-making
We are running Alfresco version 2.9 in 4.2.2.GA on Linux 2.6.18-8.el5 (i386).  We have created some custom scripts to manage driver documents.  They check whether a driver has all the required documents on file and when those documents need to be updated (they have expired or will expire soon).  We are currently having an issue with the documents returned by the Lucene search we are doing.  We are supposed to send an email to those concerned listing the documents that are going to expire or have expired; however, the list does not show all the documents when the task is run automatically every morning.  The list is different (it contains all the documents it should) when the same script is run manually by accessing it via its URL in a browser.  I cannot show the document sets because the file names contain sensitive information and we would be in violation of regulations.  My supervisor stated we could present dummy data to you if you request an example of our results.  Here is part of the script we are running:

<import resource="/Company Home/Data Dictionary/Scripts/burris-common.js">
<import resource="/Company Home/Data Dictionary/Scripts/exclusion-set.js">

/**
 * Entry point for the daily transportation tasks.
 *
 * Calls the three checks in a fixed order: checkExpires(), checkRetention(),
 * then checkDocumentSets(). An exception thrown by one check prevents the
 * following ones from running.
 *
 * NOTE(review): checkRetention() and checkDocumentSets() are not defined in
 * this part of the script -- presumably they come from the imported scripts
 * or from further down the file; confirm before relying on them.
 */
function transportation_dailyTasks() {
   checkExpires();
   checkRetention();
   checkDocumentSets();
}

///////////////////////////////////////////////////
/**
 * Finds the documents that are subject to expiration and sends the
 * expiration notifications for them.
 *
 * Runs a Lucene search for nodes that are of type BURRIS_DOC_TYPE, carry
 * EXPIREABLE_ASPECT, and carry neither SUPERCEDED_ASPECT nor
 * TERMINATED_ASPECT. The hits are passed to findExpiredRecords() and its
 * return value is passed to sendExpiredNotifications().
 */
function checkExpires() {
   // Every positive clause is marked required with "+". With Lucene's default
   // OR operator an unmarked clause becomes merely optional as soon as another
   // clause is required, so an unmarked TYPE clause would not restrict the
   // result set to BURRIS_DOC_TYPE at all.
   var query = "+TYPE:\"" + BURRIS_DOC_TYPE + "\""
      + " +ASPECT:\"" + EXPIREABLE_ASPECT + "\""
      + " -ASPECT:\"" + SUPERCEDED_ASPECT + "\""
      + " -ASPECT:\"" + TERMINATED_ASPECT + "\"";

   var results = search.luceneSearch(query);
   var expireds = findExpiredRecords(results);

   sendExpiredNotifications(expireds);
}

/**
 * Groups expired and soon-to-expire records by the group responsible for
 * them, so that each group can receive a single email with all expirations.
 *
 * Side effect: every record whose day difference is <= 0 gets its EXPIRED
 * property set to true and is saved.
 *
 * @param results array-like list of search hits (may be null). Each hit must
 *                expose name, parent, properties and save() -- presumably
 *                Alfresco script nodes; confirm against the caller.
 * @return a plain object keyed by the GROUP_RESPONSIBLE value. Each value is
 *         an Array of { file, isExpired } entries that additionally carries a
 *         "group" property holding the group value. A group is present even
 *         when none of its records is expiring (its Array is then empty).
 */
function findExpiredRecords(results) {
   // A plain object plus an own-property check is used as the map so that a
   // group named like an inherited member ("constructor", "length", "push")
   // cannot collide with it.
   var found = {};

   // One reference instant for the whole run.
   var now = new Date();
   var count = results ? results.length : 0;

   for (var i = 0; i < count; i++) {
      var result = results[i];

      // files that are sitting at the facility level are unfiled and not
      // approved yet; ignore them. A missing parent is tolerated instead of
      // aborting the whole run.
      var parent = result.parent;
      var grandparent = parent ? parent.parent : null;
      // "==" is deliberate: the name may be a host string object, for which
      // "===" against a script string would be false -- TODO confirm.
      if (grandparent && grandparent.name == "Transportation") continue;

      var properties = result.properties;
      var groupName = properties[GROUP_RESPONSIBLE];

      var groupEntry;
      if (Object.prototype.hasOwnProperty.call(found, groupName)) {
         groupEntry = found[groupName];
      } else {
         groupEntry = [];
         groupEntry["group"] = groupName;
         found[groupName] = groupEntry;
      }

      // getDayDifference() is defined outside this function; the code treats
      // a result <= 0 as "already expired" and 1..30 as "expiring soon".
      var difference = getDayDifference(properties[EXPIRATION_DATE], now);
      if (difference <= 0) {
         result.properties[EXPIRED] = true;
         result.save();

         groupEntry.push({ file: result.name, isExpired: true });
      } else if (difference <= 30) {
         groupEntry.push({ file: result.name, isExpired: false });
      }
   }

   return found;
}

/**
 * Emails every member of each responsible group the list of that group's
 * expired / soon-to-expire files, and writes the outcome to the log.
 *
 * For a group whose name is set and does not contain "_x0020_":
 *   - each member with a non-blank email address gets one mail, but only if
 *     the group has at least one file entry;
 *   - the mail text is appended to the log for each such member whether or
 *     not a mail was actually sent;
 *   - sendLog() is called once after the members were processed.
 * For any other group the file names are appended to the log under a
 * "does not behave" heading and sendLog() is called.
 *
 * @param results map of group entries as produced by findExpiredRecords():
 *                each value carries a "group" property and holds entries with
 *                "file" and "isExpired" properties.
 */
function sendExpiredNotifications(results) {
   // Returns the entries of a group entry that name a file. Anything without
   // a "file" value (e.g. the "group" name property itself) is skipped.
   function fileItemsOf(group) {
      var items = [];
      for (var key in group) {
         var item = group[key];
         if (item && item["file"]) items.push(item);
      }
      return items;
   }

   // Builds the bullet list that is placed in the body of the mail.
   function listingFor(items) {
      var listing = "";
      for (var i = 0; i < items.length; i++) {
         listing += "   * " + items[i]["file"] + ": ";
         listing += items[i]["isExpired"] ? "expired" : "expiring in less than 30 days";
         listing += "\r\n";
      }
      return listing;
   }

   // True when an address is missing or consists of whitespace only.
   function isBlank(value) {
      return !value || /^\s*$/.test(String(value));
   }

   for (var groupKey in results) {
      var group = results[groupKey];
      var groupName = group["group"];
      var items = fileItemsOf(group);

      // the only reason a file won't have a group is if it hasn't been approved yet
      if (groupName && !groupName.match(/_x0020_/)) {
         // A missing member list is treated as "no members".
         var users = people.getMembers(people.getGroup(groupName)) || [];

         // The listing is the same for every member, so build it once.
         var listing = listingFor(items);

         for (var u = 0; u < users.length; u++) {
            var person = users[u].properties;
            if (isBlank(person.email)) {
               continue;
            }

            var text = "Hello " + person.firstName + ",\r\n\r\n"
               + "This is an automated email, the result of an "
               + "Alfresco Expiring Record check. The results are "
               + "listed below.\r\n\r\n"
               + listing
               + "\r\n"
               + "Please login to Alfresco (" + ALFRESCO_URL + ") "
               + "to update these documents.\r\n\r\n"
               + "Thank you.";

            // Nothing to report means nothing to send; the text is still logged.
            if (items.length > 0) {
               var mail = actions.create("mail");

               mail.parameters.to = person.email;
               mail.parameters.from = "Burris Alfresco <noreply@burrislogistics.com>";
               mail.parameters.subject = "Results from Alfresco Expiration Check";
               mail.parameters.text = text;

               mail.execute(roothome);
            }

            appendLog(text);
         }
         sendLog();
      }
      else {
         appendLog("These items have a Group Name that does not behave:");
         for (var i = 0; i < items.length; i++) {
            appendLog("   * " + items[i]["file"]);
         }
         sendLog();
      }
   }
}

Please help us resolve this, as the issue is time-sensitive for our company.  Any help will be greatly appreciated.
12 REPLIES 12

dhalupa
Champ on-the-rise
Champ on-the-rise
Not in a cluster environment. Some transactions might be missing in the local lucene index of an Alfresco instance.
Yes, and indexes might also get corrupted, but these are not normal behaviors. I am referring to the situation where the indexes are not corrupted and are properly synchronized between cluster nodes. In that case a Lucene query will always return the correct number of results; the cutting will occur later, during ACL evaluation.

ebell
Champ in-the-making
Champ in-the-making
Would you mind showing some examples of using the direct database queries through hibernate? 
I thought that direct access to the Alfresco database was not recommended.  Did any further problems arise with your solution? 
Would an XPath query based on node service provide consistent/stable results, or would that have the same problem with inconsistent results as Lucene did?

ebell
Champ in-the-making
Champ in-the-making
After some further investigation, I've changed
search.luceneSearch("TYPE:\""+BURRIS_DOC_TYPE+"\" +ASPECT:\""+EXPIREABLE_ASPECT+"\" -ASPECT:\""+SUPERCEDED_ASPECT+"\" -ASPECT:\""+TERMINATED_ASPECT+"\"");
to
search.xpathSearch("TYPE:\""+BURRIS_DOC_TYPE+"\" +ASPECT:\""+EXPIREABLE_ASPECT+"\" -ASPECT:\""+SUPERCEDED_ASPECT+"\" -ASPECT:\""+TERMINATED_ASPECT+"\"");
etc. in all instances in the javascript code. 
It still appears that Lucene is being run to do part of the searching.  However, as in the one post above, I'm looking to create a stable report every time.  Am I heading in the right direction, or would using Hibernate be the way to go?