cancel
Showing results for 
Search instead for 
Did you mean: 

Optimized Script comparing metadata

dranakan
Champ on-the-rise
Champ on-the-rise
Hello,

I have a script that checks that no two documents share the same metadata. It works fine, but I would like to optimise it.

My documents have the metadata FirmNb and DocumentNb. For each document type, the script searches for documents that have the same FirmNb and DocumentNb.

The code gets all the documents of each type and then compares them with each other.
Could I do this better/faster?
I would also like to run it as a scheduled action, but I get problems: http://forums.alfresco.com/en/viewtopic.php?f=27&t=40082#p117669

Script

// Mail
var destinationMail="watchdog@custom.ch";

// Lucene Search
var exactTypeString = "EXACTTYPE"; // no children (sub-type)

// Qname finance
var qNameInvoiceCreditor = "{http://www.custom.ch/model/finance/1.0}invoiceCreditor";
var qNameInvoiceDebtor = "{http://www.custom.ch/model/finance/1.0}invoiceDebtor";
var qNamePaymentCreditor = "{http://www.custom.ch/model/finance/1.0}paymentCreditor";
var qNamePaymentDebtor = "{http://www.custom.ch/model/finance/1.0}paymentDebtor";
var qNameFinancialAccount = "{http://www.custom.ch/model/finance/1.0}financialAccount";
var qNameCondensedAccount = "{http://www.custom.ch/model/finance/1.0}condensedAccount";
var arrayQName= [qNameInvoiceCreditor,
                 qNameInvoiceDebtor,
                 qNamePaymentCreditor,
                 qNamePaymentDebtor,
                 qNameFinancialAccount,
                 qNameCondensedAccount];

// Query that would be execute
var queryString;

var response = "";

// Information about webscript
var nameWebscriptUser = "Webscript custom Redundant" + " by "
      + person.properties.userName;
var arguments = "Arguments : ";

script: {
   logger.log(nameWebscriptUser);

   // Turns true once any type reports documents with the same attributes
   var existSameFile = false;

   // Every type is checked, even after a first hit
   for (var i = 0; i < arrayQName.length; i++) {
      logger.log("Search for " + arrayQName[i]);
      existSameFile = (checkSameAttributes(arrayQName[i]) == true) || existSameFile;
   }

   //sendMail();
   response += existSameFile
         ? "Problems… Same file exists:-( \n" + "See in alfresco logs the results… \n"
         : "All is ok 🙂 \n";

   logger.log("End");
}

/*
* Check if several documents of one type share the same attributes
* (same "sg:firmNb" AND same "sg:documentNb").
*
* The documents are grouped by a (firmNb, documentNb) key in a single pass
* instead of comparing every document with every other one, so the work grows
* with the number of documents (plus the number of reported pairs) rather than
* with its square.
*
* Documents whose firmNb or documentNb is missing/zero (see getNumber) are
* never reported. Each pair of matching documents is passed to printSameFile,
* in the same order as a nested "current / other" loop would produce them.
*
* Note: values are matched on their string form; this assumes a given
* property has the same data type on all documents of one type.
*
* qName   : fully qualified type name, e.g. "{uri}localName"
* returns : true if at least one pair of documents has the same attributes
*/
function checkSameAttributes(qName) {
   // Restrict the search to the exact type (no children / sub-types)
   var typeQuery = "(" + exactTypeString + ":\"" + qName + "\")";
   var docs = search.luceneSearch(typeQuery);

   logger.log("Total : " + docs.length);

   // A duplicate needs at least two documents
   if (docs.length < 2) {
      return false;
   }

   // Pass 1: bucket the document indexes by key.
   // A plain object is used as the map to stay ES5 compatible; keys always
   // start with a digit, so they cannot clash with Object.prototype members.
   var groups = {};
   var keys = [];       // key of each document, null when it must be skipped
   var positions = [];  // position of each document inside its own group
   for (var i = 0; i < docs.length; i++) {
      var key = buildDuplicateKey(docs[i]);
      keys[i] = key;
      if (key === null) {
         continue;
      }
      if (!Object.prototype.hasOwnProperty.call(groups, key)) {
         groups[key] = [];
      }
      positions[i] = groups[key].length;
      groups[key].push(i);
   }

   // Pass 2: report each document together with the later members of its group
   var existSameFile = false;
   for (var current = 0; current < docs.length; current++) {
      if (keys[current] === null) {
         continue;
      }
      var group = groups[keys[current]];
      for (var p = positions[current] + 1; p < group.length; p++) {
         var other = group[p];
         printSameFile(docs[current].displayPath + "/" + docs[current].name,
               docs[other].displayPath + "/" + docs[other].name);
         existSameFile = true;
      }
   }
   return existSameFile;
}

/*
* Build the grouping key of a document from its firm and document numbers.
* Returns null when one of them is missing/zero, i.e. when the document must
* not take part in the comparison.
*/
function buildDuplicateKey(doc) {
   var firmNb = getNumber(doc.properties["sg:firmNb"]);
   // Loose comparison on purpose: 0 and "0" both count as "no value"
   if (firmNb == 0) {
      return null;
   }
   var documentNb = getNumber(doc.properties["sg:documentNb"]);
   if (documentNb == 0) {
      return null;
   }
   // The length prefix keeps ("1", "23") and ("12", "3") apart
   var firmText = String(firmNb);
   return firmText.length + ":" + firmText + ":" + String(documentNb);
}

/*
* Write a report about two files with the same attributes to the logger:
* one headline followed by both file paths, each indented with a tab.
*/
function printSameFile(file1, file2) {
   var reportLines = ["Same attributs !!!", "\t" + file1, "\t" + file2];
   for (var lineNo = 0; lineNo < reportLines.length; lineNo++) {
      logger.log(reportLines[lineNo]);
   }
}

/*
* Normalise a value that may be missing: null, the text "null" and the empty
* string all become 0; any other value is returned unchanged.
*/
function getNumber(numberCouldBeNull) {
   // Loose comparisons are intentional (undefined also matches null here)
   var isMissing = numberCouldBeNull == null
         || numberCouldBeNull == "null"
         || numberCouldBeNull == "";
   return isMissing ? 0 : numberCouldBeNull;
}

/*
* Send the "redundant data" warning mail to destinationMail.
* NOTE(review): the action is executed against `document`, which is not
* defined in this script and must come from the execution context - confirm.
*/
function sendMail(){
  var subject = "Warning - GED - Data Redundant";
  var body = "Redundant data are in Alfresco (documents with same keys (documentNb + firmNb)). Please check the Alfresco logs to see the details.";

  logger.log("Send a mail notification to : "+destinationMail);

  // Build the mail action and fill in its parameters
  var mailAction = actions.create("mail");
  mailAction.parameters.to = destinationMail;
  mailAction.parameters.subject = subject;
  mailAction.parameters.text = body;

  // Run the action against the current document
  mailAction.execute(document);
}

Alfresco must be configured to allow searches over a large number of documents.
alfresco-global.properties

#
# Properties to limit resources spent on individual searches
# (Useful for the SearchRedundant test script: it allows more than 1000 documents to be returned)
# The maximum time spent pruning results (*********I Changed from 10000 to 100000)
system.acl.maxPermissionCheckTimeMillis=100000
# The maximum number of results to perform permission checks against (*********I Changed from 1000 to 10000)
system.acl.maxPermissionChecks=10000

Thank you
1 REPLY 1

dranakan
Champ on-the-rise
Champ on-the-rise
In SQL we can do something like this :


select a, b, c, count(*)
from tablename
where…
group by a, b, c
having count(*) > 1

Could I do the same with Lucene ?

Thank you