02-26-2014 12:29 PM
How do I implement weighting on fulltext search results ?
I'm working on Nuxeo EP 5.6HF29 with Postgresql 9.1. I'm looking for a way to implement weighting on search results based on a multi criteria query. The NXQL query looks like this :
Select * from Document where ecm:fulltext = 'Discipline' or ecm:fulltext_title = 'Discipline'
Nuxeo converts the NXQL query to Postgresql SQL and it looks like that :
SELECT "_C1" FROM(
SELECT "hierarchy"."id" AS "_C1"
FROM "hierarchy" JOIN "fulltext" ON "fulltext"."id" = "hierarchy"."id"
WHERE(
("hierarchy"."primarytype" IN ('SectionRoot', 'concours', 'Workspace', 'RelationSearch', 'BlogSite', 'ManagementRoot', 'Tag', 'AdministrativeStatusContainer', 'DocumentRoute', 'Thread', 'search_results', 'Space', 'documentrh', 'TemplateRoot', 'DocumentRouteStep', 'Domain', 'WorkspaceRoot', 'Comment', 'QueryNav', 'ConditionalStepFolder', 'SimpleTask', 'WebPage', 'ConditionalTask', 'StepFolder', 'WCOpenSocial', 'MailMessage', 'Forum', 'ContextualLink', 'PictureBook', 'Unit', 'TaskRoot', 'Post', 'WCHtml', 'DocumentRouteInstancesRoot', 'FollowLifeCycleTransitionTask', 'PublishTask', 'DocumentRouteModelsRoot', 'AdministrativeStatus', 'UserWorkspacesRoot', 'Picture', 'Document', 'Folder', 'WCPicture', 'FacetedSearch', 'RouteNode', 'File', 'AdvancedSearch', 'WebSite', 'FacetedSearchDefault', 'Gadget', 'Note', 'TaskDoc', 'HiddenFolder', 'Section', 'UserProfile', 'MailFolder', 'OrderedFolder', 'BasicAuditSearch', 'CommentRoot', 'BlogPost')
)
AND (((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext"))) OR ((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext_title"))))
)
UNION ALL
SELECT "_H"."id" AS "_C1"
FROM "hierarchy" "_H"
JOIN "proxies" ON "_H"."id" = "proxies"."id"
JOIN "hierarchy" ON "proxies"."targetid" = "hierarchy"."id"
JOIN "fulltext" ON "fulltext"."id" = "hierarchy"."id"
WHERE (
("hierarchy"."primarytype" IN ('SectionRoot', 'concours', 'Workspace', 'RelationSearch', 'BlogSite', 'ManagementRoot', 'Tag', 'AdministrativeStatusContainer', 'DocumentRoute', 'Thread', 'search_results', 'Space', 'documentrh', 'TemplateRoot', 'DocumentRouteStep', 'Domain', 'WorkspaceRoot', 'Comment', 'QueryNav', 'ConditionalStepFolder', 'SimpleTask', 'WebPage', 'ConditionalTask', 'StepFolder', 'WCOpenSocial', 'MailMessage', 'Forum', 'ContextualLink', 'PictureBook', 'Unit', 'TaskRoot', 'Post', 'WCHtml', 'DocumentRouteInstancesRoot', 'FollowLifeCycleTransitionTask', 'PublishTask', 'DocumentRouteModelsRoot', 'AdministrativeStatus', 'UserWorkspacesRoot', 'Picture', 'Document', 'Folder', 'WCPicture', 'FacetedSearch', 'RouteNode', 'File', 'AdvancedSearch', 'WebSite', 'FacetedSearchDefault', 'Gadget', 'Note', 'TaskDoc', 'HiddenFolder', 'Section', 'UserProfile', 'MailFolder', 'OrderedFolder', 'BasicAuditSearch', 'CommentRoot', 'BlogPost'))
AND (((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext"))) OR ((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext_title"))))
)) AS "_T"
LIMIT 201 OFFSET 0
I want to rank search results based on the weighting applied on the title and fulltext columns. By specifying two criterias in the query, the Postgresql ranking function disapears (and there is no way to apply the desired weight on each column) For example : Select * from Document where ecm:fulltext = 'Discipline' generates
SELECT "_C1" FROM(
SELECT "hierarchy"."id" AS "_C1", TS_RANK_CD(NX_TO_TSVECTOR("fulltext"."fulltext"), TO_TSQUERY('fr', '(Discipline)'), 32) AS _nxscore
FROM "hierarchy" JOIN "fulltext" ON "fulltext"."id" = "hierarchy"."id"
WHERE(
("hierarchy"."primarytype" IN ('SectionRoot', 'concours', 'Workspace', 'RelationSearch', 'BlogSite', 'ManagementRoot', 'Tag', 'AdministrativeStatusContainer', 'DocumentRoute', 'Thread', 'search_results', 'Space', 'documentrh', 'TemplateRoot', 'DocumentRouteStep', 'Domain', 'WorkspaceRoot', 'Comment', 'QueryNav', 'ConditionalStepFolder', 'SimpleTask', 'WebPage', 'ConditionalTask', 'StepFolder', 'WCOpenSocial', 'MailMessage', 'Forum', 'ContextualLink', 'PictureBook', 'Unit', 'TaskRoot', 'Post', 'WCHtml', 'DocumentRouteInstancesRoot', 'FollowLifeCycleTransitionTask', 'PublishTask', 'DocumentRouteModelsRoot', 'AdministrativeStatus', 'UserWorkspacesRoot', 'Picture', 'Document', 'Folder', 'WCPicture', 'FacetedSearch', 'RouteNode', 'File', 'AdvancedSearch', 'WebSite', 'FacetedSearchDefault', 'Gadget', 'Note', 'TaskDoc', 'HiddenFolder', 'Section', 'UserProfile', 'MailFolder', 'OrderedFolder', 'BasicAuditSearch', 'CommentRoot', 'BlogPost')
)
AND (((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext"))))
)
UNION ALL
SELECT "_H"."id" AS "_C1", TS_RANK_CD(NX_TO_TSVECTOR("fulltext"."fulltext"), TO_TSQUERY('fr', '(Discipline)'), 32) AS _nxscore
FROM "hierarchy" "_H"
JOIN "proxies" ON "_H"."id" = "proxies"."id"
JOIN "hierarchy" ON "proxies"."targetid" = "hierarchy"."id"
JOIN "fulltext" ON "fulltext"."id" = "hierarchy"."id"
WHERE (
("hierarchy"."primarytype" IN ('SectionRoot', 'concours', 'Workspace', 'RelationSearch', 'BlogSite', 'ManagementRoot', 'Tag', 'AdministrativeStatusContainer', 'DocumentRoute', 'Thread', 'search_results', 'Space', 'documentrh', 'TemplateRoot', 'DocumentRouteStep', 'Domain', 'WorkspaceRoot', 'Comment', 'QueryNav', 'ConditionalStepFolder', 'SimpleTask', 'WebPage', 'ConditionalTask', 'StepFolder', 'WCOpenSocial', 'MailMessage', 'Forum', 'ContextualLink', 'PictureBook', 'Unit', 'TaskRoot', 'Post', 'WCHtml', 'DocumentRouteInstancesRoot', 'FollowLifeCycleTransitionTask', 'PublishTask', 'DocumentRouteModelsRoot', 'AdministrativeStatus', 'UserWorkspacesRoot', 'Picture', 'Document', 'Folder', 'WCPicture', 'FacetedSearch', 'RouteNode', 'File', 'AdvancedSearch', 'WebSite', 'FacetedSearchDefault', 'Gadget', 'Note', 'TaskDoc', 'HiddenFolder', 'Section', 'UserProfile', 'MailFolder', 'OrderedFolder', 'BasicAuditSearch', 'CommentRoot', 'BlogPost'))
AND (((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext"))))
)) AS "_T"
ORDER BY _nxscore
LIMIT 201 OFFSET 0
I want to implement a multi criteria query (with custom weights) directly in the Java source code (query based on the official documentation of Postgresql available here : Controlling Text Search) :
SELECT "_C1" FROM(
SELECT "hierarchy"."id" AS "_C1", TS_RANK_CD('{0.1,0.4,0.6,1.0}', setweight(to_tsvector(coalesce(fulltext_title,'')), 'A') || setweight(to_tsvector(coalesce(fulltext,'')), 'D'), TO_TSQUERY('fr', '(Discipline)')) AS _nxscore
FROM "hierarchy" JOIN "fulltext" ON "fulltext"."id" = "hierarchy"."id"
LEFT JOIN "misc" "_F1" ON "hierarchy"."id" = "_F1"."id"
WHERE (
("hierarchy"."primarytype" IN ('SectionRoot', 'concours', 'Workspace', 'RelationSearch', 'BlogSite', 'ManagementRoot', 'Tag', 'AdministrativeStatusContainer', 'DocumentRoute', 'Thread', 'search_results', 'Space', 'documentrh', 'TemplateRoot', 'DocumentRouteStep', 'Domain', 'WorkspaceRoot', 'Comment', 'QueryNav', 'ConditionalStepFolder', 'SimpleTask', 'WebPage', 'ConditionalTask', 'StepFolder', 'WCOpenSocial', 'MailMessage', 'Forum', 'ContextualLink', 'PictureBook', 'Unit', 'TaskRoot', 'Post', 'WCHtml', 'DocumentRouteInstancesRoot', 'FollowLifeCycleTransitionTask', 'PublishTask', 'DocumentRouteModelsRoot', 'AdministrativeStatus', 'UserWorkspacesRoot', 'Picture', 'Document', 'Folder', 'WCPicture', 'FacetedSearch', 'RouteNode', 'File', 'AdvancedSearch', 'WebSite', 'FacetedSearchDefault', 'Gadget', 'Note', 'TaskDoc', 'HiddenFolder', 'Section', 'UserProfile', 'MailFolder', 'OrderedFolder', 'BasicAuditSearch', 'CommentRoot', 'BlogPost'))
AND ((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext_title")) OR (TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext")))
)
UNION ALL
SELECT "_H"."id" AS "_C1", TS_RANK_CD('{0.1,0.4,0.6,1.0}',setweight(to_tsvector(coalesce(fulltext_title,'')), 'A') || setweight(to_tsvector(coalesce(fulltext,'')), 'D'), TO_TSQUERY('fr', '(Discipline)')) AS _nxscore
FROM "hierarchy" "_H"
JOIN "proxies" ON "_H"."id" = "proxies"."id"
JOIN "hierarchy" ON "proxies"."targetid" = "hierarchy"."id"
JOIN "fulltext" ON "fulltext"."id" = "hierarchy"."id"
LEFT JOIN "misc" "_F1" ON "hierarchy"."id" = "_F1"."id"
WHERE (
("hierarchy"."primarytype" IN ('SectionRoot', 'concours', 'Workspace', 'RelationSearch', 'BlogSite', 'ManagementRoot', 'Tag', 'AdministrativeStatusContainer', 'DocumentRoute', 'Thread', 'search_results', 'Space', 'documentrh', 'TemplateRoot', 'DocumentRouteStep', 'Domain', 'WorkspaceRoot', 'Comment', 'QueryNav', 'ConditionalStepFolder', 'SimpleTask', 'WebPage', 'ConditionalTask', 'StepFolder', 'WCOpenSocial', 'MailMessage', 'Forum', 'ContextualLink', 'PictureBook', 'Unit', 'TaskRoot', 'Post', 'WCHtml', 'DocumentRouteInstancesRoot', 'FollowLifeCycleTransitionTask', 'PublishTask', 'DocumentRouteModelsRoot', 'AdministrativeStatus', 'UserWorkspacesRoot', 'Picture', 'Document', 'Folder', 'WCPicture', 'FacetedSearch', 'RouteNode', 'File', 'AdvancedSearch', 'WebSite', 'FacetedSearchDefault', 'Gadget', 'Note', 'TaskDoc', 'HiddenFolder', 'Section', 'UserProfile', 'MailFolder', 'OrderedFolder', 'BasicAuditSearch', 'CommentRoot', 'BlogPost'))
AND ((TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext_title")) OR (TO_TSQUERY('fr', '(Discipline)') @@ NX_TO_TSVECTOR("fulltext"."fulltext")))
)) AS "_T"
JOIN hierarchy ON hierarchy.id = "_C1"
ORDER BY _nxscore DESC LIMIT 201 OFFSET 0
How do I proceed ?
Thank you for your help. David
03-18-2014 09:21 AM
You'll want to use your own datasource and JDBC connection to the database and do the query entirely by hand.
Since Nuxeo 5.8 if you're using single-datasource mode (the default), you can use ConnectionHelper.getConnection(null)
to get to the low-level connection already used by VCS.
As a side not, if it's of no interest to you if you don't use proxies, you can drop the UNION ALL
part of the query by adding AND ecm:isProxy = 0
to the NXQL query.
Find what you came for
We want to make your experience in Hyland Connect as valuable as possible, so we put together some helpful links.