doc/xslt/webhelp/docs/content/search/nwSearchFnt.js

   1 /*----------------------------------------------------------------------------\r
   2  * JavaScript for webhelp search\r
   3  *----------------------------------------------------------------------------\r
   4  This file is part of the webhelpsearch plugin for DocBook WebHelp\r
   5  Copyright (c) 2007-2008 NexWave Solutions All Rights Reserved.\r
   6  www.nexwave.biz Nadege Quaine\r
   7  http://kasunbg.blogspot.com/ Kasun Gajasinghe\r
   8  */\r
   9 \r
  10 //string initialization\r
  11 var htmlfileList = "htmlFileList.js";\r
  12 var htmlfileinfoList = "htmlFileInfoList.js";\r
  13 var useCJKTokenizing = false;\r
  14 \r
  15 /* Cette fonction verifie la validite de la recherche entrre par l utilisateur */\r
  16 function Verifie(ditaSearch_Form) {\r
  17 \r
  18     // Check browser compatibitily\r
  19     if (navigator.userAgent.indexOf("Konquerer") > -1) {\r
  20 \r
  21         alert(txt_browser_not_supported);\r
  22         return;\r
  23     }\r
  24 \r
  25 \r
  26     var expressionInput = document.ditaSearch_Form.textToSearch.value;\r
  27     //Set a cookie to store the searched keywords\r
  28     $.cookie('textToSearch', expressionInput);\r
  29 \r
  30 \r
  31     if (expressionInput.length < 1) {\r
  32 \r
  33         // expression is invalid\r
  34         alert(txt_enter_at_least_1_char);\r
  35         // reactive la fenetre de search (utile car cadres)\r
  36         document.ditaSearch_Form.textToSearch.focus();\r
  37     }\r
  38     else {\r
  39 \r
  40         // Effectuer la recherche\r
  41         Effectuer_recherche(expressionInput);\r
  42 \r
  43         // reactive la fenetre de search (utile car cadres)\r
  44         document.ditaSearch_Form.textToSearch.focus();\r
  45     }\r
  46 }\r
  47 \r
  48 var stemQueryMap = new Array();  // A hashtable which maps stems to query words\r
  49 \r
  50 /* This function parses the search expression, loads the indices and displays the results*/\r
  51 function Effectuer_recherche(expressionInput) {\r
  52 \r
  53     /* Display a waiting message */\r
  54     //DisplayWaitingMessage();\r
  55 \r
  56     /*data initialisation*/\r
  57     var searchFor = "";       // expression en lowercase et sans les caracte    res speciaux\r
  58     //w = new Object();  // hashtable, key=word, value = list of the index of the html files\r
  59     scriptLetterTab = new Scriptfirstchar(); // Array containing the first letter of each word to look for\r
  60     var wordsList = new Array(); // Array with the words to look for\r
  61     var finalWordsList = new Array(); // Array with the words to look for after removing spaces\r
  62     var linkTab = new Array();\r
  63     var fileAndWordList = new Array();\r
  64     var txt_wordsnotfound = "";\r
  65 \r
  66 \r
  67     /*nqu: expressionInput, la recherche est lower cased, plus remplacement des char speciaux*/\r
  68     searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
  69 \r
  70     searchFor = searchFor.replace(/  +/g, " ");\r
  71     searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");\r
  72 \r
  73     wordsList = searchFor.split(" ");\r
  74     wordsList.sort();\r
  75 \r
  76     //set the tokenizing method\r
  77     if(typeof indexerLanguage != "undefined" && (indexerLanguage=="zh" || indexerLanguage=="ja" ||indexerLanguage=="ko")){\r
  78         useCJKTokenizing=true;\r
  79     } else {\r
  80         useCJKTokenizing=false;\r
  81     }\r
  82     //If Lucene CJKTokenizer was used as the indexer, then useCJKTokenizing will be true. Else, do normal tokenizing.\r
  83     // 2-gram tokenizinghappens in CJKTokenizing,  \r
  84     if(useCJKTokenizing){\r
  85         finalWordsList = cjkTokenize(wordsList);\r
  86     } else { \r
  87         finalWordsList = tokenize(wordsList);\r
  88     }\r
  89 \r
  90     //load the scripts with the indices: the following lines do not work on the server. To be corrected\r
  91     /*if (IEBrowser) {\r
  92      scriptsarray = loadTheIndexScripts (scriptLetterTab);\r
  93      } */\r
  94 \r
  95     /**\r
  96      * Compare with the indexed words (in the w[] array), and push words that are in it to tempTab.\r
  97      */\r
  98     var tempTab = new Array();\r
  99     for (var t in finalWordsList) {\r
 100         if (w[finalWordsList[t].toString()] == undefined) {\r
 101             txt_wordsnotfound += finalWordsList[t] + " ";\r
 102         } else {\r
 103             tempTab.push(finalWordsList[t]);\r
 104         }\r
 105     }\r
 106     finalWordsList = tempTab;\r
 107 \r
 108     if (finalWordsList.length) {\r
 109 \r
 110         //search 'and' and 'or' one time\r
 111         fileAndWordList = SortResults(finalWordsList);\r
 112 \r
 113         var cpt = fileAndWordList.length;\r
 114         for (var i = cpt - 1; i >= 0; i--) {\r
 115             if (fileAndWordList[i] != undefined) {\r
 116                 linkTab.push("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + fileAndWordList[i][0].motslisteDisplay + "</span>" + "</p>");\r
 117 \r
 118                 linkTab.push("<ul class='searchresult'>");\r
 119                 for (t in fileAndWordList[i]) {\r
 120                     //DEBUG: alert(": "+ fileAndWordList[i][t].filenb+" " +fileAndWordList[i][t].motsliste);\r
 121                     //linkTab.push("<li><a href=\"../"+fl[fileAndWordList[i][t].filenb]+"\">"+fl[fileAndWordList[i][t].filenb]+"</a></li>");\r
 122                     var tempInfo = fil[fileAndWordList[i][t].filenb];\r
 123                     var pos1 = tempInfo.indexOf("@@@");\r
 124                     var pos2 = tempInfo.lastIndexOf("@@@");\r
 125                     var tempPath = tempInfo.substring(0, pos1);\r
 126                     var tempTitle = tempInfo.substring(pos1 + 3, pos2);\r
 127                     var tempShortdesc = tempInfo.substring(pos2 + 3, tempInfo.length);\r
 128 \r
 129                     //file:///home/kasun/docbook/WEBHELP/webhelp-draft-output-format-idea/src/main/resources/web/webhelp/installation.html\r
 130                     var linkString = "<li><a href=" + tempPath + ">" + tempTitle + "</a>";\r
 131                     // var linkString = "<li><a href=\"installation.html\">" + tempTitle + "</a>";\r
 132                     if ((tempShortdesc != "null")) {\r
 133                         linkString += "\n<div class=\"shortdesclink\">" + tempShortdesc + "</div>";\r
 134                     }\r
 135                     linkString += "</li>";\r
 136                     linkTab.push(linkString);\r
 137                 }\r
 138                 linkTab.push("</ul>");\r
 139             }\r
 140         }\r
 141     }\r
 142 \r
 143     var results = "";\r
 144     if (linkTab.length > 0) { \r
 145         /*writeln ("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">"  + cleanwordsList + "</span>" + "<br/>"+"</p>");*/\r
 146         results = "<p>";\r
 147         //write("<ul class='searchresult'>");\r
 148         for (t in linkTab) {\r
 149             results += linkTab[t].toString();\r
 150         }\r
 151         results += "</p>";\r
 152     } else {\r
 153         results = "<p>" + "Your search returned no results for " + "<span class=\"searchExpression\">" + txt_wordsnotfound + "</span>" + "</p>";\r
 154     }\r
 155     //alert(results);\r
 156     document.getElementById('searchResults').innerHTML = results; \r
 157 }\r
 158 \r
 159 function tokenize(wordsList){\r
 160     var stemmedWordsList = new Array(); // Array with the words to look for after removing spaces\r
 161     var cleanwordsList = new Array(); // Array with the words to look for\r
 162     for(var j in wordsList){\r
 163         var word = wordsList[j];\r
 164         if(typeof stemmer != "undefined" ){\r
 165             stemQueryMap[stemmer(word)] = word;\r
 166         } else {\r
 167             stemQueryMap[word] = word;\r
 168         }\r
 169     } \r
 170      //stemmedWordsList is the stemmed list of words separated by spaces.\r
 171     for (var t in wordsList) {\r
 172         wordsList[t] = wordsList[t].replace(/(%22)|^-/g, "");\r
 173         if (wordsList[t] != "%20") {\r
 174             scriptLetterTab.add(wordsList[t].charAt(0));\r
 175             cleanwordsList.push(wordsList[t]);\r
 176         }\r
 177     }\r
 178 \r
 179     if(typeof stemmer != "undefined" ){\r
 180         //Do the stemming using Porter's stemming algorithm\r
 181         for (var i = 0; i < cleanwordsList.length; i++) {\r
 182             var stemWord = stemmer(cleanwordsList[i]);\r
 183             stemmedWordsList.push(stemWord);\r
 184         }\r
 185     } else {\r
 186         stemmedWordsList = cleanwordsList;\r
 187     }\r
 188     return stemmedWordsList;\r
 189 }\r
 190 \r
 191 //Invoker of CJKTokenizer class methods.\r
 192 function cjkTokenize(wordsList){\r
 193     var allTokens= new Array();\r
 194     var notCJKTokens= new Array();\r
 195     var j=0;\r
 196     for(j=0;j<wordsList.length;j++){\r
 197         var word = wordsList[j];\r
 198         if(getAvgAsciiValue(word) < 127){\r
 199             notCJKTokens.push(word);\r
 200         } else { \r
 201             var tokenizer = new CJKTokenizer(word);\r
 202             var tokensTmp = tokenizer.getAllTokens();\r
 203             allTokens = allTokens.concat(tokensTmp);\r
 204         }\r
 205     }\r
 206     allTokens = allTokens.concat(tokenize(notCJKTokens));\r
 207     return allTokens;\r
 208 }\r
 209 \r
 210 //A simple way to determine whether the query is in english or not.\r
 211 function getAvgAsciiValue(word){\r
 212     var tmp = 0;\r
 213     var num = word.length < 5 ? word.length:5;\r
 214     for(var i=0;i<num;i++){\r
 215         if(i==5) break;\r
 216         tmp += word.charCodeAt(i);\r
 217     }\r
 218     return tmp/num;\r
 219 }\r
 220 \r
 221 //CJKTokenizer\r
 222 function CJKTokenizer(input){\r
 223     this.input = input;\r
 224     this.offset=-1;\r
 225     this.tokens = new Array(); \r
 226     this.incrementToken = incrementToken;\r
 227     this.tokenize = tokenize;\r
 228     this.getAllTokens = getAllTokens;\r
 229     this.unique = unique;\r
 230 \r
 231     function incrementToken(){\r
 232                 if(this.input.length - 2 <= this.offset){\r
 233                 //      console.log("false "+offset);\r
 234                         return false;\r
 235                 }\r
 236                 else {\r
 237                         this.offset+=1;\r
 238                         return true;\r
 239                 }\r
 240         }\r
 241 \r
 242         function tokenize(){\r
 243                 //document.getElementById("content").innerHTML += x.substring(offset,offset+2)+"<br>";\r
 244                 return this.input.substring(this.offset,this.offset+2);\r
 245         }\r
 246 \r
 247         function getAllTokens(){\r
 248                 while(this.incrementToken()){\r
 249                         var tmp = this.tokenize();\r
 250                         this.tokens.push(tmp);\r
 251                 }\r
 252         return this.unique(this.tokens);\r
 253 //              document.getElementById("content").innerHTML += tokens+" ";\r
 254 //              document.getElementById("content").innerHTML += "<br>dada"+sortedTokens+" ";\r
 255 //              console.log(tokens.length+"dsdsds");\r
 256                 /*for(i=0;i<tokens.length;i++){\r
 257                         console.log(tokens[i]);\r
 258                         var ss = tokens[i] == sortedTokens[i];\r
 259 \r
 260 //                      document.getElementById("content").innerHTML += "<br>dada"+un[i]+"- "+stems[i]+"&nbsp;&nbsp;&nbsp;"+ ss;\r
 261                         document.getElementById("content").innerHTML += "<br>"+sortedTokens[i];\r
 262                 }*/\r
 263         }\r
 264 \r
 265         function unique(a)\r
 266         {\r
 267            var r = new Array();\r
 268            o:for(var i = 0, n = a.length; i < n; i++)\r
 269            {\r
 270               for(var x = 0, y = r.length; x < y; x++)\r
 271               {\r
 272                  if(r[x]==a[i]) continue o;\r
 273               }\r
 274               r[r.length] = a[i];\r
 275            }\r
 276            return r;\r
 277         } \r
 278 }\r
 279 \r
 280 \r
 281 /* Scriptfirstchar: to gather the first letter of index js files to upload */\r
 282 function Scriptfirstchar() {\r
 283     this.strLetters = "";\r
 284     this.add = addLettre;\r
 285 }\r
 286 \r
 287 function addLettre(caract) {\r
 288 \r
 289     if (this.strLetters == 'undefined') {\r
 290         this.strLetters = caract;\r
 291     } else if (this.strLetters.indexOf(caract) < 0) {\r
 292         this.strLetters += caract;\r
 293     }\r
 294 \r
 295     return 0;\r
 296 }\r
 297 /* end of scriptfirstchar */\r
 298 \r
 299 /*main loader function*/\r
 300 /*tab contains the first letters of each word looked for*/\r
 301 function loadTheIndexScripts(tab) {\r
 302 \r
 303     //alert (tab.strLetters);\r
 304     var scriptsarray = new Array();\r
 305 \r
 306     for (var i = 0; i < tab.strLetters.length; i++) {\r
 307 \r
 308         scriptsarray[i] = "..\/search" + "\/" + tab.strLetters.charAt(i) + ".js";\r
 309     }\r
 310     // add the list of html files\r
 311     i++;\r
 312     scriptsarray[i] = "..\/search" + "\/" + htmlfileList;\r
 313 \r
 314     //debug\r
 315     for (var t in scriptsarray) {\r
 316         //alert (scriptsarray[t]);\r
 317     }\r
 318 \r
 319     tab = new ScriptLoader();\r
 320     for (t in scriptsarray) {\r
 321         tab.add(scriptsarray[t]);\r
 322     }\r
 323     tab.load();\r
 324     //alert ("scripts loaded");\r
 325     return (scriptsarray);\r
 326 }\r
 327 \r
 328 /* ScriptLoader: to load the scripts and wait that it's finished */\r
 329 function ScriptLoader() {\r
 330     this.cpt = 0;\r
 331     this.scriptTab = new Array();\r
 332     this.add = addAScriptInTheList;\r
 333     this.load = loadTheScripts;\r
 334     this.onScriptLoaded = onScriptLoadedFunc;\r
 335 }\r
 336 \r
 337 function addAScriptInTheList(scriptPath) {\r
 338     this.scriptTab.push(scriptPath);\r
 339 }\r
 340 \r
 341 function loadTheScripts() {\r
 342     var script;\r
 343     var head;\r
 344 \r
 345     head = document.getElementsByTagName('head').item(0);\r
 346 \r
 347     //script = document.createElement('script');\r
 348 \r
 349     for (var el in this.scriptTab) {\r
 350         //alert (el+this.scriptTab[el]);\r
 351         script = document.createElement('script');\r
 352         script.src = this.scriptTab[el];\r
 353         script.type = 'text/javascript';\r
 354         script.defer = false;\r
 355 \r
 356         head.appendChild(script);\r
 357     }\r
 358 \r
 359 }\r
 360 \r
 361 function onScriptLoadedFunc(e) {\r
 362     e = e || window.event;\r
 363     var target = e.target || e.srcElement;\r
 364     var isComplete = true;\r
 365     if (typeof target.readyState != undefined) {\r
 366 \r
 367         isComplete = (target.readyState == "complete" || target.readyState == "loaded");\r
 368     }\r
 369     if (isComplete) {\r
 370         ScriptLoader.cpt++;\r
 371         if (ScriptLoader.cpt == ScriptLoader.scripts.length) {\r
 372             ScriptLoader.onLoadComplete();\r
 373         }\r
 374     }\r
 375 }\r
 376 \r
 377 /*\r
 378 function onLoadComplete() {\r
 379     alert("loaded !!");\r
 380 } */\r
 381 \r
 382 /* End of scriptloader functions */\r
 383  \r
 384 // Array.unique( strict ) - Remove duplicate values\r
 385 function unique(tab) {\r
 386     var a = new Array();\r
 387     var i;\r
 388     var l = tab.length;\r
 389 \r
 390     if (tab[0] != undefined) {\r
 391         a[0] = tab[0];\r
 392     }\r
 393     else {\r
 394         return -1\r
 395     }\r
 396 \r
 397     for (i = 1; i < l; i++) {\r
 398         if (indexof(a, tab[i], 0) < 0) {\r
 399             a.push(tab[i]);\r
 400         }\r
 401     }\r
 402     return a;\r
 403 }\r
 404 function indexof(tab, element, begin) {\r
 405     for (var i = begin; i < tab.length; i++) {\r
 406         if (tab[i] == element) {\r
 407             return i;\r
 408         }\r
 409     }\r
 410     return -1;\r
 411 \r
 412 }\r
 413 /* end of Array functions */\r
 414 \r
 415 \r
 416 /*\r
 417  Param: mots= list of words to look for.\r
 418  This function creates an hashtable:\r
 419  - The key is the index of a html file which contains a word to look for.\r
 420  - The value is the list of all words contained in the html file.\r
 421 \r
 422  Return value: the hashtable fileAndWordList\r
 423  */\r
 424 function SortResults(mots) {\r
 425 \r
 426     var fileAndWordList = new Object();\r
 427     if (mots.length == 0) {\r
 428         return null;\r
 429     }\r
 430 \r
 431     for (var t in mots) {\r
 432         // get the list of the indices of the files.\r
 433         var listNumerosDesFicStr = w[mots[t].toString()];\r
 434         //alert ("listNumerosDesFicStr "+listNumerosDesFicStr);\r
 435         var tab = listNumerosDesFicStr.split(",");\r
 436 \r
 437         //for each file (file's index):\r
 438         for (var t2 in tab) {\r
 439             var temp = tab[t2].toString();\r
 440             if (fileAndWordList[temp] == undefined) {\r
 441 \r
 442                 fileAndWordList[temp] = "" + mots[t];\r
 443             } else {\r
 444 \r
 445                 fileAndWordList[temp] += "," + mots[t];\r
 446             }\r
 447         }\r
 448     }\r
 449 \r
 450     var fileAndWordListValuesOnly = new Array();\r
 451 \r
 452     // sort results according to values\r
 453     var temptab = new Array();\r
 454     for (t in fileAndWordList) {\r
 455         tab = fileAndWordList[t].split(',');\r
 456 \r
 457         var tempDisplay = new Array();\r
 458         for (var x in tab) {\r
 459             if(stemQueryMap[tab[x]] != undefined){\r
 460                 tempDisplay.push(stemQueryMap[tab[x]]); //get the original word from the stem word.\r
 461             } else {\r
 462                 tempDisplay.push(tab[x]); //no stem is available. (probably a CJK language)\r
 463             }\r
 464         }\r
 465         var tempDispString = tempDisplay.join(", ");\r
 466 \r
 467         temptab.push(new resultPerFile(t, fileAndWordList[t], tab.length, tempDispString));\r
 468         fileAndWordListValuesOnly.push(fileAndWordList[t]);\r
 469     }\r
 470 \r
 471 \r
 472     //alert("t"+fileAndWordListValuesOnly.toString());\r
 473 \r
 474     fileAndWordListValuesOnly = unique(fileAndWordListValuesOnly);\r
 475     fileAndWordListValuesOnly = fileAndWordListValuesOnly.sort(compare_nbMots);\r
 476     //alert("t: "+fileAndWordListValuesOnly.join(';'));\r
 477 \r
 478     var listToOutput = new Array();\r
 479 \r
 480     for (var j in fileAndWordListValuesOnly) {\r
 481         for (t in temptab) {\r
 482             if (temptab[t].motsliste == fileAndWordListValuesOnly[j]) {\r
 483                 if (listToOutput[j] == undefined) {\r
 484                     listToOutput[j] = new Array(temptab[t]);\r
 485                 } else {\r
 486                     listToOutput[j].push(temptab[t]);\r
 487                 }\r
 488             }\r
 489         }\r
 490     }\r
 491     return listToOutput;\r
 492 }\r
 493 \r
 494 function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay) {\r
 495     this.filenb = filenb;\r
 496     this.motsliste = motsliste;\r
 497     this.motsnb = motsnb;\r
 498     this.motslisteDisplay= motslisteDisplay;\r
 499 }\r
 500 \r
 501 function compare_nbMots(s1, s2) {\r
 502     var t1 = s1.split(',');\r
 503     var t2 = s2.split(',');\r
 504     //alert ("s1:"+t1.length + " " +t2.length)\r
 505     if (t1.length == t2.length) {\r
 506         return 0;\r
 507     } else if (t1.length > t2.length) {\r
 508         return 1;\r
 509     } else {\r
 510         return -1;\r
 511     }\r
 512     //return t1.length - t2.length);\r
 513 }