1 /*----------------------------------------------------------------------------
\r
2 * JavaScript for webhelp search
\r
3 *----------------------------------------------------------------------------
\r
4 This file is part of the webhelpsearch plugin for DocBook WebHelp
\r
5 Copyright (c) 2007-2008 NexWave Solutions All Rights Reserved.
\r
6 www.nexwave.biz Nadege Quaine
\r
7 http://kasunbg.blogspot.com/ Kasun Gajasinghe
\r
10 // String initialization: file names of the generated index scripts.
\r
// htmlfileList is appended to the "../search/" path by loadTheIndexScripts().
11 var htmlfileList = "htmlFileList.js";
\r
12 var htmlfileinfoList = "htmlFileInfoList.js";
\r
// Recomputed by Effectuer_recherche() from indexerLanguage ("zh", "ja" or "ko" => true).
13 var useCJKTokenizing = false;
\r
/**
 * Checks the validity of the search expression typed by the user and, when
 * it is usable, runs the search.
 *
 * Reads the expression from document.ditaSearch_Form.textToSearch, stores it
 * in the 'textToSearch' cookie, then either alerts that at least one
 * character is required or calls Effectuer_recherche().
 *
 * @param ditaSearch_Form - unused; the form is always looked up through
 *     document.ditaSearch_Form.
 */
function Verifie(ditaSearch_Form) {

    // Check browser compatibility. The user agent token is "Konqueror"; the
    // previous spelling "Konquerer" could never match, so this guard was dead.
    if (navigator.userAgent.indexOf("Konqueror") > -1) {
        alert(txt_browser_not_supported);
        return;
    }

    var expressionInput = document.ditaSearch_Form.textToSearch.value;
    // Set a cookie to store the searched keywords
    $.cookie('textToSearch', expressionInput);

    if (expressionInput.length < 1) {
        // expression is invalid
        alert(txt_enter_at_least_1_char);
    } else {
        // Run the search
        Effectuer_recherche(expressionInput);
    }

    // Give the focus back to the search field (useful because of frames)
    document.ditaSearch_Form.textToSearch.focus();
}
\r
48 var stemQueryMap = new Array(); // A hashtable which maps stems (or the raw word when no stemmer is loaded) to query words; filled by tokenize(), read by SortResults()
\r
50 /* This function parses the search expression, loads the indices and displays the results*/
\r
// Input: expressionInput, the raw text typed by the user.
// Output: none visible; the generated HTML is written into the element with id 'searchResults'.
// Relies on names defined outside this function: w (word -> file index list), fil (file info strings),
// txt_results_for, indexerLanguage, useCJKTokenizing.
// NOTE(review): this listing omits several short lines (closing braces, else branches), so the
// control flow described in the added comments is inferred from the visible lines only.
51 function Effectuer_recherche(expressionInput) {
\r
53 /* Display a waiting message */
\r
54 //DisplayWaitingMessage();
\r
56 /*data initialisation*/
\r
57 var searchFor = ""; // lower-cased expression with the special characters removed
\r
58 //w = new Object(); // hashtable, key=word, value = list of the index of the html files
\r
// Assigned without var: tokenize() reads scriptLetterTab, so it has to be reachable outside this function.
59 scriptLetterTab = new Scriptfirstchar(); // Array containing the first letter of each word to look for
\r
60 var wordsList = new Array(); // Array with the words to look for
\r
61 var finalWordsList = new Array(); // Array with the words to look for after removing spaces
\r
62 var linkTab = new Array();
\r
63 var fileAndWordList = new Array();
\r
64 var txt_wordsnotfound = "";
\r
67 /*nqu: expressionInput (the search) is lower-cased and its special characters are replaced*/
\r
// "</" and "$_" are protected with placeholders, separators (. %2C %3B %21 %3A @ / *) become spaces,
// then the placeholders are restored as "</" and "%24_".
68 searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");
\r
// Collapse runs of spaces and trim one leading/trailing space before splitting into words.
70 searchFor = searchFor.replace(/ +/g, " ");
\r
71 searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");
\r
73 wordsList = searchFor.split(" ");
\r
76 //set the tokenizing method
\r
77 if(typeof indexerLanguage != "undefined" && (indexerLanguage=="zh" || indexerLanguage=="ja" ||indexerLanguage=="ko")){
\r
78 useCJKTokenizing=true;
\r
80 useCJKTokenizing=false;
\r
82 //If Lucene CJKTokenizer was used as the indexer, then useCJKTokenizing will be true. Else, do normal tokenizing.
\r
83 // 2-gram tokenizing happens in CJKTokenizing,
\r
84 if(useCJKTokenizing){
\r
85 finalWordsList = cjkTokenize(wordsList);
\r
87 finalWordsList = tokenize(wordsList);
\r
90 //load the scripts with the indices: the following lines do not work on the server. To be corrected
\r
// Assigned without var; the returned array is not used again in the visible lines.
92 scriptsarray = loadTheIndexScripts (scriptLetterTab);
\r
96 * Compare with the indexed words (in the w[] array), and push words that are in it to tempTab.
\r
98 var tempTab = new Array();
\r
99 for (var t in finalWordsList) {
\r
100 if (w[finalWordsList[t].toString()] == undefined) {
\r
101 txt_wordsnotfound += finalWordsList[t] + " ";
\r
103 tempTab.push(finalWordsList[t]);
\r
106 finalWordsList = tempTab;
\r
108 if (finalWordsList.length) {
\r
110 //search 'and' and 'or' one time
\r
111 fileAndWordList = SortResults(finalWordsList);
\r
// The groups returned by SortResults are walked from the last index down to the first.
113 var cpt = fileAndWordList.length;
\r
114 for (var i = cpt - 1; i >= 0; i--) {
\r
115 if (fileAndWordList[i] != undefined) {
\r
116 linkTab.push("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + fileAndWordList[i][0].motslisteDisplay + "</span>" + "</p>");
\r
118 linkTab.push("<ul class='searchresult'>");
\r
119 for (t in fileAndWordList[i]) {
\r
120 //DEBUG: alert(": "+ fileAndWordList[i][t].filenb+" " +fileAndWordList[i][t].motsliste);
\r
121 //linkTab.push("<li><a href=\"../"+fl[fileAndWordList[i][t].filenb]+"\">"+fl[fileAndWordList[i][t].filenb]+"</a></li>");
\r
// Each fil[] entry is split on "@@@" into path, title and short description.
122 var tempInfo = fil[fileAndWordList[i][t].filenb];
\r
123 var pos1 = tempInfo.indexOf("@@@");
\r
124 var pos2 = tempInfo.lastIndexOf("@@@");
\r
125 var tempPath = tempInfo.substring(0, pos1);
\r
126 var tempTitle = tempInfo.substring(pos1 + 3, pos2);
\r
127 var tempShortdesc = tempInfo.substring(pos2 + 3, tempInfo.length);
\r
129 //file:///home/kasun/docbook/WEBHELP/webhelp-draft-output-format-idea/src/main/resources/web/webhelp/installation.html
\r
// NOTE(review): the href value is emitted without surrounding quotes; a path containing a space or ">" would break the link.
130 var linkString = "<li><a href=" + tempPath + ">" + tempTitle + "</a>";
\r
131 // var linkString = "<li><a href=\"installation.html\">" + tempTitle + "</a>";
\r
// The literal string "null" marks a missing short description.
132 if ((tempShortdesc != "null")) {
\r
133 linkString += "\n<div class=\"shortdesclink\">" + tempShortdesc + "</div>";
\r
135 linkString += "</li>";
\r
136 linkTab.push(linkString);
\r
138 linkTab.push("</ul>");
\r
// NOTE(review): no declaration of `results` is visible in this listing; confirm where it is initialised.
144 if (linkTab.length > 0) {
\r
145 /*writeln ("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + cleanwordsList + "</span>" + "<br/>"+"</p>");*/
\r
147 //write("<ul class='searchresult'>");
\r
148 for (t in linkTab) {
\r
149 results += linkTab[t].toString();
\r
// NOTE(review): txt_wordsnotfound is built from the user's query words and reaches innerHTML unescaped; confirm whether escaping is needed.
153 results = "<p>" + "Your search returned no results for " + "<span class=\"searchExpression\">" + txt_wordsnotfound + "</span>" + "</p>";
\r
156 document.getElementById('searchResults').innerHTML = results;
\r
// Cleans the query words and, when a global stemmer() function is available, stems them.
// Param: wordsList = array of query words; its entries are rewritten in place (%22 and a leading "-" are removed).
// Side effects: fills stemQueryMap and registers each word's first character in scriptLetterTab.
// Return value: the stemmed list when stemmer is defined, otherwise the cleaned list.
159 function tokenize(wordsList){
\r
160 var stemmedWordsList = new Array(); // Array with the words to look for after removing spaces
\r
161 var cleanwordsList = new Array(); // Array with the words to look for
\r
// First pass: remember which query word produced each stem (or the word itself when there is no stemmer).
162 for(var j in wordsList){
\r
163 var word = wordsList[j];
\r
164 if(typeof stemmer != "undefined" ){
\r
165 stemQueryMap[stemmer(word)] = word;
\r
167 stemQueryMap[word] = word;
\r
170 //stemmedWordsList is the stemmed list of words separated by spaces.
\r
// Second pass: strip %22 and a leading "-", skip "%20" entries, record the first letter of each kept word.
171 for (var t in wordsList) {
\r
172 wordsList[t] = wordsList[t].replace(/(%22)|^-/g, "");
\r
173 if (wordsList[t] != "%20") {
\r
174 scriptLetterTab.add(wordsList[t].charAt(0));
\r
175 cleanwordsList.push(wordsList[t]);
\r
179 if(typeof stemmer != "undefined" ){
\r
180 //Do the stemming using Porter's stemming algorithm
\r
181 for (var i = 0; i < cleanwordsList.length; i++) {
\r
182 var stemWord = stemmer(cleanwordsList[i]);
\r
183 stemmedWordsList.push(stemWord);
\r
186 stemmedWordsList = cleanwordsList;
\r
188 return stemmedWordsList;
\r
191 //Invoker of CJKTokenizer class methods.
\r
// Param: wordsList = array of query words.
// Words whose getAvgAsciiValue() is below 127 are set aside and passed to tokenize();
// the visible lines run the others through a CJKTokenizer and collect the tokens.
// NOTE(review): the branch separator and the return statement are not visible in this listing.
192 function cjkTokenize(wordsList){
\r
193 var allTokens= new Array();
\r
194 var notCJKTokens= new Array();
\r
196 for(j=0;j<wordsList.length;j++){
\r
197 var word = wordsList[j];
\r
198 if(getAvgAsciiValue(word) < 127){
\r
199 notCJKTokens.push(word);
\r
201 var tokenizer = new CJKTokenizer(word);
\r
202 var tokensTmp = tokenizer.getAllTokens();
\r
203 allTokens = allTokens.concat(tokensTmp);
\r
// The non-CJK words go through the regular tokenizer and are appended after the CJK tokens.
206 allTokens = allTokens.concat(tokenize(notCJKTokens));
\r
//A simple way to determine whether the query is in english or not.
/**
 * Returns the mean character code (String.prototype.charCodeAt) of the first
 * five characters of `word`, or of the whole word when it is shorter.
 *
 * cjkTokenize() compares the result with 127 to decide whether a word should
 * go through the CJK tokenizer.
 *
 * @param {string} word - query word to inspect
 * @returns {number} the average character code; 0 for an empty word (the
 *     unguarded division 0/0 would otherwise yield NaN)
 */
function getAvgAsciiValue(word){
    var num = word.length < 5 ? word.length : 5;
    if (num === 0) {
        // Nothing to average; avoid dividing by zero.
        return 0;
    }
    var tmp = 0;
    for (var i = 0; i < num; i++) {
        tmp += word.charCodeAt(i);
    }
    return tmp / num;
}
\r
// Tokenizer object used by cjkTokenize() for words that are not treated as ASCII.
// Param: input = the word to split.
// Members set in the visible lines: input, tokens, incrementToken, tokenize, getAllTokens, unique.
// tokenize() returns the two-character substring starting at this.offset; getAllTokens() pushes one
// such token per successful incrementToken() call and returns this.unique(this.tokens).
// NOTE(review): the initialisation of this.offset and the header of the duplicate-removal helper
// near the end of the block are not visible in this listing.
222 function CJKTokenizer(input){
\r
223 this.input = input;
\r
225 this.tokens = new Array();
\r
226 this.incrementToken = incrementToken;
\r
227 this.tokenize = tokenize;
\r
228 this.getAllTokens = getAllTokens;
\r
229 this.unique = unique;
\r
// Stop condition: true once fewer than two characters remain after this.offset.
231 function incrementToken(){
\r
232 if(this.input.length - 2 <= this.offset){
\r
233 // console.log("false "+offset);
\r
242 function tokenize(){
\r
243 //document.getElementById("content").innerHTML += x.substring(offset,offset+2)+"<br>";
\r
244 return this.input.substring(this.offset,this.offset+2);
\r
247 function getAllTokens(){
\r
248 while(this.incrementToken()){
\r
249 var tmp = this.tokenize();
\r
250 this.tokens.push(tmp);
\r
252 return this.unique(this.tokens);
\r
253 // document.getElementById("content").innerHTML += tokens+" ";
\r
254 // document.getElementById("content").innerHTML += "<br>dada"+sortedTokens+" ";
\r
255 // console.log(tokens.length+"dsdsds");
\r
256 /*for(i=0;i<tokens.length;i++){
\r
257 console.log(tokens[i]);
\r
258 var ss = tokens[i] == sortedTokens[i];
\r
260 // document.getElementById("content").innerHTML += "<br>dada"+un[i]+"- "+stems[i]+" "+ ss;
\r
261 document.getElementById("content").innerHTML += "<br>"+sortedTokens[i];
\r
267 var r = new Array();
\r
268 o:for(var i = 0, n = a.length; i < n; i++)
\r
270 for(var x = 0, y = r.length; x < y; x++)
\r
272 if(r[x]==a[i]) continue o;
\r
274 r[r.length] = a[i];
\r
/* Scriptfirstchar: to gather the first letter of index js files to upload */
/**
 * Accumulator of distinct first letters.
 *
 * strLetters holds the collected characters as one string; add() appends a
 * character only when it is not already present. loadTheIndexScripts() turns
 * each character of strLetters into an index script path.
 */
function Scriptfirstchar() {
    this.strLetters = "";
    this.add = addLettre;
}

/**
 * Adds `caract` to this.strLetters unless it is already there.
 * Meant to be called as a method (this = a Scriptfirstchar instance).
 *
 * @param {string} caract - character to register
 * @returns {number} always 0
 */
function addLettre(caract) {
    // typeof yields the string 'undefined'; comparing the value itself with
    // that string never matched an actually undefined strLetters, which then
    // threw on .indexOf below.
    if (typeof this.strLetters == 'undefined') {
        this.strLetters = caract;
    } else if (this.strLetters.indexOf(caract) < 0) {
        this.strLetters += caract;
    }

    return 0;
}
\r
297 /* end of scriptfirstchar */
\r
299 /*main loader function*/
\r
300 /*tab contains the first letters of each word looked for*/
\r
// Builds one "../search/<letter>.js" path per character of tab.strLetters, adds the path of the
// html file list (htmlfileList), and registers every path in a new ScriptLoader.
// Return value: the array of script paths.
// NOTE(review): no call to the loader's load() method is visible in this listing; confirm the scripts are actually injected.
301 function loadTheIndexScripts(tab) {
\r
303 //alert (tab.strLetters);
\r
304 var scriptsarray = new Array();
\r
306 for (var i = 0; i < tab.strLetters.length; i++) {
\r
308 scriptsarray[i] = "..\/search" + "\/" + tab.strLetters.charAt(i) + ".js";
\r
310 // add the list of html files
\r
312 scriptsarray[i] = "..\/search" + "\/" + htmlfileList;
\r
// Debug-only loop: its visible body is a commented-out alert.
315 for (var t in scriptsarray) {
\r
316 //alert (scriptsarray[t]);
\r
// The tab parameter is reused here to hold the ScriptLoader instance.
319 tab = new ScriptLoader();
\r
320 for (t in scriptsarray) {
\r
321 tab.add(scriptsarray[t]);
\r
324 //alert ("scripts loaded");
\r
325 return (scriptsarray);
\r
328 /* ScriptLoader: to load the scripts and wait until loading has finished */
\r
// Constructor. scriptTab collects the script paths; add, load and onScriptLoaded are bound to the
// free functions addAScriptInTheList, loadTheScripts and onScriptLoadedFunc.
329 function ScriptLoader() {
\r
331 this.scriptTab = new Array();
\r
332 this.add = addAScriptInTheList;
\r
333 this.load = loadTheScripts;
\r
334 this.onScriptLoaded = onScriptLoadedFunc;
\r
// ScriptLoader.add: appends scriptPath to this.scriptTab.
337 function addAScriptInTheList(scriptPath) {
\r
338 this.scriptTab.push(scriptPath);
\r
// ScriptLoader.load: creates one <script> element per entry of this.scriptTab and appends it to
// the first <head> element of the document.
// NOTE(review): no declarations of `script` and `head` are visible in this listing.
341 function loadTheScripts() {
\r
345 head = document.getElementsByTagName('head').item(0);
\r
347 //script = document.createElement('script');
\r
349 for (var el in this.scriptTab) {
\r
350 //alert (el+this.scriptTab[el]);
\r
351 script = document.createElement('script');
\r
352 script.src = this.scriptTab[el];
\r
353 script.type = 'text/javascript';
\r
354 script.defer = false;
\r
356 head.appendChild(script);
\r
/**
 * Load-event handler for an injected script element.
 *
 * Counts the script as loaded and, once ScriptLoader.cpt reaches
 * ScriptLoader.scripts.length, calls ScriptLoader.onLoadComplete().
 * When the target exposes a readyState, only "complete" and "loaded" count.
 *
 * @param e - the event; falls back to window.event when not supplied
 */
function onScriptLoadedFunc(e) {
    e = e || window.event;
    var target = e.target || e.srcElement;
    var isComplete = true;
    // typeof returns a string, so it must be compared with 'undefined'.
    // Comparing with the value undefined was always true, which left
    // isComplete false for every target without a readyState property.
    if (typeof target.readyState != 'undefined') {
        isComplete = (target.readyState == "complete" || target.readyState == "loaded");
    }
    if (isComplete) {
        ScriptLoader.cpt++;
        if (ScriptLoader.cpt == ScriptLoader.scripts.length) {
            ScriptLoader.onLoadComplete();
        }
    }
}
\r
// Debug hook: shows an alert.
// NOTE(review): onScriptLoadedFunc calls ScriptLoader.onLoadComplete, but no assignment of this
// function to ScriptLoader is visible in this listing.
378 function onLoadComplete() {
\r
379 alert("loaded !!");
\r
382 /* End of scriptloader functions */
\r
// Array.unique( strict ) - Remove duplicate values
/**
 * Returns a new array holding the elements of `tab` in first-seen order with
 * duplicates removed. Elements are compared with loose equality (==), as
 * done by indexof().
 *
 * An empty list yields an empty array, so the result can always be sorted
 * or iterated; this replaces the earlier -1 sentinel for an empty list or an
 * undefined first element.
 *
 * @param {Array} tab - list to de-duplicate (not modified)
 * @returns {Array} the de-duplicated copy
 */
function unique(tab) {
    var a = new Array();
    var l = tab.length;

    for (var i = 0; i < l; i++) {
        if (indexof(a, tab[i], 0) < 0) {
            a.push(tab[i]);
        }
    }
    return a;
}

/**
 * Returns the index of the first entry of `tab`, at or after `begin`, that
 * is loosely equal (==) to `element`.
 *
 * @param {Array} tab - list to search
 * @param {*} element - value to look for
 * @param {number} begin - index at which the search starts
 * @returns {number} the matching index, or -1 when there is none
 */
function indexof(tab, element, begin) {
    for (var i = begin; i < tab.length; i++) {
        if (tab[i] == element) {
            return i;
        }
    }
    return -1;
}
\r
413 /* end of Array functions */
\r
417 Param: mots= list of words to look for.
\r
418 This function creates a hashtable:
\r
419 - The key is the index of a html file which contains a word to look for.
\r
420 - The value is the list of all words contained in the html file.
\r
422 Return value: the hashtable fileAndWordList
\r
// Groups the matching files by the set of query words they contain.
// Param: mots = list of words to look for; each one is looked up in the index w.
// Return value: listToOutput, an array whose entry j is an array of resultPerFile objects
// sharing the same comma-separated word list (fileAndWordListValuesOnly[j]).
// NOTE(review): this listing omits several short lines (closing braces, else branches, the
// early return for an empty mots); the comments below describe the visible lines only.
424 function SortResults(mots) {
\r
426 var fileAndWordList = new Object();
\r
427 if (mots.length == 0) {
\r
431 for (var t in mots) {
\r
432 // get the list of the indices of the files.
\r
433 var listNumerosDesFicStr = w[mots[t].toString()];
\r
434 //alert ("listNumerosDesFicStr "+listNumerosDesFicStr);
\r
435 var tab = listNumerosDesFicStr.split(",");
\r
437 //for each file (file's index):
\r
// fileAndWordList[fileIndex] accumulates the matching words as a comma-separated string.
438 for (var t2 in tab) {
\r
439 var temp = tab[t2].toString();
\r
440 if (fileAndWordList[temp] == undefined) {
\r
442 fileAndWordList[temp] = "" + mots[t];
\r
445 fileAndWordList[temp] += "," + mots[t];
\r
450 var fileAndWordListValuesOnly = new Array();
\r
452 // sort results according to values
\r
453 var temptab = new Array();
\r
454 for (t in fileAndWordList) {
\r
455 tab = fileAndWordList[t].split(',');
\r
// Build the display string: each stem is mapped back to its query word through stemQueryMap when possible.
457 var tempDisplay = new Array();
\r
458 for (var x in tab) {
\r
459 if(stemQueryMap[tab[x]] != undefined){
\r
460 tempDisplay.push(stemQueryMap[tab[x]]); //get the original word from the stem word.
\r
462 tempDisplay.push(tab[x]); //no stem is available. (probably a CJK language)
\r
465 var tempDispString = tempDisplay.join(", ");
\r
467 temptab.push(new resultPerFile(t, fileAndWordList[t], tab.length, tempDispString));
\r
468 fileAndWordListValuesOnly.push(fileAndWordList[t]);
\r
472 //alert("t"+fileAndWordListValuesOnly.toString());
\r
// Distinct word lists, ordered by compare_nbMots (which compares the number of words in each list).
474 fileAndWordListValuesOnly = unique(fileAndWordListValuesOnly);
\r
475 fileAndWordListValuesOnly = fileAndWordListValuesOnly.sort(compare_nbMots);
\r
476 //alert("t: "+fileAndWordListValuesOnly.join(';'));
\r
478 var listToOutput = new Array();
\r
// Bucket every per-file result under the index of its word list.
480 for (var j in fileAndWordListValuesOnly) {
\r
481 for (t in temptab) {
\r
482 if (temptab[t].motsliste == fileAndWordListValuesOnly[j]) {
\r
483 if (listToOutput[j] == undefined) {
\r
484 listToOutput[j] = new Array(temptab[t]);
\r
486 listToOutput[j].push(temptab[t]);
\r
491 return listToOutput;
\r
// Constructor for one search hit, created by SortResults().
// filenb: index of the html file (used as a key into fil by Effectuer_recherche).
// motsliste: comma-separated list of the matched words.
// motsnb: number of matched words.
// motslisteDisplay: matched words joined with ", " for display.
494 function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay) {
\r
495 this.filenb = filenb;
\r
496 this.motsliste = motsliste;
\r
497 this.motsnb = motsnb;
\r
498 this.motslisteDisplay= motslisteDisplay;
\r
// Comparator passed to Array.sort() by SortResults(): s1 and s2 are comma-separated word lists
// and are compared by their number of items.
// NOTE(review): the return statements are not visible in this listing; confirm the sort direction
// (the commented-out line at the end suggests t1.length - t2.length).
501 function compare_nbMots(s1, s2) {
\r
502 var t1 = s1.split(',');
\r
503 var t2 = s2.split(',');
\r
504 //alert ("s1:"+t1.length + " " +t2.length)
\r
505 if (t1.length == t2.length) {
\r
507 } else if (t1.length > t2.length) {
\r
512 //return t1.length - t2.length);
\r