You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

AggregationRecognition.java 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. package addition;
  2. import nlp.ds.DependencyTree;
  3. import nlp.ds.DependencyTreeNode;
  4. import nlp.ds.Word;
  5. import qa.Globals;
  6. import rdf.Sparql;
  7. import rdf.Triple;
  8. import log.QueryLogger;
  9. public class AggregationRecognition {
  10. // Numbers
  11. static String x[]={"zero","one","two","three","four","five","six","seven","eight","nine"};
  12. static String y[]={"ten","eleven","twelve","thirteen","fourteen","fifteen","sixteen","seventeen","eighteen","nineteen"};
  13. static String z[]={"twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"};
  14. static int b;
  15. public static Integer translateNumbers(String str) // 1~100
  16. {
  17. int flag;
  18. try {
  19. b=Integer.valueOf(str);
  20. flag=1;
  21. }
  22. catch (Exception e){
  23. flag=2;
  24. }
  25. int i,j;
  26. switch(flag)
  27. {
  28. case 1:
  29. return b;
  30. case 2: // Words need to be translated into numbers
  31. for(i=0;i<8;i++) // 20~99
  32. {
  33. for(j=0;j<10;j++)
  34. {
  35. String str1=z[i],str2=x[j];
  36. if(str.equals((str1))){
  37. return i*10+20; // 1x
  38. }
  39. else if(str.equals((str1+" "+str2))){
  40. return i*10+j+20;
  41. }
  42. }
  43. }
  44. for(i=0;i<10;i++){
  45. if(str.equals(x[i])){
  46. return i;
  47. }
  48. else if(str.equals(y[i])){
  49. return 10+i;
  50. }
  51. }
  52. System.out.println("Warning: Can not Translate Number: " + str);
  53. }
  54. return 1;
  55. }
  56. public void recognize(QueryLogger qlog)
  57. {
  58. DependencyTree ds = qlog.s.dependencyTreeStanford;
  59. if(qlog.isMaltParserUsed)
  60. ds = qlog.s.dependencyTreeMalt;
  61. Word[] words = qlog.s.words;
  62. // how often | how many
  63. if(qlog.s.plainText.indexOf("How many")!=-1||qlog.s.plainText.indexOf("How often")!=-1||qlog.s.plainText.indexOf("how many")!=-1||qlog.s.plainText.indexOf("how often")!=-1)
  64. {
  65. for(Sparql sp: qlog.rankedSparqls)
  66. {
  67. sp.countTarget = true;
  68. // How many pages does War and Peace have? --> res:War_and_Peace dbo:numberOfPages ?n .
  69. // ?uri dbo:populationTotal ?inhabitants .
  70. for(Triple triple: sp.tripleList)
  71. {
  72. String p = Globals.pd.getPredicateById(triple.predicateID).toLowerCase();
  73. if(p.contains("number") || p.contains("total") || p.contains("calories") || p.contains("satellites"))
  74. {
  75. sp.countTarget = false;
  76. }
  77. }
  78. }
  79. }
  80. // more than [num] [node]
  81. for(DependencyTreeNode dtn: ds.nodesList)
  82. {
  83. if(dtn.word.baseForm.equals("more"))
  84. {
  85. if(dtn.father!=null && dtn.father.word.baseForm.equals("than"))
  86. {
  87. DependencyTreeNode tmp = dtn.father;
  88. if(tmp.father!=null && tmp.father.word.posTag.equals("CD") && tmp.father.father!=null && tmp.father.father.word.posTag.startsWith("N"))
  89. {
  90. DependencyTreeNode target = tmp.father.father;
  91. // Which caves have more than 3 entrances | entranceCount | filter
  92. for(Sparql sp: qlog.rankedSparqls)
  93. {
  94. if(target.father !=null && target.father.word.baseForm.equals("have"))
  95. {
  96. sp.moreThanStr = "GROUP BY ?" + qlog.target.originalForm + "\nHAVING (COUNT(?"+target.word.originalForm + ") > "+tmp.father.word.baseForm+")";
  97. }
  98. else
  99. {
  100. int num = translateNumbers(tmp.father.word.baseForm);
  101. sp.moreThanStr = "FILTER (?"+target.word.originalForm+"> " + num + ")";
  102. }
  103. }
  104. }
  105. }
  106. }
  107. }
  108. // most
  109. for(Word word: words)
  110. {
  111. if(word.baseForm.equals("most"))
  112. {
  113. Word modifiedWord = word.modifiedWord;
  114. if(modifiedWord != null)
  115. {
  116. for(Sparql sp: qlog.rankedSparqls)
  117. {
  118. // Which Indian company has the most employees? --> ... dbo:numberOfEmployees ?n . || ?employees dbo:company ...
  119. sp.mostStr = "ORDER BY DESC(COUNT(?"+modifiedWord.originalForm+"))\nOFFSET 0 LIMIT 1";
  120. for(Triple triple: sp.tripleList)
  121. {
  122. String p = Globals.pd.getPredicateById(triple.predicateID).toLowerCase();
  123. if(p.contains("number") || p.contains("total"))
  124. {
  125. sp.mostStr = "ORDER BY DESC(?"+modifiedWord.originalForm+")\nOFFSET 0 LIMIT 1";
  126. }
  127. }
  128. }
  129. }
  130. }
  131. }
  132. }
  133. public static void main(String[] args) {
  134. System.out.println(translateNumbers("Twelve"));
  135. System.out.println(translateNumbers("thirty two"));
  136. }
  137. }

The GAnswer system is a natural language QA system developed by the Data Management Lab of the Institute of Computer Science & Technology, Peking University, led by Prof. Zou Lei. GAnswer is able to translate natural language questions to query graphs containing semant