You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

PolarityBasic.java 3.0 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. package eshore.cn.it.sentiment;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import com.aliasi.classify.Classification;
  5. import com.aliasi.classify.Classified;
  6. import com.aliasi.classify.DynamicLMClassifier;
  7. import com.aliasi.lm.NGramProcessLM;
  8. import com.aliasi.util.Files;
  9. public class PolarityBasic {
  10. File mPolarityDir;
  11. String[] mCategories;
  12. DynamicLMClassifier<NGramProcessLM> mClassifier;
  13. public PolarityBasic(String[] args) {
  14. mPolarityDir = new File("data/polarity_corpus","txt_sentoken");
  15. mCategories = mPolarityDir.list();
  16. int nGram = 8;
  17. mClassifier
  18. = DynamicLMClassifier
  19. .createNGramProcess(mCategories,nGram);
  20. }
  21. public static void main(String[] args) {
  22. try {
  23. new PolarityBasic(args).run();
  24. } catch (Throwable t) {
  25. System.out.println("Thrown: " + t);
  26. t.printStackTrace(System.out);
  27. }
  28. }
  29. private void run() throws ClassNotFoundException,
  30. IOException {
  31. train();
  32. evaluate();
  33. }
  34. private void train() throws IOException {
  35. for (int i = 0; i < mCategories.length; ++i) {
  36. String category = mCategories[i];
  37. Classification classification
  38. = new Classification(category);
  39. File dir = new File(mPolarityDir, mCategories[i]);
  40. File[] trainFiles = dir.listFiles();
  41. for (int j = 0; j < trainFiles.length; ++j) {
  42. File trainFile = trainFiles[j];
  43. if (isTrainingFile(trainFile)) {
  44. String review
  45. = Files.readFromFile(trainFile,"ISO-8859-1");
  46. Classified<CharSequence> classified
  47. = new Classified<CharSequence>(review,classification);
  48. mClassifier.handle(classified);
  49. }
  50. }
  51. }
  52. }
  53. boolean isTrainingFile(File file) {
  54. return file.getName().charAt(2) != '9'; // test on fold 9
  55. }
  56. void evaluate() throws IOException {
  57. int numTests = 0;
  58. int numCorrect = 0;
  59. for (int i = 0; i < mCategories.length; ++i) {
  60. String category = mCategories[i];
  61. File file = new File(mPolarityDir,mCategories[i]);
  62. File[] testFiles = file.listFiles();
  63. for (int j = 0; j < testFiles.length; ++j) {
  64. File testFile = testFiles[j];
  65. if (!isTrainingFile(testFile)) {
  66. String review
  67. = Files.readFromFile(testFile,"ISO-8859-1");
  68. ++numTests;
  69. Classification classification
  70. = mClassifier.classify(review);
  71. String resultCategory
  72. = classification.bestCategory();
  73. if (resultCategory.equals(category))
  74. ++numCorrect;
  75. }
  76. }
  77. }
  78. System.out.println(" # Test Cases="
  79. + numTests);
  80. System.out.println(" # Correct="
  81. + numCorrect);
  82. System.out.println(" % Correct="
  83. + ((double)numCorrect)
  84. /(double)numTests);
  85. }
  86. }

基于自然语言处理的情感分析工具

Contributors (1)