| Parametrization-for-Z-Axis-Solution.java | Subtyping-for-Z-Axis-Solution.java | ||
| 1 | import java.util.List; | 1 | import java.util.List; |
| 2 | import java.util.Locale; | 2 | import java.util.Locale; |
| 3 | import java.util.stream.Stream; | 3 | import java.util.stream.Stream; |
| 4 | 4 | ||
| 5 | /* **************************************** | 5 | /* **************************************** |
| 6 | * 1) CORE LANGUAGE-SPECIFIC TYPES (AXIS Z) | 6 | * 1) CORE LANGUAGE-SPECIFIC TYPES (AXIS Z) |
| 7 | ******************************************/ | 7 | ******************************************/ |
| 8 | 8 | ||
| 9 | /* | 9 | /* |
| 10 | * 1.A) PART OF SPEECH | 10 | * 1.A) PART OF SPEECH |
| 11 | * https://en.wikipedia.org/wiki/Part_of_speech | 11 | * https://en.wikipedia.org/wiki/Part_of_speech |
| 12 | */ | 12 | */ |
| 13 | 13 | ||
| 14 | // | 14 | // |
| 15 | // 1.A.I) ENGLISH PART OF SPEECH | 15 | // 1.A.I) ENGLISH PART OF SPEECH |
| 16 | // | 16 | // |
| 17 | 17 | ||
| 18 | /** | 18 | /** |
| 19 | * Implementing classes consist of one or more part-of-speech tags (each tag provided by a different tagger). | 19 | * Implementing classes consist of one or more part-of-speech tags (each tag provided by a different tagger). |
| 20 | */ | 20 | */ |
| 21 | interface PartOfSpeech { | 21 | interface PartOfSpeech { |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | class EnglishPartOfSpeech implements PartOfSpeech { | 24 | class EnglishPartOfSpeech implements PartOfSpeech { |
| 25 | private final Type1EnglishTag type1Tag = Type1EnglishTag.TO; // SIMPLIFICATION | 25 | private final Type1EnglishTag type1Tag = Type1EnglishTag.TO; // SIMPLIFICATION |
| 26 | private final Type2EnglishTag type2Tag = Type2EnglishTag.GENERAL_PREPOSITION; // SIMPLIFICATION | 26 | private final Type2EnglishTag type2Tag = Type2EnglishTag.GENERAL_PREPOSITION; // SIMPLIFICATION |
| 27 | 27 | ||
| 28 | boolean isInfinitiveMarker() { | 28 | boolean isInfinitiveMarker() { |
| 29 | return type1Tag == Type1EnglishTag.TO && type2Tag == Type2EnglishTag.INFINITIVE_MARKER; | 29 | return type1Tag == Type1EnglishTag.TO && type2Tag == Type2EnglishTag.INFINITIVE_MARKER; |
| 30 | } | 30 | } |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | enum Type1EnglishTag { | 33 | enum Type1EnglishTag { |
| 34 | // SIMPLIFICATION (partial content) | 34 | // SIMPLIFICATION (partial content) |
| 35 | TO // "to" as a preposition or an infinitive marker | 35 | TO // "to" as a preposition or an infinitive marker |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | enum Type2EnglishTag { | 38 | enum Type2EnglishTag { |
| 39 | // SIMPLIFICATION (partial content) | 39 | // SIMPLIFICATION (partial content) |
| 40 | GENERAL_PREPOSITION, // e.g. "to" in "I said to him that..." | 40 | GENERAL_PREPOSITION, // e.g. "to" in "I said to him that..." |
| 41 | INFINITIVE_MARKER // "to", e.g. in "I want to ask." | 41 | INFINITIVE_MARKER // "to", e.g. in "I want to ask." |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | // | 44 | // |
| 45 | // 1.A.II) GERMAN PART OF SPEECH | 45 | // 1.A.II) GERMAN PART OF SPEECH |
| 46 | // | 46 | // |
| 47 | 47 | ||
| 48 | class GermanPartOfSpeech implements PartOfSpeech { | 48 | class GermanPartOfSpeech implements PartOfSpeech { |
| 49 | private final Type1GermanTag type1Tag = Type1GermanTag.POSTPOSITION; // SIMPLIFICATION | 49 | private final Type1GermanTag type1Tag = Type1GermanTag.POSTPOSITION; // SIMPLIFICATION |
| 50 | 50 | ||
| 51 | boolean isPostposition() { | 51 | boolean isPostposition() { |
| 52 | return type1Tag == Type1GermanTag.POSTPOSITION; | 52 | return type1Tag == Type1GermanTag.POSTPOSITION; |
| 53 | } | 53 | } |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | enum Type1GermanTag { | 56 | enum Type1GermanTag { |
| 57 | // SIMPLIFICATION (partial content) | 57 | // SIMPLIFICATION (partial content) |
| 58 | POSTPOSITION, // e.g. "zufolge" | 58 | POSTPOSITION, // e.g. "zufolge" |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | /* | 61 | /* |
| 62 | * 1.B) CONSTITUENT | 62 | * 1.B) CONSTITUENT |
| 63 | * https://en.wikipedia.org/wiki/Phrase_structure_grammar | 63 | * https://en.wikipedia.org/wiki/Phrase_structure_grammar |
| 64 | */ | 64 | */ |
| 65 | interface ConstituentType { | 65 | interface ConstituentType { |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | enum EnglishConstituentType implements ConstituentType { | 68 | enum EnglishConstituentType implements ConstituentType { |
| 69 | // SIMPLIFICATION (partial content) | 69 | // SIMPLIFICATION (partial content) |
| 70 | VERB_PHRASE, // e.g. "is doing nice" | 70 | VERB_PHRASE, // e.g. "is doing nice" |
| 71 | WH_WORD_NOUN_PHRASE // e.g. "whose daughter" | 71 | WH_WORD_NOUN_PHRASE // e.g. "whose daughter" |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | // enum GermanConstituentType missing as of yet | 74 | // enum GermanConstituentType missing as of yet |
| 75 | 75 | ||
| 76 | /* | 76 | /* |
| 77 | * 1.C) DEPENDENCY | 77 | * 1.C) DEPENDENCY |
| 78 | * https://en.wikipedia.org/wiki/Dependency_grammar | 78 | * https://en.wikipedia.org/wiki/Dependency_grammar |
| 79 | */ | 79 | */ |
| 80 | interface DependencyType { | 80 | interface DependencyType { |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | enum EnglishDependencyType implements DependencyType { | 83 | enum EnglishDependencyType implements DependencyType { |
| 84 | // SIMPLIFICATION (partial content) | 84 | // SIMPLIFICATION (partial content) |
| 85 | NOMINAL_SUBJECT, // e.g. "he" <- "walks" in "He walks." | 85 | NOMINAL_SUBJECT, // e.g. "he" <- "walks" in "He walks." |
| 86 | PHRASAL_VERB_PARTICLE // e.g. "shut" -> "down" in "It was shut down." | 86 | PHRASAL_VERB_PARTICLE // e.g. "shut" -> "down" in "It was shut down." |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | // enum GermanDependencyType missing as of yet | 89 | // enum GermanDependencyType missing as of yet |
| 90 | 90 | ||
| 91 | /* *************** | 91 | /* *************** |
| 92 | * 2) HELPER TYPES | 92 | * 2) HELPER TYPES |
| 93 | *****************/ | 93 | *****************/ |
| 94 | interface TextualRange { | 94 | interface TextualRange { |
| 95 | String text(); | 95 | String text(); |
| 96 | 96 | ||
| 97 | int startOffset(); // inclusive | 97 | int startOffset(); // inclusive |
| 98 | 98 | ||
| 99 | int endOffset(); // exclusive | 99 | int endOffset(); // exclusive |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | interface Seq { | 102 | interface Seq { |
| 103 | 103 | ||
| 104 | /** | 104 | /** |
| 105 | * Stub for <a href="https://www.jooq.org/products/jOOλ/javadoc/0.9.12/org/jooq/lambda/Seq.html#ofType-java.util.stream.Stream-java.lang.Class-">Seq.ofType()</a>. | 105 | * Stub for <a href="https://www.jooq.org/products/jOOλ/javadoc/0.9.12/org/jooq/lambda/Seq.html#ofType-java.util.stream.Stream-java.lang.Class-">Seq.ofType()</a>. |
| 106 | */ | 106 | */ |
| 107 | @SuppressWarnings("unchecked") | 107 | @SuppressWarnings("unchecked") |
| 108 | static <T, U> Stream<U> ofType(Stream<? extends T> stream, Class<? extends U> type) { | 108 | static <T, U> Stream<U> ofType(Stream<? extends T> stream, Class<? extends U> type) { |
| 109 | return stream.filter(type::isInstance).map(t -> (U) t); | 109 | return stream.filter(type::isInstance).map(t -> (U) t); |
| 110 | } | 110 | } |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | /** | 113 | /** |
| 114 | * Defines the stage of text processing (AXIS Y). | 114 | * Defines the stage of text processing (AXIS Y). |
| 115 | */ | 115 | */ |
| 116 | enum Stage { | 116 | enum Stage { |
| 117 | UNPROCESSED, | 117 | UNPROCESSED, |
| 118 | TOKENIZED, // tokens present | 118 | TOKENIZED, // tokens present |
| 119 | SENTENCIZED, // sentences present | 119 | SENTENCIZED, // sentences present |
| 120 | TAGGED, // parts of speech present | 120 | TAGGED, // parts of speech present |
| 121 | PARSED // syntax present | 121 | PARSED // syntax present |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | /* *************************** | 124 | /* *************************** |
| 125 | * 3) SOLUTION USING SUBTYPING | 125 | * 3) SOLUTION USING SUBTYPING |
| 126 | *****************************/ | 126 | *****************************/ |
| 127 | 127 | ||
| 128 | /* | 128 | /* |
| 129 | * 3.A) GENERIC HIERARCHY (AXIS Y OVER X) | 129 | * 3.A) GENERIC HIERARCHY (AXIS Y OVER X) |
| 130 | */ | 130 | */ |
| 131 | 131 | ||
| 132 | // | 132 | // |
| 133 | // 3.A.I) UNPROCESSED | 133 | // 3.A.I) UNPROCESSED |
| 134 | // | 134 | // |
| 135 | interface LocalizedText extends TextualRange { | 135 | interface LocalizedText extends TextualRange { |
| 136 | Locale locale(); | 136 | Locale locale(); |
| 137 | 137 | ||
| 138 | default Stage maxStage() { | 138 | default Stage maxStage() { |
| 139 | return Stage.UNPROCESSED; | 139 | return Stage.UNPROCESSED; |
| 140 | } | 140 | } |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | // | 143 | // |
| 144 | // 3.A.II) WITH TOKENS | 144 | // 3.A.II) WITH TOKENS |
| 145 | // | 145 | // |
| 146 | enum TokenType { | 146 | enum TokenType { |
| 147 | WORD, NUMERIC // SIMPLIFICATION | 147 | WORD, NUMERIC // SIMPLIFICATION |
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | interface Token extends TextualRange { | 150 | interface Token extends TextualRange { |
| 151 | TokenType tokenType(); | 151 | TokenType tokenType(); |
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | interface TokenizedText extends LocalizedText { | 154 | interface TokenizedText extends LocalizedText { |
| 155 | Stream<? extends Token> tokenStream(); | 155 | Stream<? extends Token> tokenStream(); |
| 156 | 156 | ||
| 157 | int tokenCount(); | 157 | int tokenCount(); |
| 158 | 158 | ||
| 159 | @Override | 159 | @Override |
| 160 | default Stage maxStage() { | 160 | default Stage maxStage() { |
| 161 | return Stage.TOKENIZED; | 161 | return Stage.TOKENIZED; |
| 162 | } | 162 | } |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | // | 165 | // |
| 166 | // 3.A.III) WITH SENTENCES | 166 | // 3.A.III) WITH SENTENCES |
| 167 | // | 167 | // |
| 168 | interface Sentence extends TextualRange { | 168 | interface Sentence extends TextualRange { |
| 169 | List<? extends Token> tokens(); | 169 | List<? extends Token> tokens(); |
| 170 | 170 | ||
| 171 | default Stage stage() { | 171 | default Stage stage() { |
| 172 | return Stage.SENTENCIZED; | 172 | return Stage.SENTENCIZED; |
| 173 | } | 173 | } |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | interface SentencizedText extends TokenizedText { | 176 | interface SentencizedText extends TokenizedText { |
| 177 | List<? extends Sentence> sentences(); | 177 | List<? extends Sentence> sentences(); |
| 178 | 178 | ||
| 179 | @Override | 179 | @Override |
| 180 | default Stage maxStage() { | 180 | default Stage maxStage() { |
| 181 | return Stage.SENTENCIZED; | 181 | return Stage.SENTENCIZED; |
| 182 | } | 182 | } |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | // | 185 | // |
| 186 | // 3.A.IV) WITH PARTS OF SPEECH | 186 | // 3.A.IV) WITH PARTS OF SPEECH |
| 187 | // | 187 | // |
| 188 | interface TaggedToken<P extends PartOfSpeech> extends Token { | 188 | interface TaggedToken extends Token { |
| 189 | P partOfSpeech(); // see section 1.A above | 189 | PartOfSpeech partOfSpeech(); // see section 1.A above |
| 190 | } | 190 | } |
| 191 | 191 | ||
| 192 | interface TaggedSentence<P extends PartOfSpeech> extends Sentence { | 192 | interface TaggedSentence extends Sentence { |
| 193 | @Override | 193 | @Override |
| 194 | List<? extends TaggedToken<P>> tokens(); | 194 | List<? extends TaggedToken> tokens(); |
| 195 | 195 | ||
| 196 | @Override | 196 | @Override |
| 197 | default Stage stage() { | 197 | default Stage stage() { |
| 198 | return Stage.TAGGED; | 198 | return Stage.TAGGED; |
| 199 | } | 199 | } |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | interface TaggedText<P extends PartOfSpeech> extends SentencizedText { | 202 | interface TaggedText extends SentencizedText { |
| 203 | // some (or even all) sentences may remain untagged | 203 | // some (or even all) sentences may remain untagged |
| 204 | Stream<? extends TaggedSentence<P>> taggedSentenceStream(); | 204 | Stream<? extends TaggedSentence> taggedSentenceStream(); |
| 205 | 205 | ||
| 206 | @Override | 206 | @Override |
| 207 | default Stage maxStage() { | 207 | default Stage maxStage() { |
| 208 | return Stage.TAGGED; | 208 | return Stage.TAGGED; |
| 209 | } | 209 | } |
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | // | 212 | // |
| 213 | // 3.A.V) WITH SYNTAX | 213 | // 3.A.V) WITH SYNTAX |
| 214 | // | 214 | // |
| 215 | interface Constituent<C extends ConstituentType, P extends PartOfSpeech> { | 215 | interface Constituent { |
| 216 | Stream<? extends Constituent<C, P>> childStream(); | 216 | Stream<? extends Constituent> childStream(); |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | interface ConstituentBranch<C extends ConstituentType, P extends PartOfSpeech> extends Constituent<C, P> { | 219 | interface ConstituentBranch extends Constituent { |
| 220 | C type(); // see section 1.B above | 220 | ConstituentType type(); // see section 1.B above |
| 221 | } | 221 | } |
| 222 | 222 | ||
| 223 | interface ConstituentLeaf<C extends ConstituentType, P extends PartOfSpeech> extends Constituent<C, P> { | 223 | interface ConstituentLeaf extends Constituent { |
| 224 | TaggedToken<P> token(); | 224 | TaggedToken token(); |
| 225 | 225 | ||
| 226 | @Override | 226 | @Override |
| 227 | default Stream<? extends Constituent<C, P>> childStream() { | 227 | default Stream<? extends Constituent> childStream() { |
| 228 | return Stream.empty(); | 228 | return Stream.empty(); |
| 229 | } | 229 | } |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | interface Dependency<D extends DependencyType, P extends PartOfSpeech> { | 232 | interface Dependency { |
| 233 | D type(); // see section 1.C above | 233 | DependencyType type(); // see section 1.C above |
| 234 | 234 | ||
| 235 | TaggedToken<P> governor(); | 235 | TaggedToken governor(); |
| 236 | 236 | ||
| 237 | TaggedToken<P> dependent(); | 237 | TaggedToken dependent(); |
| 238 | } | 238 | } |
| 239 | 239 | ||
| 240 | interface SentenceSyntax<C extends ConstituentType, D extends DependencyType, P extends PartOfSpeech> { | 240 | interface SentenceSyntax { |
| 241 | ConstituentBranch<C, P> constituentTree(); | 241 | ConstituentBranch constituentTree(); |
| 242 | 242 | ||
| 243 | List<? extends Dependency<D, P>> dependencies(); | 243 | List<? extends Dependency> dependencies(); |
| 244 | } | 244 | } |
| 245 | 245 | ||
| 246 | interface ParsedSentence<C extends ConstituentType, D extends DependencyType, P extends PartOfSpeech> extends TaggedSentence<P> { | 246 | interface ParsedSentence extends TaggedSentence { |
| 247 | SentenceSyntax<C, D, P> syntax(); | 247 | SentenceSyntax syntax(); |
| 248 | 248 | ||
| 249 | @Override | 249 | @Override |
| 250 | default Stage stage() { | 250 | default Stage stage() { |
| 251 | return Stage.PARSED; | 251 | return Stage.PARSED; |
| 252 | } | 252 | } |
| 253 | } | 253 | } |
| 254 | 254 | ||
| 255 | interface ParsedText<C extends ConstituentType, D extends DependencyType, P extends PartOfSpeech> extends TaggedText<P> { | 255 | interface ParsedText extends TaggedText { |
| 256 | // some (or even all) sentences may remain unparsed | 256 | // some (or even all) sentences may remain unparsed |
| 257 | Stream<? extends ParsedSentence<C, D, P>> parsedSentenceStream(); | 257 | Stream<? extends ParsedSentence> parsedSentenceStream(); |
| 258 | 258 | ||
| 259 | @Override | 259 | @Override |
| 260 | default Stage maxStage() { | 260 | default Stage maxStage() { |
| 261 | return Stage.PARSED; | 261 | return Stage.PARSED; |
| 262 | } | 262 | } |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | /* | ||
| 266 | * 3.B) LANGUAGE-SPECIFIC HIERARCHY (AXIS Z OVER Y OVER X) | ||
| 267 | */ | ||
| 268 | |||
| 269 | // | ||
| 270 | // 3.B.I) ENGLISH | ||
| 271 | // | ||
| 272 | |||
| 273 | // 3.B.I.a) ENGLISH PARTS OF SPEECH | ||
| 274 | interface EnglishTaggedToken extends TaggedToken { | ||
| 275 | @Override | ||
| 276 | EnglishPartOfSpeech partOfSpeech(); | ||
| 277 | } | ||
| 278 | |||
| 279 | interface EnglishTaggedSentence extends TaggedSentence { | ||
| 280 | @Override | ||
| 281 | List<? extends EnglishTaggedToken> tokens(); | ||
| 282 | } | ||
| 283 | |||
| 284 | interface EnglishTaggedText extends TaggedText { | ||
| 285 | @Override // some (or even all) sentences may remain untagged | ||
| 286 | default Stream<? extends EnglishTaggedSentence> taggedSentenceStream() { | ||
| 287 | return Seq.ofType(sentences().stream(), EnglishTaggedSentence.class); | ||
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 291 | // 3.B.I.b) ENGLISH SYNTAX | ||
| 292 | interface EnglishConstituent extends Constituent { | ||
| 293 | @Override | ||
| 294 | Stream<? extends EnglishConstituent> childStream(); | ||
| 295 | } | ||
| 296 | |||
| 297 | interface EnglishConstituentBranch extends EnglishConstituent, ConstituentBranch { | ||
| 298 | @Override | ||
| 299 | EnglishConstituentType type(); | ||
| 300 | } | ||
| 301 | |||
| 302 | interface EnglishConstituentLeaf extends EnglishConstituent, ConstituentLeaf { | ||
| 303 | @Override | ||
| 304 | EnglishTaggedToken token(); | ||
| 305 | |||
| 306 | @Override | ||
| 307 | default Stream<? extends EnglishConstituent> childStream() { | ||
| 308 | return Stream.empty(); | ||
| 309 | } | ||
| 310 | } | ||
| 311 | |||
| 312 | interface EnglishDependency extends Dependency { | ||
| 313 | @Override | ||
| 314 | EnglishDependencyType type(); | ||
| 315 | |||
| 316 | @Override | ||
| 317 | EnglishTaggedToken governor(); | ||
| 318 | |||
| 319 | @Override | ||
| 320 | EnglishTaggedToken dependent(); | ||
| 321 | } | ||
| 322 | |||
| 323 | interface EnglishSentenceSyntax extends SentenceSyntax { | ||
| 324 | @Override | ||
| 325 | EnglishConstituentBranch constituentTree(); | ||
| 326 | |||
| 327 | @Override | ||
| 328 | List<? extends EnglishDependency> dependencies(); | ||
| 329 | } | ||
| 330 | |||
| 331 | interface EnglishParsedSentence extends EnglishTaggedSentence, ParsedSentence { | ||
| 332 | @Override | ||
| 333 | EnglishSentenceSyntax syntax(); | ||
| 334 | } | ||
| 335 | |||
| 336 | interface EnglishParsedText extends EnglishTaggedText, ParsedText { | ||
| 337 | @Override // some (or even all) sentences may remain unparsed | ||
| 338 | default Stream<? extends EnglishParsedSentence> parsedSentenceStream() { | ||
| 339 | return Seq.ofType(sentences().stream(), EnglishParsedSentence.class); | ||
| 340 | } | ||
| 341 | } | ||
| 342 | |||
| 343 | // | ||
| 344 | // 3.B.II) GERMAN | ||
| 345 | // | ||
| 346 | |||
| 347 | // 3.B.II.a) GERMAN PARTS OF SPEECH | ||
| 348 | interface GermanTaggedToken extends TaggedToken { | ||
| 349 | @Override | ||
| 350 | GermanPartOfSpeech partOfSpeech(); | ||
| 351 | } | ||
| 352 | |||
| 353 | interface GermanTaggedSentence extends TaggedSentence { | ||
| 354 | @Override | ||
| 355 | List<? extends GermanTaggedToken> tokens(); | ||
| 356 | } | ||
| 357 | |||
| 358 | interface GermanTaggedText extends TaggedText { | ||
| 359 | @Override | ||
| 360 | default Stream<? extends GermanTaggedSentence> taggedSentenceStream() { | ||
| 361 | return Seq.ofType(sentences().stream(), GermanTaggedSentence.class); | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | // 3.B.II.b) GERMAN SYNTAX (missing as of yet) | ||
| 366 | |||
| 367 | |||
| 265 | /* *********************** | 368 | /* *********************** |
| 266 | * 4) PRODUCERS OF THE API | 369 | * 4) PRODUCERS OF THE API |
| 267 | *************************/ | 370 | *************************/ |
| 268 | 371 | ||
| 269 | interface TextProcessingRequest { // SIMPLIFICATION | 372 | interface TextProcessingRequest { // SIMPLIFICATION |
| 270 | Locale locale(); | 373 | Locale locale(); |
| 271 | 374 | ||
| 272 | String text(); | 375 | String text(); |
| 273 | 376 | ||
| 274 | Stage targetStage(); | 377 | Stage targetStage(); |
| 275 | } | 378 | } |
| 276 | 379 | ||
| 277 | interface TextProcessor { | 380 | interface TextProcessor { |
| 278 | LocalizedText processText(TextProcessingRequest request); | 381 | LocalizedText processText(TextProcessingRequest request); |
| 279 | } | 382 | } |
| 280 | 383 | ||
| 281 | /* *********************** | 384 | /* *********************** |
| 282 | * 5) CONSUMERS OF THE API | 385 | * 5) CONSUMERS OF THE API |
| 283 | *************************/ | 386 | *************************/ |
| 284 | 387 | ||
| 285 | /* | 388 | /* |
| 286 | * 5.A) DIRECT CONSUMERS | 389 | * 5.A) DIRECT CONSUMERS |
| 287 | */ | 390 | */ |
| 288 | 391 | ||
| 289 | class GermanPostpositionChecker { | 392 | class GermanPostpositionChecker { |
| 290 | void checkTaggedText(TaggedText<GermanPartOfSpeech> text) { | 393 | void checkTaggedText(GermanTaggedText text) { |
| 291 | text.taggedSentenceStream().forEach(this::checkTaggedSentence); | 394 | text.taggedSentenceStream().forEach(this::checkTaggedSentence); |
| 292 | } | 395 | } |
| 293 | 396 | ||
| 294 | private void checkTaggedSentence(TaggedSentence<GermanPartOfSpeech> sentence) { | 397 | private void checkTaggedSentence(GermanTaggedSentence sentence) { |
| 295 | sentence.tokens().stream() | 398 | sentence.tokens().stream() |
| 296 | .filter(this::isPostposition) | 399 | .filter(this::isPostposition) |
| 297 | .forEach(this::checkPostposition); | 400 | .forEach(this::checkPostposition); |
| 298 | } | 401 | } |
| 299 | 402 | ||
| 300 | private boolean isPostposition(TaggedToken<GermanPartOfSpeech> token) { | 403 | private boolean isPostposition(GermanTaggedToken token) { |
| 301 | return token.partOfSpeech().isPostposition(); | 404 | return token.partOfSpeech().isPostposition(); |
| 302 | } | 405 | } |
| 303 | 406 | ||
| 304 | 407 | ||
| 305 | private void checkPostposition(TaggedToken<GermanPartOfSpeech> token) { | 408 | private void checkPostposition(GermanTaggedToken token) { |
| 306 | // logic | 409 | // logic |
| 307 | } | 410 | } |
| 308 | } | 411 | } |
| 309 | 412 | ||
| 310 | class EnglishNominalSubjectChecker { | 413 | class EnglishNominalSubjectChecker { |
| 311 | 414 | ||
| 312 | void checkNominalSubjects(ParsedText<?, EnglishDependencyType, EnglishPartOfSpeech> text) { | 415 | void checkNominalSubjects(EnglishParsedText text) { |
| 313 | text.parsedSentenceStream().forEach(this::checkNominalSubjects); | 416 | text.parsedSentenceStream().forEach(this::checkNominalSubjects); |
| 314 | } | 417 | } |
| 315 | 418 | ||
| 316 | private void checkNominalSubjects(ParsedSentence<?, EnglishDependencyType, EnglishPartOfSpeech> sentence) { | 419 | private void checkNominalSubjects(EnglishParsedSentence sentence) { |
| 317 | sentence.syntax().dependencies().stream() | 420 | sentence.syntax().dependencies().stream() |
| 318 | .filter(this::isCertainTypeOfNominalSubjectDependency) | 421 | .filter(this::isCertainTypeOfNominalSubjectDependency) |
| 319 | .forEach(this::checkNominalSubject); | 422 | .forEach(this::checkNominalSubject); |
| 320 | } | 423 | } |
| 321 | 424 | ||
| 322 | private boolean isCertainTypeOfNominalSubjectDependency(Dependency<EnglishDependencyType, EnglishPartOfSpeech> dependency) { | 425 | private boolean isCertainTypeOfNominalSubjectDependency(EnglishDependency dependency) { |
| 323 | return dependency.type() == EnglishDependencyType.NOMINAL_SUBJECT && true; // SIMPLIFICATION | 426 | return dependency.type() == EnglishDependencyType.NOMINAL_SUBJECT && true; // SIMPLIFICATION |
| 324 | } | 427 | } |
| 325 | 428 | ||
| 326 | private void checkNominalSubject(Dependency<EnglishDependencyType, EnglishPartOfSpeech> dependency) { | 429 | private void checkNominalSubject(EnglishDependency dependency) { |
| 327 | checkNominalGovernor(dependency.governor()); | 430 | checkNominalGovernor(dependency.governor()); |
| 328 | checkNominalDependent(dependency.dependent()); | 431 | checkNominalDependent(dependency.dependent()); |
| 329 | } | 432 | } |
| 330 | 433 | ||
| 331 | private void checkNominalGovernor(TaggedToken<EnglishPartOfSpeech> governor) { | 434 | private void checkNominalGovernor(EnglishTaggedToken governor) { |
| 332 | if (checkGovernorPartOfSpeech(governor.partOfSpeech())) { | 435 | if (checkGovernorPartOfSpeech(governor.partOfSpeech())) { |
| 333 | // logic | 436 | // logic |
| 334 | } | 437 | } |
| 335 | } | 438 | } |
| 336 | 439 | ||
| 337 | private void checkNominalDependent(TaggedToken<EnglishPartOfSpeech> dependent) { | 440 | private void checkNominalDependent(EnglishTaggedToken dependent) { |
| 338 | if (checkDependentPartOfSpeech(dependent.partOfSpeech())) { | 441 | if (checkDependentPartOfSpeech(dependent.partOfSpeech())) { |
| 339 | // logic | 442 | // logic |
| 340 | } | 443 | } |
| 341 | } | 444 | } |
| 342 | 445 | ||
| 343 | private boolean checkGovernorPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { | 446 | private boolean checkGovernorPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { |
| 344 | return true; // SIMPLIFICATION | 447 | return true; // SIMPLIFICATION |
| 345 | } | 448 | } |
| 346 | 449 | ||
| 347 | private boolean checkDependentPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { | 450 | private boolean checkDependentPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { |
| 348 | return true; // SIMPLIFICATION | 451 | return true; // SIMPLIFICATION |
| 349 | } | 452 | } |
| 350 | } | 453 | } |
| 351 | 454 | ||
| 352 | class EnglishRelevantConstituentHandler { | 455 | class EnglishRelevantConstituentHandler { |
| 353 | 456 | ||
| 354 | void handleRelevantConstituents(ParsedText<EnglishConstituentType, ?, EnglishPartOfSpeech> text) { | 457 | void handleRelevantConstituents(EnglishParsedText text) { |
| 355 | text.parsedSentenceStream().forEach(this::handleRelevantConstiuents); | 458 | text.parsedSentenceStream().forEach(this::handleRelevantConstiuents); |
| 356 | } | 459 | } |
| 357 | 460 | ||
| 358 | private void handleRelevantConstiuents(ParsedSentence<EnglishConstituentType, ?, EnglishPartOfSpeech> sentence) { | 461 | private void handleRelevantConstiuents(EnglishParsedSentence sentence) { |
| 359 | Constituents.flatConstituentStream(sentence.syntax().constituentTree()) | 462 | EnglishConstituents.flatConstituentStream(sentence.syntax().constituentTree()) |
| 360 | .filter(this::isRelevantConstituent) | 463 | .filter(this::isRelevantConstituent) |
| 361 | .forEach(this::handleRelevantConstituent); | 464 | .forEach(this::handleRelevantConstituent); |
| 362 | } | 465 | } |
| 363 | 466 | ||
| 364 | private boolean isRelevantConstituent(Constituent<EnglishConstituentType, EnglishPartOfSpeech> constituent) { | 467 | private boolean isRelevantConstituent(EnglishConstituent constituent) { |
| 365 | return true; // SIMPLIFICATION | 468 | return true; // SIMPLIFICATION |
| 366 | } | 469 | } |
| 367 | 470 | ||
| 368 | private void handleRelevantConstituent(Constituent<EnglishConstituentType, EnglishPartOfSpeech> constituent) { | 471 | private void handleRelevantConstituent(EnglishConstituent constituent) { |
| 369 | // logic | 472 | // logic |
| 370 | } | 473 | } |
| 371 | } | 474 | } |
| 372 | 475 | ||
| 373 | class Constituents { | 476 | class EnglishConstituents { |
| 374 | /** | 477 | /** |
| 375 | * Returns the constituent and all its descendants as a flattened Stream. | 478 | * Returns the constituent and all its descendants as a flattened Stream. |
| 376 | */ | 479 | */ |
| 377 | static <C extends ConstituentType, P extends PartOfSpeech> Stream<Constituent<C, P>> flatConstituentStream(Constituent<C, P> constituent) { | 480 | static Stream<EnglishConstituent> flatConstituentStream(EnglishConstituent constituent) { |
| 378 | return Stream.concat( | 481 | return Stream.concat( |
| 379 | Stream.of(constituent), | 482 | Stream.of(constituent), |
| 380 | constituent.childStream().flatMap(Constituents::flatConstituentStream) | 483 | constituent.childStream().flatMap(EnglishConstituents::flatConstituentStream) |
| 381 | ); | 484 | ); |
| 382 | } | 485 | } |
| 383 | } | 486 | } |
| 384 | 487 | ||
| 385 | 488 | ||
| 386 | /* | 489 | /* |
| 387 | * 5.B) INDIRECT CONSUMERS | 490 | * 5.B) INDIRECT CONSUMERS |
| 388 | */ | 491 | */ |
| 389 | interface ProcessedTextConsumer { // SIMPLIFICATION | 492 | interface ProcessedTextConsumer { // SIMPLIFICATION |
| 390 | Locale supportedLocale(); | 493 | Locale supportedLocale(); |
| 391 | 494 | ||
| 392 | Stage requiredStage(); | 495 | Stage requiredStage(); |
| 393 | 496 | ||
| 394 | void consume(LocalizedText text); | 497 | void consume(LocalizedText text); |
| 395 | } | 498 | } |
| 396 | 499 | ||
| 397 | class SampleUniversalTokenizedTextConsumer implements ProcessedTextConsumer { | 500 | class SampleUniversalTokenizedTextConsumer implements ProcessedTextConsumer { |
| 398 | 501 | ||
| 399 | @Override | 502 | @Override |
| 400 | public Locale supportedLocale() { | 503 | public Locale supportedLocale() { |
| 401 | return Locale.ROOT; | 504 | return Locale.ROOT; |
| 402 | } | 505 | } |
| 403 | 506 | ||
| 404 | @Override | 507 | @Override |
| 405 | public Stage requiredStage() { | 508 | public Stage requiredStage() { |
| 406 | return Stage.TOKENIZED; | 509 | return Stage.TOKENIZED; |
| 407 | } | 510 | } |
| 408 | 511 | ||
| 409 | @Override | 512 | @Override |
| 410 | public void consume(LocalizedText text) { | 513 | public void consume(LocalizedText text) { |
| 411 | handleTokenizedText((TokenizedText) text); | 514 | handleTokenizedText((TokenizedText) text); |
| 412 | } | 515 | } |
| 413 | 516 | ||
| 414 | private void handleTokenizedText(TokenizedText tokenizedText) { | 517 | private void handleTokenizedText(TokenizedText tokenizedText) { |
| 415 | // logic | 518 | // logic |
| 416 | } | 519 | } |
| 417 | } | 520 | } |
| 418 | 521 | ||
| 419 | class SampleGermanTaggedConsumer implements ProcessedTextConsumer { | 522 | class SampleGermanTaggedConsumer implements ProcessedTextConsumer { |
| 420 | 523 | ||
| 421 | @Override | 524 | @Override |
| 422 | public Locale supportedLocale() { | 525 | public Locale supportedLocale() { |
| 423 | return Locale.GERMAN; | 526 | return Locale.GERMAN; |
| 424 | } | 527 | } |
| 425 | 528 | ||
| 426 | @Override | 529 | @Override |
| 427 | public Stage requiredStage() { | 530 | public Stage requiredStage() { |
| 428 | return Stage.TAGGED; | 531 | return Stage.TAGGED; |
| 429 | } | 532 | } |
| 430 | 533 | ||
| 431 | @Override | 534 | @Override |
| 432 | public void consume(LocalizedText text) { | 535 | public void consume(LocalizedText text) { |
| 433 | // UNSAFE UNCHECKED CAST! | 536 | GermanTaggedText germanTaggedText = (GermanTaggedText) text; // SAFE CAST |
| 434 | TaggedText<GermanPartOfSpeech> germanTaggedText = (TaggedText<GermanPartOfSpeech>) text; | ||
| 435 | new GermanPostpositionChecker().checkTaggedText(germanTaggedText); | 537 | new GermanPostpositionChecker().checkTaggedText(germanTaggedText); |
| 436 | } | 538 | } |
| 437 | } | 539 | } |
| 438 | 540 | ||
| 439 | class SampleEnglishParsedTextConsumer implements ProcessedTextConsumer { | 541 | class SampleEnglishParsedTextConsumer implements ProcessedTextConsumer { |
| 440 | 542 | ||
| 441 | @Override | 543 | @Override |
| 442 | public Locale supportedLocale() { | 544 | public Locale supportedLocale() { |
| 443 | return Locale.ENGLISH; | 545 | return Locale.ENGLISH; |
| 444 | } | 546 | } |
| 445 | 547 | ||
| 446 | @Override | 548 | @Override |
| 447 | public Stage requiredStage() { | 549 | public Stage requiredStage() { |
| 448 | return Stage.PARSED; | 550 | return Stage.PARSED; |
| 449 | } | 551 | } |
| 450 | 552 | ||
| 451 | @Override | 553 | @Override |
| 452 | public void consume(LocalizedText text) { | 554 | public void consume(LocalizedText text) { |
| 453 | // UNSAFE UNCHECKED CAST! | 555 | EnglishParsedText englishParsedText = (EnglishParsedText) text; // SAFE CAST |
| 454 | ParsedText<?, EnglishDependencyType, EnglishPartOfSpeech> englishParsedText | ||
| 455 | = (ParsedText<?, EnglishDependencyType, EnglishPartOfSpeech>) text; | ||
| 456 | new EnglishNominalSubjectChecker().checkNominalSubjects(englishParsedText); | 556 | new EnglishNominalSubjectChecker().checkNominalSubjects(englishParsedText); |
| 457 | } | 557 | } |
| 458 | } | 558 | } |
| 459 | 559 | ||
| 460 | 560 | ||