Parametrization-for-Z-Axis-Solution.java | Subtyping-for-Z-Axis-Solution.java | ||
1 | import java.util.List; | 1 | import java.util.List; |
2 | import java.util.Locale; | 2 | import java.util.Locale; |
3 | import java.util.stream.Stream; | 3 | import java.util.stream.Stream; |
4 | 4 | ||
5 | /* **************************************** | 5 | /* **************************************** |
6 | * 1) CORE LANGUAGE-SPECIFIC TYPES (AXIS Z) | 6 | * 1) CORE LANGUAGE-SPECIFIC TYPES (AXIS Z) |
7 | ******************************************/ | 7 | ******************************************/ |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * 1.A) PART OF SPEECH | 10 | * 1.A) PART OF SPEECH |
11 | * https://en.wikipedia.org/wiki/Part_of_speech | 11 | * https://en.wikipedia.org/wiki/Part_of_speech |
12 | */ | 12 | */ |
13 | 13 | ||
14 | // | 14 | // |
15 | // 1.A.I) ENGLISH PART OF SPEECH | 15 | // 1.A.I) ENGLISH PART OF SPEECH |
16 | // | 16 | // |
17 | 17 | ||
18 | /** | 18 | /** |
19 | * Implementing classes consist of one or more part-of-speech tags (each tag provided by a different tagger). | 19 | * Implementing classes consist of one or more part-of-speech tags (each tag provided by a different tagger). |
20 | */ | 20 | */ |
21 | interface PartOfSpeech { | 21 | interface PartOfSpeech { |
22 | } | 22 | } |
23 | 23 | ||
24 | class EnglishPartOfSpeech implements PartOfSpeech { | 24 | class EnglishPartOfSpeech implements PartOfSpeech { |
25 | private final Type1EnglishTag type1Tag = Type1EnglishTag.TO; // SIMPLIFICATION | 25 | private final Type1EnglishTag type1Tag = Type1EnglishTag.TO; // SIMPLIFICATION |
26 | private final Type2EnglishTag type2Tag = Type2EnglishTag.GENERAL_PREPOSITION; // SIMPLIFICATION | 26 | private final Type2EnglishTag type2Tag = Type2EnglishTag.GENERAL_PREPOSITION; // SIMPLIFICATION |
27 | 27 | ||
28 | boolean isInfinitiveMarker() { | 28 | boolean isInfinitiveMarker() { |
29 | return type1Tag == Type1EnglishTag.TO && type2Tag == Type2EnglishTag.INFINITIVE_MARKER; | 29 | return type1Tag == Type1EnglishTag.TO && type2Tag == Type2EnglishTag.INFINITIVE_MARKER; |
30 | } | 30 | } |
31 | } | 31 | } |
32 | 32 | ||
33 | enum Type1EnglishTag { | 33 | enum Type1EnglishTag { |
34 | // SIMPLIFICATION (partial content) | 34 | // SIMPLIFICATION (partial content) |
35 | TO // "to" as a preposition or an infinitive marker | 35 | TO // "to" as a preposition or an infinitive marker |
36 | } | 36 | } |
37 | 37 | ||
38 | enum Type2EnglishTag { | 38 | enum Type2EnglishTag { |
39 | // SIMPLIFICATION (partial content) | 39 | // SIMPLIFICATION (partial content) |
40 | GENERAL_PREPOSITION, // e.g. "to" in "I said to him that..." | 40 | GENERAL_PREPOSITION, // e.g. "to" in "I said to him that..." |
41 | INFINITIVE_MARKER // "to", e.g. in "I want to ask." | 41 | INFINITIVE_MARKER // "to", e.g. in "I want to ask." |
42 | } | 42 | } |
43 | 43 | ||
44 | // | 44 | // |
45 | // 1.A.II) GERMAN PART OF SPEECH | 45 | // 1.A.II) GERMAN PART OF SPEECH |
46 | // | 46 | // |
47 | 47 | ||
48 | class GermanPartOfSpeech implements PartOfSpeech { | 48 | class GermanPartOfSpeech implements PartOfSpeech { |
49 | private final Type1GermanTag type1Tag = Type1GermanTag.POSTPOSITION; // SIMPLIFICATION | 49 | private final Type1GermanTag type1Tag = Type1GermanTag.POSTPOSITION; // SIMPLIFICATION |
50 | 50 | ||
51 | boolean isPostposition() { | 51 | boolean isPostposition() { |
52 | return type1Tag == Type1GermanTag.POSTPOSITION; | 52 | return type1Tag == Type1GermanTag.POSTPOSITION; |
53 | } | 53 | } |
54 | } | 54 | } |
55 | 55 | ||
56 | enum Type1GermanTag { | 56 | enum Type1GermanTag { |
57 | // SIMPLIFICATION (partial content) | 57 | // SIMPLIFICATION (partial content) |
58 | POSTPOSITION, // e.g. "zufolge" | 58 | POSTPOSITION, // e.g. "zufolge" |
59 | } | 59 | } |
60 | 60 | ||
61 | /* | 61 | /* |
62 | * 1.B) CONSTITUENT | 62 | * 1.B) CONSTITUENT |
63 | * https://en.wikipedia.org/wiki/Phrase_structure_grammar | 63 | * https://en.wikipedia.org/wiki/Phrase_structure_grammar |
64 | */ | 64 | */ |
65 | interface ConstituentType { | 65 | interface ConstituentType { |
66 | } | 66 | } |
67 | 67 | ||
68 | enum EnglishConstituentType implements ConstituentType { | 68 | enum EnglishConstituentType implements ConstituentType { |
69 | // SIMPLIFICATION (partial content) | 69 | // SIMPLIFICATION (partial content) |
70 | VERB_PHRASE, // e.g. "is doing nice" | 70 | VERB_PHRASE, // e.g. "is doing nice" |
71 | WH_WORD_NOUN_PHRASE // e.g. "whose daughter" | 71 | WH_WORD_NOUN_PHRASE // e.g. "whose daughter" |
72 | } | 72 | } |
73 | 73 | ||
74 | // enum GermanConstituentType missing as of yet | 74 | // enum GermanConstituentType missing as of yet |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * 1.C) DEPENDENCY | 77 | * 1.C) DEPENDENCY |
78 | * https://en.wikipedia.org/wiki/Dependency_grammar | 78 | * https://en.wikipedia.org/wiki/Dependency_grammar |
79 | */ | 79 | */ |
80 | interface DependencyType { | 80 | interface DependencyType { |
81 | } | 81 | } |
82 | 82 | ||
83 | enum EnglishDependencyType implements DependencyType { | 83 | enum EnglishDependencyType implements DependencyType { |
84 | // SIMPLIFICATION (partial content) | 84 | // SIMPLIFICATION (partial content) |
85 | NOMINAL_SUBJECT, // e.g. "he" <- "walks" in "He walks." | 85 | NOMINAL_SUBJECT, // e.g. "he" <- "walks" in "He walks." |
86 | PHRASAL_VERB_PARTICLE // e.g. "shut" -> "down" in "It was shut down." | 86 | PHRASAL_VERB_PARTICLE // e.g. "shut" -> "down" in "It was shut down." |
87 | } | 87 | } |
88 | 88 | ||
89 | // enum GermanDependencyType missing as of yet | 89 | // enum GermanDependencyType missing as of yet |
90 | 90 | ||
91 | /* *************** | 91 | /* *************** |
92 | * 2) HELPER TYPES | 92 | * 2) HELPER TYPES |
93 | *****************/ | 93 | *****************/ |
94 | interface TextualRange { | 94 | interface TextualRange { |
95 | String text(); | 95 | String text(); |
96 | 96 | ||
97 | int startOffset(); // inclusive | 97 | int startOffset(); // inclusive |
98 | 98 | ||
99 | int endOffset(); // exclusive | 99 | int endOffset(); // exclusive |
100 | } | 100 | } |
101 | 101 | ||
102 | interface Seq { | 102 | interface Seq { |
103 | 103 | ||
104 | /** | 104 | /** |
105 | * Stub for <a href="https://www.jooq.org/products/jOOλ/javadoc/0.9.12/org/jooq/lambda/Seq.html#ofType-java.util.stream.Stream-java.lang.Class-">Seq.ofType()</a>. | 105 | * Stub for <a href="https://www.jooq.org/products/jOOλ/javadoc/0.9.12/org/jooq/lambda/Seq.html#ofType-java.util.stream.Stream-java.lang.Class-">Seq.ofType()</a>. |
106 | */ | 106 | */ |
107 | @SuppressWarnings("unchecked") | 107 | @SuppressWarnings("unchecked") |
108 | static <T, U> Stream<U> ofType(Stream<? extends T> stream, Class<? extends U> type) { | 108 | static <T, U> Stream<U> ofType(Stream<? extends T> stream, Class<? extends U> type) { |
109 | return stream.filter(type::isInstance).map(t -> (U) t); | 109 | return stream.filter(type::isInstance).map(t -> (U) t); |
110 | } | 110 | } |
111 | } | 111 | } |
112 | 112 | ||
113 | /** | 113 | /** |
114 | * Defines the stage of text processing (AXIS Y). | 114 | * Defines the stage of text processing (AXIS Y). |
115 | */ | 115 | */ |
116 | enum Stage { | 116 | enum Stage { |
117 | UNPROCESSED, | 117 | UNPROCESSED, |
118 | TOKENIZED, // tokens present | 118 | TOKENIZED, // tokens present |
119 | SENTENCIZED, // sentences present | 119 | SENTENCIZED, // sentences present |
120 | TAGGED, // parts of speech present | 120 | TAGGED, // parts of speech present |
121 | PARSED // syntax present | 121 | PARSED // syntax present |
122 | } | 122 | } |
123 | 123 | ||
124 | /* *************************** | 124 | /* *************************** |
125 | * 3) SOLUTION USING SUBTYPING | 125 | * 3) SOLUTION USING SUBTYPING |
126 | *****************************/ | 126 | *****************************/ |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * 3.A) GENERIC HIERARCHY (AXIS Y OVER X) | 129 | * 3.A) GENERIC HIERARCHY (AXIS Y OVER X) |
130 | */ | 130 | */ |
131 | 131 | ||
132 | // | 132 | // |
133 | // 3.A.I) UNPROCESSED | 133 | // 3.A.I) UNPROCESSED |
134 | // | 134 | // |
135 | interface LocalizedText extends TextualRange { | 135 | interface LocalizedText extends TextualRange { |
136 | Locale locale(); | 136 | Locale locale(); |
137 | 137 | ||
138 | default Stage maxStage() { | 138 | default Stage maxStage() { |
139 | return Stage.UNPROCESSED; | 139 | return Stage.UNPROCESSED; |
140 | } | 140 | } |
141 | } | 141 | } |
142 | 142 | ||
143 | // | 143 | // |
144 | // 3.A.II) WITH TOKENS | 144 | // 3.A.II) WITH TOKENS |
145 | // | 145 | // |
146 | enum TokenType { | 146 | enum TokenType { |
147 | WORD, NUMERIC // SIMPLIFICATION | 147 | WORD, NUMERIC // SIMPLIFICATION |
148 | } | 148 | } |
149 | 149 | ||
150 | interface Token extends TextualRange { | 150 | interface Token extends TextualRange { |
151 | TokenType tokenType(); | 151 | TokenType tokenType(); |
152 | } | 152 | } |
153 | 153 | ||
154 | interface TokenizedText extends LocalizedText { | 154 | interface TokenizedText extends LocalizedText { |
155 | Stream<? extends Token> tokenStream(); | 155 | Stream<? extends Token> tokenStream(); |
156 | 156 | ||
157 | int tokenCount(); | 157 | int tokenCount(); |
158 | 158 | ||
159 | @Override | 159 | @Override |
160 | default Stage maxStage() { | 160 | default Stage maxStage() { |
161 | return Stage.TOKENIZED; | 161 | return Stage.TOKENIZED; |
162 | } | 162 | } |
163 | } | 163 | } |
164 | 164 | ||
165 | // | 165 | // |
166 | // 3.A.III) WITH SENTENCES | 166 | // 3.A.III) WITH SENTENCES |
167 | // | 167 | // |
168 | interface Sentence extends TextualRange { | 168 | interface Sentence extends TextualRange { |
169 | List<? extends Token> tokens(); | 169 | List<? extends Token> tokens(); |
170 | 170 | ||
171 | default Stage stage() { | 171 | default Stage stage() { |
172 | return Stage.SENTENCIZED; | 172 | return Stage.SENTENCIZED; |
173 | } | 173 | } |
174 | } | 174 | } |
175 | 175 | ||
176 | interface SentencizedText extends TokenizedText { | 176 | interface SentencizedText extends TokenizedText { |
177 | List<? extends Sentence> sentences(); | 177 | List<? extends Sentence> sentences(); |
178 | 178 | ||
179 | @Override | 179 | @Override |
180 | default Stage maxStage() { | 180 | default Stage maxStage() { |
181 | return Stage.SENTENCIZED; | 181 | return Stage.SENTENCIZED; |
182 | } | 182 | } |
183 | } | 183 | } |
184 | 184 | ||
185 | // | 185 | // |
186 | // 3.A.IV) WITH PARTS OF SPEECH | 186 | // 3.A.IV) WITH PARTS OF SPEECH |
187 | // | 187 | // |
188 | interface TaggedToken<P extends PartOfSpeech> extends Token { | 188 | interface TaggedToken extends Token { |
189 | P partOfSpeech(); // see section 1.A above | 189 | PartOfSpeech partOfSpeech(); // see section 1.A above |
190 | } | 190 | } |
191 | 191 | ||
192 | interface TaggedSentence<P extends PartOfSpeech> extends Sentence { | 192 | interface TaggedSentence extends Sentence { |
193 | @Override | 193 | @Override |
194 | List<? extends TaggedToken<P>> tokens(); | 194 | List<? extends TaggedToken> tokens(); |
195 | 195 | ||
196 | @Override | 196 | @Override |
197 | default Stage stage() { | 197 | default Stage stage() { |
198 | return Stage.TAGGED; | 198 | return Stage.TAGGED; |
199 | } | 199 | } |
200 | } | 200 | } |
201 | 201 | ||
202 | interface TaggedText<P extends PartOfSpeech> extends SentencizedText { | 202 | interface TaggedText extends SentencizedText { |
203 | // some (or even all) sentences may remain untagged | 203 | // some (or even all) sentences may remain untagged |
204 | Stream<? extends TaggedSentence<P>> taggedSentenceStream(); | 204 | Stream<? extends TaggedSentence> taggedSentenceStream(); |
205 | 205 | ||
206 | @Override | 206 | @Override |
207 | default Stage maxStage() { | 207 | default Stage maxStage() { |
208 | return Stage.TAGGED; | 208 | return Stage.TAGGED; |
209 | } | 209 | } |
210 | } | 210 | } |
211 | 211 | ||
212 | // | 212 | // |
213 | // 3.A.V) WITH SYNTAX | 213 | // 3.A.V) WITH SYNTAX |
214 | // | 214 | // |
215 | interface Constituent<C extends ConstituentType, P extends PartOfSpeech> { | 215 | interface Constituent { |
216 | Stream<? extends Constituent<C, P>> childStream(); | 216 | Stream<? extends Constituent> childStream(); |
217 | } | 217 | } |
218 | 218 | ||
219 | interface ConstituentBranch<C extends ConstituentType, P extends PartOfSpeech> extends Constituent<C, P> { | 219 | interface ConstituentBranch extends Constituent { |
220 | C type(); // see section 1.B above | 220 | ConstituentType type(); // see section 1.B above |
221 | } | 221 | } |
222 | 222 | ||
223 | interface ConstituentLeaf<C extends ConstituentType, P extends PartOfSpeech> extends Constituent<C, P> { | 223 | interface ConstituentLeaf extends Constituent { |
224 | TaggedToken<P> token(); | 224 | TaggedToken token(); |
225 | 225 | ||
226 | @Override | 226 | @Override |
227 | default Stream<? extends Constituent<C, P>> childStream() { | 227 | default Stream<? extends Constituent> childStream() { |
228 | return Stream.empty(); | 228 | return Stream.empty(); |
229 | } | 229 | } |
230 | } | 230 | } |
231 | 231 | ||
232 | interface Dependency<D extends DependencyType, P extends PartOfSpeech> { | 232 | interface Dependency { |
233 | D type(); // see section 1.C above | 233 | DependencyType type(); // see section 1.C above |
234 | 234 | ||
235 | TaggedToken<P> governor(); | 235 | TaggedToken governor(); |
236 | 236 | ||
237 | TaggedToken<P> dependent(); | 237 | TaggedToken dependent(); |
238 | } | 238 | } |
239 | 239 | ||
240 | interface SentenceSyntax<C extends ConstituentType, D extends DependencyType, P extends PartOfSpeech> { | 240 | interface SentenceSyntax { |
241 | ConstituentBranch<C, P> constituentTree(); | 241 | ConstituentBranch constituentTree(); |
242 | 242 | ||
243 | List<? extends Dependency<D, P>> dependencies(); | 243 | List<? extends Dependency> dependencies(); |
244 | } | 244 | } |
245 | 245 | ||
246 | interface ParsedSentence<C extends ConstituentType, D extends DependencyType, P extends PartOfSpeech> extends TaggedSentence<P> { | 246 | interface ParsedSentence extends TaggedSentence { |
247 | SentenceSyntax<C, D, P> syntax(); | 247 | SentenceSyntax syntax(); |
248 | 248 | ||
249 | @Override | 249 | @Override |
250 | default Stage stage() { | 250 | default Stage stage() { |
251 | return Stage.PARSED; | 251 | return Stage.PARSED; |
252 | } | 252 | } |
253 | } | 253 | } |
254 | 254 | ||
255 | interface ParsedText<C extends ConstituentType, D extends DependencyType, P extends PartOfSpeech> extends TaggedText<P> { | 255 | interface ParsedText extends TaggedText { |
256 | // some (or even all) sentences may remain unparsed | 256 | // some (or even all) sentences may remain unparsed |
257 | Stream<? extends ParsedSentence<C, D, P>> parsedSentenceStream(); | 257 | Stream<? extends ParsedSentence> parsedSentenceStream(); |
258 | 258 | ||
259 | @Override | 259 | @Override |
260 | default Stage maxStage() { | 260 | default Stage maxStage() { |
261 | return Stage.PARSED; | 261 | return Stage.PARSED; |
262 | } | 262 | } |
263 | } | 263 | } |
264 | 264 | ||
265 | /* | ||
266 | * 3.B) LANGUAGE-SPECIFIC HIERARCHY (AXIS Z OVER Y OVER X) | ||
267 | */ | ||
268 | |||
269 | // | ||
270 | // 3.B.I) ENGLISH | ||
271 | // | ||
272 | |||
273 | // 3.B.I.a) ENGLISH PARTS OF SPEECH | ||
274 | interface EnglishTaggedToken extends TaggedToken { | ||
275 | @Override | ||
276 | EnglishPartOfSpeech partOfSpeech(); | ||
277 | } | ||
278 | |||
279 | interface EnglishTaggedSentence extends TaggedSentence { | ||
280 | @Override | ||
281 | List<? extends EnglishTaggedToken> tokens(); | ||
282 | } | ||
283 | |||
284 | interface EnglishTaggedText extends TaggedText { | ||
285 | @Override // some (or even all) sentences may remain untagged | ||
286 | default Stream<? extends EnglishTaggedSentence> taggedSentenceStream() { | ||
287 | return Seq.ofType(sentences().stream(), EnglishTaggedSentence.class); | ||
288 | } | ||
289 | } | ||
290 | |||
291 | // 3.B.I.b) ENGLISH SYNTAX | ||
292 | interface EnglishConstituent extends Constituent { | ||
293 | @Override | ||
294 | Stream<? extends EnglishConstituent> childStream(); | ||
295 | } | ||
296 | |||
297 | interface EnglishConstituentBranch extends EnglishConstituent, ConstituentBranch { | ||
298 | @Override | ||
299 | EnglishConstituentType type(); | ||
300 | } | ||
301 | |||
302 | interface EnglishConstituentLeaf extends EnglishConstituent, ConstituentLeaf { | ||
303 | @Override | ||
304 | EnglishTaggedToken token(); | ||
305 | |||
306 | @Override | ||
307 | default Stream<? extends EnglishConstituent> childStream() { | ||
308 | return Stream.empty(); | ||
309 | } | ||
310 | } | ||
311 | |||
312 | interface EnglishDependency extends Dependency { | ||
313 | @Override | ||
314 | EnglishDependencyType type(); | ||
315 | |||
316 | @Override | ||
317 | EnglishTaggedToken governor(); | ||
318 | |||
319 | @Override | ||
320 | EnglishTaggedToken dependent(); | ||
321 | } | ||
322 | |||
323 | interface EnglishSentenceSyntax extends SentenceSyntax { | ||
324 | @Override | ||
325 | EnglishConstituentBranch constituentTree(); | ||
326 | |||
327 | @Override | ||
328 | List<? extends EnglishDependency> dependencies(); | ||
329 | } | ||
330 | |||
331 | interface EnglishParsedSentence extends EnglishTaggedSentence, ParsedSentence { | ||
332 | @Override | ||
333 | EnglishSentenceSyntax syntax(); | ||
334 | } | ||
335 | |||
336 | interface EnglishParsedText extends EnglishTaggedText, ParsedText { | ||
337 | @Override // some (or even all) sentences may remain unparsed | ||
338 | default Stream<? extends EnglishParsedSentence> parsedSentenceStream() { | ||
339 | return Seq.ofType(sentences().stream(), EnglishParsedSentence.class); | ||
340 | } | ||
341 | } | ||
342 | |||
343 | // | ||
344 | // 3.B.II) GERMAN | ||
345 | // | ||
346 | |||
347 | // 3.B.II.a) GERMAN PARTS OF SPEECH | ||
348 | interface GermanTaggedToken extends TaggedToken { | ||
349 | @Override | ||
350 | GermanPartOfSpeech partOfSpeech(); | ||
351 | } | ||
352 | |||
353 | interface GermanTaggedSentence extends TaggedSentence { | ||
354 | @Override | ||
355 | List<? extends GermanTaggedToken> tokens(); | ||
356 | } | ||
357 | |||
358 | interface GermanTaggedText extends TaggedText { | ||
359 | @Override | ||
360 | default Stream<? extends GermanTaggedSentence> taggedSentenceStream() { | ||
361 | return Seq.ofType(sentences().stream(), GermanTaggedSentence.class); | ||
362 | } | ||
363 | } | ||
364 | |||
365 | // 3.B.II.b) GERMAN SYNTAX (missing as of yet) | ||
366 | |||
367 | |||
265 | /* *********************** | 368 | /* *********************** |
266 | * 4) PRODUCERS OF THE API | 369 | * 4) PRODUCERS OF THE API |
267 | *************************/ | 370 | *************************/ |
268 | 371 | ||
269 | interface TextProcessingRequest { // SIMPLIFICATION | 372 | interface TextProcessingRequest { // SIMPLIFICATION |
270 | Locale locale(); | 373 | Locale locale(); |
271 | 374 | ||
272 | String text(); | 375 | String text(); |
273 | 376 | ||
274 | Stage targetStage(); | 377 | Stage targetStage(); |
275 | } | 378 | } |
276 | 379 | ||
277 | interface TextProcessor { | 380 | interface TextProcessor { |
278 | LocalizedText processText(TextProcessingRequest request); | 381 | LocalizedText processText(TextProcessingRequest request); |
279 | } | 382 | } |
280 | 383 | ||
281 | /* *********************** | 384 | /* *********************** |
282 | * 5) CONSUMERS OF THE API | 385 | * 5) CONSUMERS OF THE API |
283 | *************************/ | 386 | *************************/ |
284 | 387 | ||
285 | /* | 388 | /* |
286 | * 5.A) DIRECT CONSUMERS | 389 | * 5.A) DIRECT CONSUMERS |
287 | */ | 390 | */ |
288 | 391 | ||
289 | class GermanPostpositionChecker { | 392 | class GermanPostpositionChecker { |
290 | void checkTaggedText(TaggedText<GermanPartOfSpeech> text) { | 393 | void checkTaggedText(GermanTaggedText text) { |
291 | text.taggedSentenceStream().forEach(this::checkTaggedSentence); | 394 | text.taggedSentenceStream().forEach(this::checkTaggedSentence); |
292 | } | 395 | } |
293 | 396 | ||
294 | private void checkTaggedSentence(TaggedSentence<GermanPartOfSpeech> sentence) { | 397 | private void checkTaggedSentence(GermanTaggedSentence sentence) { |
295 | sentence.tokens().stream() | 398 | sentence.tokens().stream() |
296 | .filter(this::isPostposition) | 399 | .filter(this::isPostposition) |
297 | .forEach(this::checkPostposition); | 400 | .forEach(this::checkPostposition); |
298 | } | 401 | } |
299 | 402 | ||
300 | private boolean isPostposition(TaggedToken<GermanPartOfSpeech> token) { | 403 | private boolean isPostposition(GermanTaggedToken token) { |
301 | return token.partOfSpeech().isPostposition(); | 404 | return token.partOfSpeech().isPostposition(); |
302 | } | 405 | } |
303 | 406 | ||
304 | 407 | ||
305 | private void checkPostposition(TaggedToken<GermanPartOfSpeech> token) { | 408 | private void checkPostposition(GermanTaggedToken token) { |
306 | // logic | 409 | // logic |
307 | } | 410 | } |
308 | } | 411 | } |
309 | 412 | ||
310 | class EnglishNominalSubjectChecker { | 413 | class EnglishNominalSubjectChecker { |
311 | 414 | ||
312 | void checkNominalSubjects(ParsedText<?, EnglishDependencyType, EnglishPartOfSpeech> text) { | 415 | void checkNominalSubjects(EnglishParsedText text) { |
313 | text.parsedSentenceStream().forEach(this::checkNominalSubjects); | 416 | text.parsedSentenceStream().forEach(this::checkNominalSubjects); |
314 | } | 417 | } |
315 | 418 | ||
316 | private void checkNominalSubjects(ParsedSentence<?, EnglishDependencyType, EnglishPartOfSpeech> sentence) { | 419 | private void checkNominalSubjects(EnglishParsedSentence sentence) { |
317 | sentence.syntax().dependencies().stream() | 420 | sentence.syntax().dependencies().stream() |
318 | .filter(this::isCertainTypeOfNominalSubjectDependency) | 421 | .filter(this::isCertainTypeOfNominalSubjectDependency) |
319 | .forEach(this::checkNominalSubject); | 422 | .forEach(this::checkNominalSubject); |
320 | } | 423 | } |
321 | 424 | ||
322 | private boolean isCertainTypeOfNominalSubjectDependency(Dependency<EnglishDependencyType, EnglishPartOfSpeech> dependency) { | 425 | private boolean isCertainTypeOfNominalSubjectDependency(EnglishDependency dependency) { |
323 | return dependency.type() == EnglishDependencyType.NOMINAL_SUBJECT && true; // SIMPLIFICATION | 426 | return dependency.type() == EnglishDependencyType.NOMINAL_SUBJECT && true; // SIMPLIFICATION |
324 | } | 427 | } |
325 | 428 | ||
326 | private void checkNominalSubject(Dependency<EnglishDependencyType, EnglishPartOfSpeech> dependency) { | 429 | private void checkNominalSubject(EnglishDependency dependency) { |
327 | checkNominalGovernor(dependency.governor()); | 430 | checkNominalGovernor(dependency.governor()); |
328 | checkNominalDependent(dependency.dependent()); | 431 | checkNominalDependent(dependency.dependent()); |
329 | } | 432 | } |
330 | 433 | ||
331 | private void checkNominalGovernor(TaggedToken<EnglishPartOfSpeech> governor) { | 434 | private void checkNominalGovernor(EnglishTaggedToken governor) { |
332 | if (checkGovernorPartOfSpeech(governor.partOfSpeech())) { | 435 | if (checkGovernorPartOfSpeech(governor.partOfSpeech())) { |
333 | // logic | 436 | // logic |
334 | } | 437 | } |
335 | } | 438 | } |
336 | 439 | ||
337 | private void checkNominalDependent(TaggedToken<EnglishPartOfSpeech> dependent) { | 440 | private void checkNominalDependent(EnglishTaggedToken dependent) { |
338 | if (checkDependentPartOfSpeech(dependent.partOfSpeech())) { | 441 | if (checkDependentPartOfSpeech(dependent.partOfSpeech())) { |
339 | // logic | 442 | // logic |
340 | } | 443 | } |
341 | } | 444 | } |
342 | 445 | ||
343 | private boolean checkGovernorPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { | 446 | private boolean checkGovernorPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { |
344 | return true; // SIMPLIFICATION | 447 | return true; // SIMPLIFICATION |
345 | } | 448 | } |
346 | 449 | ||
347 | private boolean checkDependentPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { | 450 | private boolean checkDependentPartOfSpeech(EnglishPartOfSpeech partOfSpeech) { |
348 | return true; // SIMPLIFICATION | 451 | return true; // SIMPLIFICATION |
349 | } | 452 | } |
350 | } | 453 | } |
351 | 454 | ||
352 | class EnglishRelevantConstituentHandler { | 455 | class EnglishRelevantConstituentHandler { |
353 | 456 | ||
354 | void handleRelevantConstituents(ParsedText<EnglishConstituentType, ?, EnglishPartOfSpeech> text) { | 457 | void handleRelevantConstituents(EnglishParsedText text) { |
355 | text.parsedSentenceStream().forEach(this::handleRelevantConstiuents); | 458 | text.parsedSentenceStream().forEach(this::handleRelevantConstiuents); |
356 | } | 459 | } |
357 | 460 | ||
358 | private void handleRelevantConstiuents(ParsedSentence<EnglishConstituentType, ?, EnglishPartOfSpeech> sentence) { | 461 | private void handleRelevantConstiuents(EnglishParsedSentence sentence) { |
359 | Constituents.flatConstituentStream(sentence.syntax().constituentTree()) | 462 | EnglishConstituents.flatConstituentStream(sentence.syntax().constituentTree()) |
360 | .filter(this::isRelevantConstituent) | 463 | .filter(this::isRelevantConstituent) |
361 | .forEach(this::handleRelevantConstituent); | 464 | .forEach(this::handleRelevantConstituent); |
362 | } | 465 | } |
363 | 466 | ||
364 | private boolean isRelevantConstituent(Constituent<EnglishConstituentType, EnglishPartOfSpeech> constituent) { | 467 | private boolean isRelevantConstituent(EnglishConstituent constituent) { |
365 | return true; // SIMPLIFICATION | 468 | return true; // SIMPLIFICATION |
366 | } | 469 | } |
367 | 470 | ||
368 | private void handleRelevantConstituent(Constituent<EnglishConstituentType, EnglishPartOfSpeech> constituent) { | 471 | private void handleRelevantConstituent(EnglishConstituent constituent) { |
369 | // logic | 472 | // logic |
370 | } | 473 | } |
371 | } | 474 | } |
372 | 475 | ||
373 | class Constituents { | 476 | class EnglishConstituents { |
374 | /** | 477 | /** |
375 | * Returns the constituent and all its descendants as a flattened Stream. | 478 | * Returns the constituent and all its descendants as a flattened Stream. |
376 | */ | 479 | */ |
377 | static <C extends ConstituentType, P extends PartOfSpeech> Stream<Constituent<C, P>> flatConstituentStream(Constituent<C, P> constituent) { | 480 | static Stream<EnglishConstituent> flatConstituentStream(EnglishConstituent constituent) { |
378 | return Stream.concat( | 481 | return Stream.concat( |
379 | Stream.of(constituent), | 482 | Stream.of(constituent), |
380 | constituent.childStream().flatMap(Constituents::flatConstituentStream) | 483 | constituent.childStream().flatMap(EnglishConstituents::flatConstituentStream) |
381 | ); | 484 | ); |
382 | } | 485 | } |
383 | } | 486 | } |
384 | 487 | ||
385 | 488 | ||
386 | /* | 489 | /* |
387 | * 5.B) INDIRECT CONSUMERS | 490 | * 5.B) INDIRECT CONSUMERS |
388 | */ | 491 | */ |
389 | interface ProcessedTextConsumer { // SIMPLIFICATION | 492 | interface ProcessedTextConsumer { // SIMPLIFICATION |
390 | Locale supportedLocale(); | 493 | Locale supportedLocale(); |
391 | 494 | ||
392 | Stage requiredStage(); | 495 | Stage requiredStage(); |
393 | 496 | ||
394 | void consume(LocalizedText text); | 497 | void consume(LocalizedText text); |
395 | } | 498 | } |
396 | 499 | ||
397 | class SampleUniversalTokenizedTextConsumer implements ProcessedTextConsumer { | 500 | class SampleUniversalTokenizedTextConsumer implements ProcessedTextConsumer { |
398 | 501 | ||
399 | @Override | 502 | @Override |
400 | public Locale supportedLocale() { | 503 | public Locale supportedLocale() { |
401 | return Locale.ROOT; | 504 | return Locale.ROOT; |
402 | } | 505 | } |
403 | 506 | ||
404 | @Override | 507 | @Override |
405 | public Stage requiredStage() { | 508 | public Stage requiredStage() { |
406 | return Stage.TOKENIZED; | 509 | return Stage.TOKENIZED; |
407 | } | 510 | } |
408 | 511 | ||
409 | @Override | 512 | @Override |
410 | public void consume(LocalizedText text) { | 513 | public void consume(LocalizedText text) { |
411 | handleTokenizedText((TokenizedText) text); | 514 | handleTokenizedText((TokenizedText) text); |
412 | } | 515 | } |
413 | 516 | ||
414 | private void handleTokenizedText(TokenizedText tokenizedText) { | 517 | private void handleTokenizedText(TokenizedText tokenizedText) { |
415 | // logic | 518 | // logic |
416 | } | 519 | } |
417 | } | 520 | } |
418 | 521 | ||
419 | class SampleGermanTaggedConsumer implements ProcessedTextConsumer { | 522 | class SampleGermanTaggedConsumer implements ProcessedTextConsumer { |
420 | 523 | ||
421 | @Override | 524 | @Override |
422 | public Locale supportedLocale() { | 525 | public Locale supportedLocale() { |
423 | return Locale.GERMAN; | 526 | return Locale.GERMAN; |
424 | } | 527 | } |
425 | 528 | ||
426 | @Override | 529 | @Override |
427 | public Stage requiredStage() { | 530 | public Stage requiredStage() { |
428 | return Stage.TAGGED; | 531 | return Stage.TAGGED; |
429 | } | 532 | } |
430 | 533 | ||
431 | @Override | 534 | @Override |
432 | public void consume(LocalizedText text) { | 535 | public void consume(LocalizedText text) { |
433 | // UNSAFE UNCHECKED CAST! | 536 | GermanTaggedText germanTaggedText = (GermanTaggedText) text; // SAFE CAST |
434 | TaggedText<GermanPartOfSpeech> germanTaggedText = (TaggedText<GermanPartOfSpeech>) text; | ||
435 | new GermanPostpositionChecker().checkTaggedText(germanTaggedText); | 537 | new GermanPostpositionChecker().checkTaggedText(germanTaggedText); |
436 | } | 538 | } |
437 | } | 539 | } |
438 | 540 | ||
439 | class SampleEnglishParsedTextConsumer implements ProcessedTextConsumer { | 541 | class SampleEnglishParsedTextConsumer implements ProcessedTextConsumer { |
440 | 542 | ||
441 | @Override | 543 | @Override |
442 | public Locale supportedLocale() { | 544 | public Locale supportedLocale() { |
443 | return Locale.ENGLISH; | 545 | return Locale.ENGLISH; |
444 | } | 546 | } |
445 | 547 | ||
446 | @Override | 548 | @Override |
447 | public Stage requiredStage() { | 549 | public Stage requiredStage() { |
448 | return Stage.PARSED; | 550 | return Stage.PARSED; |
449 | } | 551 | } |
450 | 552 | ||
451 | @Override | 553 | @Override |
452 | public void consume(LocalizedText text) { | 554 | public void consume(LocalizedText text) { |
453 | // UNSAFE UNCHECKED CAST! | 555 | EnglishParsedText englishParsedText = (EnglishParsedText) text; // SAFE CAST |
454 | ParsedText<?, EnglishDependencyType, EnglishPartOfSpeech> englishParsedText | ||
455 | = (ParsedText<?, EnglishDependencyType, EnglishPartOfSpeech>) text; | ||
456 | new EnglishNominalSubjectChecker().checkNominalSubjects(englishParsedText); | 556 | new EnglishNominalSubjectChecker().checkNominalSubjects(englishParsedText); |
457 | } | 557 | } |
458 | } | 558 | } |
459 | 559 | ||
460 | 560 |