diff --git a/src/earley/parser.ts b/src/earley/parser.ts index a5a0ad4..562cce5 100644 --- a/src/earley/parser.ts +++ b/src/earley/parser.ts @@ -88,10 +88,10 @@ export function getViterbiParseFromChart(state: State, chart: Chart< } - export function parseSentenceIntoChart(Start: NonTerminal, grammar: Grammar, - tokens: T[]): [Chart, number, State] { + tokens: T[], + scanProbability?: (x: T, t: Terminal[]) => S): [Chart, number, State] { // ScanProbability scanProbability//TODO const stateSets: Chart = new Chart(grammar); @@ -125,7 +125,7 @@ export function parseSentenceIntoChart(Start: NonTerminal, tokensWithWords.forEach( (token: WordWithTypes) => { predict(i, grammar, stateSets); - scan(i, token, grammar.probabilityMapping.semiring, stateSets); + scan(i, token, grammar.probabilityMapping.semiring, stateSets, scanProbability); complete(i + 1, stateSets, grammar); const completedStates: State[] = []; @@ -153,8 +153,13 @@ export interface ParseTreeWithScore { export function getViterbiParse(Start: NonTerminal, grammar: Grammar, - tokens: T[]): ParseTreeWithScore { - const [chart, ignored, init] = parseSentenceIntoChart(Start, grammar, tokens); + tokens: T[], + scanProbability?: (x: T, t: Terminal[]) => S): ParseTreeWithScore { + const [chart, ignored, init] = parseSentenceIntoChart(Start, grammar, tokens, scanProbability); + + if (!chart.has(init.rule, tokens.length, + 0, + init.rule.right.length)) throw new Error("Could not parse sentence."); const finalState = chart.getOrCreate( tokens.length, @@ -163,6 +168,7 @@ export function getViterbiParse(Start: NonTerminal, init.rule ); + const parseTree: ParseTree = getViterbiParseFromChart(finalState, chart); const toProbability = grammar.probabilityMapping.toProbability; const finalScore = chart.getViterbiScore(finalState).innerScore; diff --git a/src/earley/scan.ts b/src/earley/scan.ts index 316398a..d29fda0 100644 --- a/src/earley/scan.ts +++ b/src/earley/scan.ts @@ -1,4 +1,4 @@ -import {isNonTerminal, WordWithTypes} from "../grammar/category"; +import {isNonTerminal, WordWithTypes, Terminal} from "../grammar/category"; import {Semiring} from "semiring"; import {Chart} from "./chart/chart"; import {getActiveCategory, State, advanceDot} from "./chart/state"; @@ -10,19 +10,18 @@ import {getActiveCategory, State, advanceDot} from "./chart/state"; * @param tokenPosition The start index of the scan. * @param word * @param types - * //@param scanProbability Function that provides the probability of scanning the given token at this position. Might be null for a probability of 1.0. + * @param scanProbability Function that provides the probability of scanning the given token at this position. Might be null for a probability of 1.0. * @param sr * @param stateSets */ export function scan(tokenPosition: number, {word, types}: WordWithTypes, - // scanProbability:(x:T)=>number,//TODO sr: Semiring, - stateSets: Chart) { + stateSets: Chart, + scanProbability?: (x: T, t: Terminal[]) => S) { const changes: any[] = []; - // TODO - // const scanProb:number = !scanProbability ? NaN : scanProbability(tokenPosition); - const scanProb: S = sr.multiplicativeIdentity; + + const scanProb: S = !!scanProbability ? scanProbability(word, types) : undefined; /* * Get all states that are active on a terminal diff --git a/test/earley/parser.spec.ts b/test/earley/parser.spec.ts index b93e316..2688c02 100644 --- a/test/earley/parser.spec.ts +++ b/test/earley/parser.spec.ts @@ -1,19 +1,15 @@ -import {NonTerminal, Terminal, Category} from "../../src/grammar/category"; +import {NonTerminal, Terminal} from "../../src/grammar/category"; import {getViterbiParse, ParseTreeWithScore, Grammar} from "../../src/index"; -import * as Mocha from 'mocha' -import {expect} from 'chai'; -import {scan} from "../../src/earley/scan"; -import {LogSemiring} from "semiring"; -import {Chart} from "../../src/earley/chart/chart"; +import {expect} from "chai"; import {g, A} from "../sample-grammar"; import {parseSentenceIntoChart} from "../../src/earley/parser"; -//TODO -describe('parser', () => { +// TODO +describe("parser", () => { - it('should complete correctly', () => { + it("should complete correctly", () => { // complete( // 0, // "e", @@ -21,7 +17,7 @@ describe('parser', () => { // ss // ) }); - it('should predict correctly', () => { + it("should predict correctly", () => { // complete( // 0, // "e", @@ -29,7 +25,7 @@ describe('parser', () => { // ss // ) }); - it('should parse the man chase the man with a stick', () => { + it("should parse the man chase the man with a stick", () => { const S: NonTerminal = "S"; const NP: NonTerminal = "NP"; const VP: NonTerminal = "VP"; @@ -47,8 +43,8 @@ describe('parser', () => { const stick: Terminal = (token) => !!token.match(/stick/); const with_: Terminal = (token) => !!token.match(/with/); - const grammar: Grammar = Grammar.builder("test") - //.setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want + const grammar: Grammar = Grammar.builder("test") + // .setSemiring(new LogSemiring()) // If not set, defaults to Log semiring which is probably what you want .addNewRule( 1.0, // Probability between 0.0 and 1.0, defaults to 1.0. The builder takes care of converting it to the semiring element S, // Left hand side of the rule @@ -88,18 +84,45 @@ describe('parser', () => { grammar, tokens ); - //console.log(JSON.stringify(viterbi.parseTree)); // {"category":"","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[ ]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","c hildren":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a", "children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]} - //console.log(viterbi.probability); // 0.6 - //Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man")) + // console.log(JSON.stringify(viterbi.parseTree)); // {"category":"","children":[{"category":"S","children":[{"category":"NP","children":[{"category":"Det","children":[{"token":"The","children":[ ]}]},{"category":"N","children":[{"token":"man","children":[]}]}]},{"category":"VP","children":[{"category":"TV","children":[{"token":"chased","children":[]}]},{"category":"NP","children":[{"category":"Det","children":[{"token":"the","children":[]}]},{"category":"N","children":[{"token":"man","c hildren":[]}]},{"category":"Mod","children":[{"token":"with","children":[]},{"category":"NP","children":[{"category":"Det","children":[{"token":"a", "children":[]}]},{"category":"N","children":[{"token":"stick","children":[]}]}]}]}]}]}]}]} + // console.log(viterbi.probability); // 0.6 + // Parser.recognize(S, grammar, Tokens.tokenize("the", "stick", "chased", "the", "man")) }); -it('should parse aaaaa', () => { - const tokens = ["a", "a", "a", "e"]; - const [chart, i, init] = parseSentenceIntoChart( + const tokens = ["a", "a", "a", "e"]; + it("should deal with scan probability correctly", () => { + const p1 = getViterbiParse( A, g, - tokens + tokens, + (ignore, ignored) => { + return g.probabilityMapping.fromProbability(1.0); + } + ).probability; + + const p2 = getViterbiParse( + A, + g, + tokens, + (word, ignored) => { + return word === "a" ? g.probabilityMapping.fromProbability(0.5) : undefined; + } + ).probability; + + const eq = p2 * 2 * 2 * 2; + const epsilon = 0.0000000000000001; + expect(p1).to.be.above(eq - epsilon).and.below(eq + epsilon); + }); + + it("should parse aaae", () => { + const [chart, ignored, init] = parseSentenceIntoChart( + A, + g, + tokens, + (word, terminalTypes) => { + return g.probabilityMapping.fromProbability(1.0); + } ); expect(chart.getCompletedStates(tokens.length).has( @@ -108,9 +131,5 @@ it('should parse aaaaa', () => { ) )).to.equal(true); - /*console.log(g.probabilityMapping.toProbability( - chart.viterbiScores.get(chart.getOrCreate( - tokens.length, 0, init.rule.right.length, init.rule - )).innerScore));*/ }); }); diff --git a/test/sample-grammar.ts b/test/sample-grammar.ts index 3c8cf22..672c752 100644 --- a/test/sample-grammar.ts +++ b/test/sample-grammar.ts @@ -20,7 +20,7 @@ export const g:Grammar = builder .addNewRule(0.5, C, [D]) .addNewRule(0.5, D, [E]) .addNewRule(0.5, D, [a]) - .addNewRule(0.5, E, [E,E]) + .addNewRule(0.5, E, [E, E]) .addNewRule(0.5, E, [e]) //.addRule(0.1, E, [C]) .build();